Source code for src.superphot_plus.data_generation.alerce

"""This script provides functions for importing and manipulating ZTF 
data from the Alerce API."""

import csv
import os
import pandas as pd

from alerce.core import Alerce


[docs]
alerce = Alerce()


[docs]
MIN_PER_FILTER = 5


# pylint: disable=global-variable-not-assigned



[docs]
def add_stamp_column(input_filename, output_filename):  # pragma: no cover
    """Checks whether stamp classifier categorizes each lightcurve in
    spreadsheet as a supernova-like transient, and adds as additional
    column.

    Parameters
    ----------
    input_filename : str
        Path to the input CSV file.
    output_filename : str
        Path to the output CSV file.
    """
    input_df = pd.read_csv(input_filename)
    
    names = input_df.NAME.to_numpy()
    stamp = []
    
    for name in names:
        try:
            p = alerce.query_probabilities(oid=name, format="pandas")

            p_class = p[p["classifier_name"] == "stamp_classifier"]
            prob = p_class[p_class["ranking"] == 1]["probability"].iat[0]
            best_label = p_class[p_class["ranking"] == 1]["class_name"].iat[0]

            stamp.append( (best_label == "SN") and (prob >= 0.5) )
        except:
            stamp.append( False )
    
    input_df['STAMP'] = stamp
    input_df.to_csv(output_filename, index=False)




[docs]
def get_all_unclassified_samples(save_csv):  # pragma: no cover
    """Get all unclassified samples and save them to a CSV file.

    Parameters
    ----------
    save_csv : str
        Path to the output CSV file.
    """
    global alerce

    classifiers = alerce.query_classifiers()
    print(classifiers)
    i = 0
    repeat_names = set()
    
    if os.path.exists(save_csv):
        with open(save_csv, "r", encoding="utf-8") as sc:
            csv_reader = csv.reader(sc, delimiter=",")
            next(csv_reader)
            for row in csv_reader:
                repeat_names.add(row[0])

    while True:
        print(i)

        while True:
            try:
                objs = alerce.query_objects(
                    classifier="lc_classifier_top",
                    #classifier_version="hierarchical_random_forest_1.0.0",
                    class_name="Transient",
                    format="pandas",
                    page_size=2000,
                    probability=0.5,
                    page=i,
                )
                break
            except:
                pass

        if len(objs) == 0:  # finished
            return None

        with open(save_csv, "a+", encoding="utf-8") as sc:
            csv_writer = csv.writer(sc, delimiter=",")

            for row_idx in range(len(objs)):
                try:
                    row = objs.iloc[row_idx]
                    name = row.iat[0]
                    if name in repeat_names:
                        #print("REPEAT")
                        continue
                    p = alerce.query_probabilities(oid=name, format="pandas")

                    p_class = p[p["classifier_name"] == "lc_classifier_transient"]
                    prob = p_class[p_class["ranking"] == 1]["probability"].iat[0]
                    best_label = p_class[p_class["ranking"] == 1]["class_name"].iat[0]
                    
                    csv_writer.writerow([name, prob, best_label])
                    repeat_names.add(name)

                except:
                    print("skipped")
                    continue
        i += 1




[docs]
def generate_flux_files(master_csv, save_folder):  # pragma: no cover
    """Generates flux files for all ZTF samples in the master CSV file,
    using ALeRCE's API.

    Parameters
    ----------
    master_csv : str
        Path to the master CSV file.
    save_folder : str
        Path to the folder where the flux files will be saved.
    """
    global alerce
    os.makedirs(save_folder, exist_ok=True)
    df = pd.read_csv(master_csv)
    names = df.NAME
    for ztf_name in names:
        try:
            if os.path.exists(os.path.join(save_folder, ztf_name + ".csv")):
                continue
            # print(ztf_name)
            # Getting detections for an object
            detections = alerce.query_detections(ztf_name, format="pandas")
            detections.to_csv(os.path.join(save_folder, ztf_name + ".csv"), index=False)
        except:
            continue




[docs]
def generate_single_flux_file(ztf_name, save_folder):
    """Generates a flux file for a single ZTF sample in the master CSV
    file, using ALeRCE's API.

    Parameters
    ----------
    ztf_name : str
        Name of the ZTF sample.
    save_folder : str
        Path to the folder where the flux file will be saved.
    """
    global alerce
    os.makedirs(save_folder, exist_ok=True)

    # Getting detections for an object
    detections = alerce.query_detections(ztf_name, format="pandas")
    print(os.path.join(save_folder, ztf_name + ".csv"))
    detections.to_csv(os.path.join(save_folder, ztf_name + ".csv"), index=False)