Source code for twinlab.helper

import json
from pprint import pprint

import pandas as pd
from typeguard import typechecked



[docs]
@typechecked
def load_dataset(filepath: str, verbose: bool = False) -> pd.DataFrame:
    """Read a local ``.csv`` file into a pandas dataframe.

    Args:
        filepath (str): Location of the csv file to read.
        verbose (bool, optional): When ``True``, print the loaded dataframe.
            Defaults to ``False``.

    Returns:
        pd.DataFrame: The contents of the csv file.

    Example:
        .. code-block:: python

            df = tl.load_dataset("path/to/data.csv", verbose=True)

        .. code-block:: console

            Dataset loaded:
                 x         y
            0  0.0  1.097485
            1  1.0  0.835439
            2  2.0  0.655124
    """
    dataset = pd.read_csv(filepath)

    # Quiet mode: hand the dataframe straight back without echoing it.
    if not verbose:
        return dataset

    print("Dataset loaded:")
    print(dataset)
    return dataset




[docs]
def load_params(filepath: str, verbose: bool = False) -> dict:
    """Load a parameter set from a local file in ``.json`` format into a dictionary.

    The file is always decoded as UTF-8, so the result does not depend on the
    locale of the machine running the code.

    Args:
        filepath (str): Path to the parameter file, which should be in json format.
        verbose (bool, optional): Display information while running.

    Returns:
        dict: The parameter set loaded from the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid json.

    Example:

        .. code-block:: python

                params = tl.load_params("path/to/params.json", verbose=True)
    """
    # Pin the encoding: without it ``open`` falls back to the platform locale,
    # which can mis-decode non-ASCII characters in a UTF-8 json file.
    with open(filepath, encoding="utf-8") as f:
        params = json.load(f)
    if verbose:
        print("Parameters loaded from file:")
        pprint(params)
    return params




[docs]
def get_sample(df: pd.DataFrame, key: int) -> pd.DataFrame:
    """Pull one sample out of the multi-indexed dataframe produced by ``Emulator.sample()``.

    ``Emulator.sample()`` returns a dataframe whose columns are multi-indexed: the first level is the parameter name and the second level is the sample number.
    This helper takes the cross-section of the columns at the last level for the requested sample number, dropping that level from the result.

    Args:
        df (pd.DataFrame): A multi-indexed dataframe returned by the Emulator.sample() method.
        key (int): The sample number to extract. It is converted to a string before the lookup.

    Returns:
        pd.DataFrame: The selected sample as a standard (non-multi-indexed) dataframe.

    Example:
        .. code-block:: python

            df = emulator.sample(df_X, 5) # Generates independent samples
            tl.get_sample(df, 1) # Isolate sample "1"

        .. code-block:: console

                      y
            0  1.097485
            1  0.835439
            2  0.655124

    """
    # The lookup uses the string form of the key against the innermost column level.
    return df.xs(str(key), axis="columns", level=-1)




[docs]
def join_samples(
    df_one: pd.DataFrame,
    df_two: pd.DataFrame,
) -> pd.DataFrame:
    """Join two dataframes that contain independent samples generated by the ``Emulator.sample()`` method.

    The output from the ``Emulator.sample()`` method is a multi-indexed dataframe where the first level of the index is the parameter name and the second level is the sample number.
    This convenience method allows you to join two dataframes that contain independent samples generated by the ``Emulator.sample()`` method together into a single dataframe.

    The sample numbers of ``df_two`` are shifted by the number of samples per parameter in ``df_one`` so that they follow on from the samples of ``df_one``.
    Only the parameters present in ``df_one`` are kept in the result, in the order in which they appear in ``df_one``.
    Within each parameter the columns are ordered by ``sort_index``, so string sample labels are ordered lexicographically (``"10"`` sorts before ``"2"``).

    Args:
        df_one (pd.DataFrame): The first multi-indexed dataframe to join.
        df_two (pd.DataFrame): The second multi-indexed dataframe to join.
            Its second-level column labels must be convertible with ``int``.

    Returns:
        pd.DataFrame: The joined dataframe

    Example:
        .. code-block:: python

            df_y1 = emulator.sample(df_X, 1) # Create first set of samples
            df_y2 = emulator.sample(df_X, 3) # Creates new independent samples
            tl.join_samples(df_y1, df_y2)

        .. code-block:: console

                      y
                      0         1         2         3
            0  0.784193  1.308067  0.176582  0.875387
            1  0.978259  1.039125  0.646922  0.887118
            2  1.086855  0.942270  0.864730  0.934348
    """
    # Count the parameters that are actually present in the first dataframe.
    # ``columns.levels[0]`` is deliberately avoided: a MultiIndex keeps unused
    # levels after slicing, which would make the sample count too small.
    parameter_names = df_one.columns.get_level_values(0).unique()
    n_samples = len(df_one.columns) // len(parameter_names)

    # Shift the sample numbers of the second dataframe past those of the first
    shifted_columns = [
        (column[0], str(int(column[1]) + n_samples)) for column in df_two.columns
    ]
    if shifted_columns:
        new_df = df_two.copy()
        new_df.columns = pd.MultiIndex.from_tuples(shifted_columns)
    else:
        # Nothing to append; an empty frame leaves the concatenation unchanged
        new_df = pd.DataFrame()

    # Join, sort the columns, then restore the parameter order of the first dataframe
    joined_df = pd.concat([df_one, new_df], axis=1).sort_index(axis=1, level=0)
    return joined_df[parameter_names]