Source code for twinlab.helper

import json
from pprint import pprint

import pandas as pd
from typeguard import typechecked


@typechecked
def load_dataset(filepath: str, verbose: bool = False) -> pd.DataFrame:
    """Read a local ``.csv`` file into a pandas dataframe.

    Args:
        filepath (str): Location of the csv file to read.
        verbose (bool, optional): When ``True``, print the loaded dataframe.
            Defaults to ``False``.

    Returns:
        pd.DataFrame: The contents of the file as parsed by ``pd.read_csv``.

    Example:
        .. code-block:: python

            df = tl.load_dataset("path/to/data.csv", verbose=True)

        .. code-block:: console

            Dataset loaded:
                 x         y
            0  0.0  1.097485
            1  1.0  0.835439
            2  2.0  0.655124

    """
    dataset = pd.read_csv(filepath)
    if not verbose:
        return dataset
    # Verbose mode: echo the parsed table before handing it back.
    print("Dataset loaded:")
    print(dataset)
    return dataset
def load_params(filepath: str, verbose: bool = False) -> dict:
    """Load a parameter set from a local file in ``.json`` format into a dictionary.

    Args:
        filepath (str): Path to the parameter file, which should be in json
            format and UTF-8 encoded.
        verbose (bool, optional): Display information while running.

    Returns:
        dict: The parameter set loaded from the file.

    Example:
        .. code-block:: python

            params = tl.load_params("path/to/params.json", verbose=True)

    """
    # Decode explicitly as UTF-8 rather than relying on the platform's default
    # locale encoding, which would garble non-ASCII values on e.g. Windows.
    with open(filepath, encoding="utf-8") as f:
        params = json.load(f)
    if verbose:
        print("Parameters loaded from file:")
        pprint(params)
    return params
def get_sample(df: pd.DataFrame, key: int) -> pd.DataFrame:
    """Pull one sample out of a multi-indexed dataframe of samples.

    The dataframe is expected to have multi-indexed columns, as produced by
    the ``Emulator.sample()`` method, whose last level holds the sample number
    as a string. The columns whose last level matches ``key`` are selected and
    that level is dropped from the result.

    Args:
        df (pd.DataFrame): A multi-indexed dataframe returned by the
            ``Emulator.sample()`` method.
        key (int): The number of the sample to extract. It is converted to a
            string before the lookup.

    Returns:
        pd.DataFrame: The selected sample, without the sample-number level.

    Example:
        .. code-block:: python

            df = emulator.sample(df_X, 5)  # Generates independent samples
            tl.get_sample(df, 1)  # Isolate sample "1"

        .. code-block:: console

                      y
            0  1.097485
            1  0.835439
            2  0.655124

    """
    # Sample labels are stored as strings, hence the conversion of the key.
    return df.xs(key=str(key), level=-1, axis="columns")
def join_samples(
    df_one: pd.DataFrame,
    df_two: pd.DataFrame,
) -> pd.DataFrame:
    """Join two dataframes that contain independent samples generated by the ``Emulator.sample()`` method.

    Both inputs are expected to have two-level column indices where the first
    level is the parameter name and the second level is the sample number,
    stored as a string. The sample numbers of ``df_two`` are shifted by the
    number of samples per parameter in ``df_one`` so that the labels do not
    collide, and the two frames are then concatenated column-wise.

    In the result, parameters appear in the order in which they first occur in
    ``df_one``, and within each parameter the samples are in ascending
    *numeric* order (so ``"10"`` comes after ``"9"``, not after ``"1"``).
    Columns of ``df_two`` whose parameter does not occur in ``df_one`` are
    dropped.

    Args:
        df_one (pd.DataFrame): The first multi-indexed dataframe to join.
        df_two (pd.DataFrame): The second multi-indexed dataframe to join.

    Returns:
        pd.DataFrame: The joined dataframe

    Raises:
        ValueError: If a sample label cannot be converted to an integer.

    Example:
        .. code-block:: python

            df_y1 = emulator.sample(df_X, 1)  # Create first set of samples
            df_y2 = emulator.sample(df_X, 3)  # Creates new independent samples
            tl.join_samples(df_y1, df_y2)

        .. code-block:: console

                      y
                      0         1         2         3
            0  0.784193  1.308067  0.176582  0.875387
            1  0.978259  1.039125  0.646922  0.887118
            2  1.086855  0.942270  0.864730  0.934348

    """
    # Count the parameters actually present in the columns. ``levels[0]`` may
    # still list values that are no longer used after slicing.
    params = df_one.columns.get_level_values(0).unique()
    n_samples = len(df_one.columns) // len(params)

    # Offset the sample numbers of the second dataframe by relabelling its
    # columns directly; the data, dtypes and row index are left untouched.
    relabelled = [(col[0], str(int(col[1]) + n_samples)) for col in df_two.columns]
    shifted = df_two.copy()
    if relabelled:
        shifted.columns = pd.MultiIndex.from_tuples(relabelled)

    combined = pd.concat([df_one, shifted], axis=1)

    # Order columns by position (robust to duplicate labels): first by the
    # parameter's position in df_one, then by the integer sample number.
    # Sorting the string labels themselves would be lexicographic.
    rank = {param: position for position, param in enumerate(params)}
    labels = list(combined.columns)
    order = sorted(
        (i for i, label in enumerate(labels) if label[0] in rank),
        key=lambda i: (rank[labels[i][0]], int(labels[i][1])),
    )
    return combined.iloc[:, order]