# Source code for regtools.fe.dataprep

import pandas as pd
from typing import Tuple, Optional, List, Union

from ..tools import _to_list_if_str

DfListTuple = Tuple[pd.DataFrame, Optional[list]]


def fixed_effects_reg_df_and_cols_dict(
    df: pd.DataFrame, fe_vars
) -> Tuple[pd.DataFrame, dict]:
    """Add fixed-effect dummy columns for each variable in ``fe_vars``.

    Each fixed-effects variable is processed in turn by
    ``_fixed_effects_reg_df_and_cols``; the DataFrame it returns is fed into
    the next iteration, so the final frame carries the dummy columns for
    every variable.

    :param df: data containing each fixed-effects variable
    :param fe_vars: one fixed-effects variable name or several of them.
        The value is normalised with ``_to_list_if_str`` before iterating
        (presumably a lone string is wrapped in a list -- confirm against
        ``..tools``).
    :return: tuple of (DataFrame with dummy columns added, dict mapping each
        fixed-effects variable name to the list of dummy column names
        created for it)
    """
    fe_vars = _to_list_if_str(fe_vars)

    fe_cols_dict = {}
    for fe_var in fe_vars:
        # Rebind ``df`` so the dummies accumulate across variables.
        df, cols = _fixed_effects_reg_df_and_cols(df, fe_var)
        fe_cols_dict[fe_var] = cols

    return df, fe_cols_dict


def _fixed_effects_reg_df_and_cols(df, fe_var):
    """Append dummy columns for a single fixed-effects variable.

    :param df: data holding ``fe_var`` either as a column or as an index level
    :param fe_var: name of the fixed-effects variable
    :return: tuple of (new DataFrame with the dummy columns appended and the
        ``fe_var`` column removed if it was a column, list of the dummy
        column names)
    :raises ValueError: if ``fe_var`` is neither a column nor an index level
    """
    dummies = _get_dummy_df(df, fe_var)
    dummy_cols = list(dummies.columns)

    # concat builds a new frame, so the caller's ``df`` is never modified.
    fe_df = pd.concat([df, dummies], axis=1)
    if fe_var in fe_df.columns:
        # The raw variable is replaced by its dummies.
        fe_df = fe_df.drop(columns=fe_var)

    return fe_df, dummy_cols


def _get_dummy_df(df: pd.DataFrame, fe_var: str) -> pd.DataFrame:
    """Build the dummy-variable frame for ``fe_var``, omitting the first group.

    Values of ``fe_var`` are cast to ``str`` before encoding, so the dummy
    column labels are strings. The first dummy is dropped; it is the excluded
    (reference) group.

    :param df: data holding ``fe_var`` as a column or as an index level
    :param fe_var: name of the fixed-effects variable
    :return: DataFrame of 0/1 indicator columns carrying the same index
        as ``df``
    :raises ValueError: if ``fe_var`` is neither a column nor an index level
    """
    dummy_calc_df, index_cols = _get_dummy_calc_df(df, fe_var)

    # dtype is pinned to uint8 (the pandas < 2.0 default). From pandas 2.0 the
    # default is bool, and bool columns mixed with float regressors turn into
    # an object array when the frame is converted to NumPy.
    dummies = pd.get_dummies(
        dummy_calc_df[fe_var].astype(str),
        drop_first=True,  # the dropped level is the excluded group
        dtype="uint8",
    )

    if index_cols is not None:
        # The index was reset for the calculation. reset_index keeps row
        # order, so the original index can be put back directly; this also
        # avoids clashes between dummy labels and index level names.
        dummies.index = df.index

    return dummies


def _get_dummy_calc_df(
    df: pd.DataFrame, fe_var: str
) -> Tuple[pd.DataFrame, Optional[list]]:
    """Return a frame in which ``fe_var`` is a regular column.

    :param df: data holding ``fe_var`` as a column or as an index level
    :param fe_var: name of the fixed-effects variable
    :return: tuple of (frame to calculate dummies from, list of the original
        index names if the index had to be reset, otherwise ``None``)
    :raises ValueError: if ``fe_var`` is neither a column nor an index level
    """
    index_cols: Optional[List[Union[str, float, int]]]

    if fe_var in df.index.names:
        # Dummies cannot be calculated from an index level, so move the
        # index into columns and remember its names.
        index_cols = list(df.index.names)
        for_calc_df = df.reset_index()
    elif fe_var in df.columns:
        # Already a column: use the frame as is.
        for_calc_df = df
        index_cols = None
    else:
        raise ValueError(
            f"fixed effects variable {fe_var} must be in columns or index."
        )

    return for_calc_df, index_cols