import pandas as pd
def suppress_controls_in_summary_df(summ_df, regressor_order, dummy_col_dicts, info_dict):
    """
    Replace the control-variable rows of a regression summary with one "Controls" row.

    The summary is split into variable rows, fixed-effect rows and info rows. Variable
    rows are further split into the regressors named in ``regressor_order`` and the
    remaining controls; the controls are condensed into a single Yes/No row per model
    column, and the pieces are stacked back together as regressors, controls row,
    fixed effects, info.

    Args:
        summ_df: summary DataFrame whose index holds the row labels.
        regressor_order: names of the variables to keep as individual rows.
        dummy_col_dicts: one entry per model; each entry's keys are turned into
            ``'<key> Fixed Effects'`` row labels (falsy entries are skipped).
        info_dict: its keys are the labels of the info rows.

    Returns:
        The condensed summary DataFrame.
    """
    # Work out which index labels identify regressor, fixed-effect and info rows
    regressors, fe, info = _each_row_set_to_keep_from_regressor_order_dummy_col_dicts_and_info_dict(
        regressor_order, dummy_col_dicts, info_dict
    )
    variables_df, fe_df, info_df = _split_summ_df_into_variables_fixed_effects_info(summ_df, fe, info)
    regressors_df, controls_df = _split_variable_df_into_regressors_and_controls(variables_df, regressors)
    # All control rows collapse into a single 'Controls' row of Yes/No flags
    controls_row = _create_controls_row_as_df(controls_df)
    return _combine_dfs(regressors_df, controls_row, fe_df, info_df)
def _all_rows_to_keep_from_regressor_order_dummy_col_dicts_and_info_dict(regressor_order, dummy_col_dicts, info_dict):
    """
    Return every row label to keep as one flat sequence.

    The three label groups (regressors, fixed effects, info) are obtained from the
    per-group helper and concatenated in that order.
    """
    row_sets = _each_row_set_to_keep_from_regressor_order_dummy_col_dicts_and_info_dict(
        regressor_order, dummy_col_dicts, info_dict
    )
    kept_regressors, kept_fe, kept_info = row_sets
    return kept_regressors + kept_fe + kept_info
def _each_row_set_to_keep_from_regressor_order_dummy_col_dicts_and_info_dict(
    regressor_order, dummy_col_dicts, info_dict
):
    """
    Build the three groups of row labels to keep.

    Returns:
        A tuple ``(regressors, fe, info)``: a copy of ``regressor_order``, the
        fixed-effect row labels derived from ``dummy_col_dicts``, and a list of the
        keys of ``info_dict``.
    """
    # Copy so later changes to the returned value cannot alter the caller's object
    kept_regressors = regressor_order.copy()
    fe_labels = _extract_all_fe_names_from_dummy_col_dicts(dummy_col_dicts)
    info_labels = list(info_dict)
    return kept_regressors, fe_labels, info_labels
def _extract_all_fe_names_from_dummy_col_dicts(dummy_col_dicts):
    """
    Collect the unique fixed-effect row labels across all models.

    Args:
        dummy_col_dicts: iterable with one entry per model. Each entry is a dict whose
            keys name the fixed effects of that model, or a falsy value (``None`` /
            empty dict) when the model has none. ``None`` or an empty iterable for the
            whole argument yields an empty list.

    Returns:
        List of ``'<key> Fixed Effects'`` labels, without duplicates, in order of first
        appearance.
    """
    if not dummy_col_dicts:
        return []

    # A dict keeps insertion order and gives constant-time de-duplication
    unique_names = {}
    for dummy_dict in dummy_col_dicts:
        # Will come through as None instead of dict if no fixed effects for this model
        if not dummy_dict:
            continue
        for col in dummy_dict:
            unique_names.setdefault(col + ' Fixed Effects', None)
    return list(unique_names)
def _split_summ_df_into_variables_fixed_effects_info(df, fe, info):
    """
    Partition the rows of ``df`` by index label.

    Args:
        df: DataFrame to split.
        fe: index labels identifying fixed-effect rows.
        info: index labels identifying info rows.

    Returns:
        A tuple ``(variables_df, fe_df, info_df)`` where ``variables_df`` holds every
        row whose label is in neither ``fe`` nor ``info``.
    """
    is_fe_row = df.index.isin(fe)
    is_info_row = df.index.isin(info)
    # Anything that is neither a fixed-effect row nor an info row is a variable row
    is_variable_row = ~(is_fe_row | is_info_row)
    return df.loc[is_variable_row], df.loc[is_fe_row], df.loc[is_info_row]
def _split_variable_df_into_regressors_and_controls(variables_df, regressors, stderr=True):
    """
    Split the variable rows of a summary into regressor rows and control rows.

    A variable is a regressor when its name is in ``regressors``. It is a control when
    it is neither a regressor nor ``'const'``. Rows for ``'const'`` therefore appear in
    neither output unless ``'const'`` is listed in ``regressors``.

    Args:
        variables_df: DataFrame whose index holds the variable names. It is not modified.
        regressors: iterable of variable names to treat as regressors.
        stderr: when True, rows with a blank (``''``) index label are treated as
            belonging to the closest named row above them, so a coefficient row and
            the row(s) beneath it are always selected together.

    Returns:
        A tuple ``(regressors_df, controls_df)``. Both keep the row order and the index
        labels of ``variables_df``.
    """
    regressor_names = set(regressors)
    not_controls = regressor_names | {'const'}

    if stderr:
        # Give every blank-labelled row the name of the variable it sits under, so the
        # rows are selected by what is actually in the frame rather than by the
        # order or completeness of `regressors`
        owners = []
        current = None
        for label in variables_df.index:
            if label != '':
                current = label
            owners.append(current)
    else:
        owners = list(variables_df.index)

    regressor_positions = [pos for pos, owner in enumerate(owners) if owner in regressor_names]
    control_positions = [
        pos for pos, owner in enumerate(owners)
        if owner is not None and owner != '' and owner not in not_controls
    ]

    # Positional selection returns new frames and leaves the original labels
    # (names on coefficient rows, blanks beneath them) untouched
    return variables_df.iloc[regressor_positions], variables_df.iloc[control_positions]
def _create_controls_row_as_df(controls_df):
    """
    Build the single-row 'Controls' DataFrame summarising which models use controls.

    Args:
        controls_df: DataFrame of control-variable rows, one column per model. May
            have zero rows.

    Returns:
        A one-row DataFrame indexed ``['Controls']`` with the same columns as
        ``controls_df``. A cell is ``'Yes'`` when that column contains any truthy
        value and ``'No'`` otherwise, so every cell is ``'No'`` when there are no
        control rows at all.
    """
    # Select columns by position so duplicated column labels still yield one flag each.
    # `.any()` on an object column may hand back the matching element rather than a
    # bool, hence the explicit bool(); on a column with no rows it is False.
    flags = [
        'Yes' if bool(controls_df.iloc[:, pos].any()) else 'No'
        for pos in range(len(controls_df.columns))
    ]
    return pd.DataFrame([flags], index=['Controls'], columns=controls_df.columns)
def _combine_dfs(regressors_df, controls_row, fe_df, info_df):
    """
    Stack the summary pieces vertically.

    Returns:
        One DataFrame with the rows of ``regressors_df``, ``controls_row``, ``fe_df``
        and ``info_df``, in that order.
    """
    ordered_pieces = (regressors_df, controls_row, fe_df, info_df)
    return pd.concat(ordered_pieces, axis=0)