Source code for regtools.summarize.yesno

import pandas as pd
from typing import List, Dict

ColBoolDict = Dict[str, List[bool]]


[docs]def col_boolean_dict_from_list_of_lists_of_columns( column_list_list: List[List[str]], ) -> ColBoolDict: """ transforms [ ['a', 'b'], ['a'], ['b'], None, False, [], ['a','b'] ] into { 'a': [True, True, False, False, False, False, True], 'b': [True, False, True, False, False, False, True] } Returns: """ all_columns = [] for column_list in column_list_list: if column_list: all_columns.extend(column_list) all_columns = list(set(all_columns)) col_bool_dict: Dict[str, List[bool]] = {col: [] for col in all_columns} for column_list in column_list_list: if not column_list: # None, False, [] for column list. Just add False to all columns for column in col_bool_dict: col_bool_dict[column].append(False) else: # Valid column list for column in col_bool_dict: if column in column_list: col_bool_dict[column].append(True) else: col_bool_dict[column].append(False) return col_bool_dict
def _standardize_col_boolean_dict(summ_df, fixed_effect_dict): num_models = len(summ_df.columns) out_dict = {} for fixed_effect_name, booleans in fixed_effect_dict.items(): # Here we are being passed a list of booleans matching the size of models. # This is the correct format so just output if (isinstance(booleans, list)) and (len(booleans) == num_models): out_booleans = booleans # Here we are being passed a single boolean or a list with single boolean # Need to expand to cover all models else: if not isinstance(booleans, list): booleans = [booleans] if len(booleans) > 1: raise ValueError( f"Incorrect shape of booleans for {fixed_effect_name} fixed effect passed. Got {len(booleans)} bools, was expecting {num_models}" ) out_booleans = [booleans[0]] * num_models # Final input checks assert isinstance(out_booleans, list) assert all([isinstance(b, bool) for b in out_booleans]) assert isinstance(fixed_effect_name, str) # Assign to output dictionary out_dict[fixed_effect_name] = out_booleans return out_dict def _add_yes_no_row(summ_df, bool_list, item_name="Time Fixed Effects"): yes_no_row = pd.DataFrame( [_get_yes_no(bool_) for bool_ in bool_list], columns=[item_name] ).T yes_no_row.columns = summ_df.columns return pd.concat([summ_df, yes_no_row], axis=0) def _get_yes_no(bool_): if bool_: return "Yes" else: return "No"