# Source code for regtools.select

from typing import Sequence, List, Any


def select_models(reg_list, keepnum: int, xvars: Sequence[str]):
    """
    Select the best fitted regression models by adjusted R-squared.

    Models are grouped by the number of variables involved (``df_model``).
    Within each group, the ``keepnum`` models with the highest adjusted
    R-squared are kept. For example, if ``reg_list`` contains 3 regressions
    with two variables and 6 regressions with three variables, and
    ``keepnum`` is 2, a list of four regressions is returned: 2 with two
    variables and 2 with three variables.

    Models whose adjusted R-squared ties with the cutoff value are all kept,
    so a group may contribute more than ``keepnum`` models. Groups with fewer
    than ``keepnum`` models are kept in full, and variable counts for which
    no model exists are skipped.

    :param reg_list: fitted regression results; each item must expose
        ``df_model`` and ``rsquared_adj`` attributes (presumably statsmodels
        regression results -- confirm against callers)
    :param keepnum: number to keep for each amount of x variables, expected
        to be >= 1. The total number of outputted regressions will be
        roughly keepnum * len(xvars)
    :param xvars: column names of x variables; only its length is used, as
        the largest variable count to consider
    :return: list of the selected models, ordered by number of variables and,
        within the same number of variables, in their original order
    """
    outlist: List[Any] = []
    for num_vars in range(1, len(xvars) + 1):
        # Select models with this many variables.
        matches = [reg for reg in reg_list if reg.df_model == num_vars]
        if not matches:
            # Nothing was fitted with this many variables; indexing the empty
            # list of R-squareds below would raise an uncaught IndexError.
            continue

        r2_sorted = sorted(reg.rsquared_adj for reg in matches)
        try:
            # keepnum-th highest adjusted R-squared is the cutoff.
            r2_min = r2_sorted[-keepnum]
        except IndexError:
            # Fewer models than keepnum (e.g. the single model using all
            # xvars): use the lowest value so that every model is kept.
            r2_min = r2_sorted[0]

        outlist += [reg for reg in matches if reg.rsquared_adj >= r2_min]
    return outlist