from typing import Tuple
import pandas as pd
from functools import partial
from statsmodels.api import OLS
import re
# Matches a lagged variable name of the form ``<base name>$_{t - <N>}$``.
# Group 1 captures the base name, group 2 captures the lag digits.
# The braces are unescaped; ``{t - ...}`` is not a valid repetition
# quantifier, so Python's ``re`` treats them as literal characters.
LAG_NAME_PATTERN = re.compile(r'(.+)\$_{t - (\d+)}\$')
class SimplifiedBase:
    """
    Base class providing bulk attribute assignment from a dictionary.
    """

    def _set_attrs(self, attr_dict):
        """
        Assign every key of ``attr_dict`` as an attribute on this instance.

        :param attr_dict: mapping of attribute name to the value to store
        :return: None
        """
        for name, value in attr_dict.items():
            setattr(self, name, value)
class SimplifiedRegressionResult(SimplifiedBase):
    """
    Lightweight container mirroring a subset of a regression result's attributes.

    Attributes named in ``direct_attrs`` are stored on the instance itself; attributes
    named in ``model_attrs`` are stored on a nested ``SimplifiedModel`` kept as ``model``.
    """
    # Names stored directly on the result object
    direct_attrs = [
        'params', 'pvalues', 'tvalues', 'nobs', 'rsquared_adj', 'bse', 'conf_int',
        'normalized_cov_params', 'cov_params_default', 'scale', 'cov_params',
        't_test',
    ]
    # Names stored on the nested ``model`` object
    model_attrs = ['exog_names', 'endog_names']

    def __init__(self, **kwargs):
        """
        Build the result from keyword arguments.

        :param kwargs: attribute values keyed by name; every key must appear in
            ``direct_attrs`` or ``model_attrs``
        :raises UnsupportedResultAttributeException: if any other key is passed
        """
        supported = self.direct_attrs + self.model_attrs
        _validate_attrs(kwargs, supported)
        # Splits kwargs in place: model attributes are removed from it and returned
        for_model = SimplifiedRegressionResult.pop_model_attrs(kwargs)
        self._set_attrs(kwargs)
        self.model = SimplifiedModel(**for_model)

    @classmethod
    def from_statsmodels_result(cls, result):
        """
        Alternate constructor copying the supported attributes off an existing result.

        :param result: object exposing every name in ``direct_attrs`` and a ``model``
            attribute exposing every name in ``model_attrs``
        :return: new instance of ``cls``
        """
        own_values = _extract_attrs_into_dict(result, cls.direct_attrs)
        model_values = _extract_attrs_into_dict(result.model, cls.model_attrs)
        return cls(**{**own_values, **model_values})

    @classmethod
    def pop_model_attrs(cls, attr_dict):
        """
        Remove the entries whose keys are in ``model_attrs`` and return them.

        Note: pops from attr_dict inplace

        :param attr_dict: dictionary to split
        :return: dictionary holding only the removed model attributes
        """
        picked = {key: attr_dict[key] for key in attr_dict if key in cls.model_attrs}
        # Remove in a second pass: a dict cannot change size while it is being iterated
        for key in picked:
            attr_dict.pop(key)
        return picked
class SimplifiedModel(SimplifiedBase):
    """
    Plain attribute holder for the model-level attributes of a regression result.
    """

    def __init__(self, **attrs):
        """
        :param attrs: attribute values keyed by name; each is set on the instance
        """
        self._set_attrs(attrs)
class UnsupportedResultAttributeException(Exception):
    """Raised when an attribute name is not among the supported result attributes."""
def _validate_attrs(attr_dict, valid_attrs):
for attr in attr_dict:
if attr not in valid_attrs:
raise UnsupportedResultAttributeException(f'Attribute {attr} not supported for SimplifiedRegressionResult')
def _extract_attrs_into_dict(obj, attrs):
result_dict = {}
# Get direct attributes
for attr in attrs:
value = getattr(obj, attr)
if isinstance(value, (pd.Series, pd.DataFrame, list, dict)):
value = value.copy()
result_dict[attr] = value
return result_dict
def remove_lag_names_from_reg_results(reg_list, lags=(1,)):
    """
    Strip lag markers from the names in each regression result of ``reg_list``.

    Each entry is either a bare result or a tuple whose first item is the result and
    whose second item (the fe_dict) is passed through untouched. The input list itself
    is not modified; a new list is returned.

    :param reg_list: iterable of results or (result, fe_dict) tuples
    :param lags: lag numbers whose markers should be removed
    :return: list with the same layout as ``reg_list`` holding the processed results
    """
    cleaned_results = []
    for entry in reg_list:
        if isinstance(entry, tuple):
            # Tuple of result, fe_dict: process the result, keep the fe_dict
            cleaned = _remove_lag_name_from_reg_result(entry[0], lags=lags)
            cleaned_results.append((cleaned, entry[1]))
        else:
            # Just a single result
            cleaned_results.append(_remove_lag_name_from_reg_result(entry, lags=lags))
    return cleaned_results
def _remove_lag_name_from_reg_result(result, lags=(1,)):
    """
    Convert ``result`` to a ``SimplifiedRegressionResult`` with lag markers stripped.

    :param result: object accepted by ``SimplifiedRegressionResult.from_statsmodels_result``
    :param lags: lag numbers whose markers should be removed
    :return: the new ``SimplifiedRegressionResult``
    """
    simplified = SimplifiedRegressionResult.from_statsmodels_result(result)

    # These properties are relabelled in place, so the return value is not needed
    inplace_properties = ('params', 'pvalues', 'tvalues', 'bse', 'normalized_cov_params')
    for prop_name in inplace_properties:
        _remove_lag_names_from_ambiguous_property(getattr(simplified, prop_name), lags=lags)

    # The model name properties are not changed in place, so store the returned value
    model = simplified.model
    for prop_name in ('endog_names', 'exog_names'):
        cleaned = _remove_lag_names_from_ambiguous_property(getattr(model, prop_name), lags=lags)
        setattr(model, prop_name, cleaned)

    return simplified
def _remove_lag_names_from_ambiguous_property(ambiguous, lags=(1,)):
"""
Note: Series and DataFrame operations inplace, str and list operations not inplace
"""
if isinstance(ambiguous, pd.DataFrame):
lag_func = partial(_remove_lag_names_from_df_index_and_columns, ambiguous)
elif isinstance(ambiguous, pd.Series):
lag_func = partial(_remove_lag_names_from_series_index, ambiguous)
elif isinstance(ambiguous, str):
lag_func = partial(_remove_lag_names_from_varname, ambiguous)
elif isinstance(ambiguous, list):
lag_func = partial(_remove_lag_names_from_list, ambiguous)
else:
raise ValueError(f'Must pass DataFrame, Series, str, or list. Got type {type(ambiguous)}')
return lag_func(lags=lags)
def _remove_lag_names_from_df_index_and_columns(df, lags=(1,)):
"""
Note: inplace
"""
[_remove_one_lag_names_from_df_index_and_columns(df, num_lags=num_lags) for num_lags in lags]
def _remove_lag_names_from_series_index(series, lags=(1,)):
"""
Note: inplace
"""
[_remove_one_lag_names_from_series_index(series, num_lags=num_lags) for num_lags in lags]
def _remove_one_lag_names_from_df_index_and_columns(df, num_lags=1):
"""
Note: inplace
"""
rename_dict = {col: lag_varname_to_varname(col, num_lags=num_lags) for col in df.index}
df.index = df.index.to_series().replace(rename_dict)
df.columns = df.columns.to_series().replace(rename_dict)
def _remove_one_lag_names_from_series_index(series, num_lags=1):
"""
Note: inplace
"""
rename_dict = {col: lag_varname_to_varname(col, num_lags=num_lags) for col in series.index}
series.index = series.index.to_series().replace(rename_dict)
def _remove_lag_names_from_list(list_, lags=(1,)):
for lag in lags:
list_ = _remove_one_lag_names_from_list(list_, lag)
return list_
def _remove_one_lag_names_from_list(list_, num_lags=1):
return [lag_varname_to_varname(item, num_lags=num_lags) for item in list_]
def _remove_lag_names_from_varname(varname, lags=(1,)):
for lag in lags:
varname = lag_varname_to_varname(varname, lag)
return varname
def lag_varname_to_varname(varname, num_lags=1):
    """
    Remove every occurrence of the ``$_{t - <num_lags>}$`` marker from ``varname``.

    :param varname: variable name, possibly containing the lag marker
    :param num_lags: lag number whose marker should be removed
    :return: ``varname`` with the marker removed; unchanged if the marker is absent
    """
    lag_marker = '$_{{t - {}}}$'.format(num_lags)
    return varname.replace(lag_marker, '')
def lag_varname_to_varname_and_lag(varname: str) -> Tuple[str, int]:
    """
    Split a lagged variable name into its base name and lag number.

    The name is matched against ``LAG_NAME_PATTERN`` from the start of the string.

    :param varname: name expected to look like ``<base name>$_{t - <N>}$``
    :return: tuple of base name and the lag number as an int
    :raises VariableIsNotLaggedVariableException: if ``varname`` does not match
    """
    parsed = LAG_NAME_PATTERN.match(varname)
    if parsed is not None:
        name_part, lag_part = parsed.groups()
        return name_part, int(lag_part)
    raise VariableIsNotLaggedVariableException(f'could not parse {varname} as a lagged name')
class VariableIsNotLaggedVariableException(Exception):
    """Raised when a variable name cannot be parsed as a lagged variable name."""