Source code for regtools.lag.plot

import pandas as pd
import matplotlib.pyplot as plt

from regtools.interact import _interaction_tuple_to_var_name
from regtools.iter import _set_interaction_tuples
from regtools.hypothesis.lincom import hypothesis_test

# TODO [#2]: make lag plot module flexible to not having interaction


[docs]def interacted_lag_plot_from_reg_result_list( results, lag_tuple, main_iv, interaction_tuples, yvar, interaction_var_value=13.27, date_var="Year", outpath=None, clear_figure=True, ): plot_df = produce_simplified_result_df( results, lag_tuple, main_iv, interaction_tuples, interaction_var_value=interaction_var_value, ) return interacted_lag_plot_from_lag_result_df( plot_df, yvar, main_iv, date_var=date_var, outpath=outpath, clear_figure=clear_figure, )
[docs]def interacted_lag_plot_from_lag_result_df( result_df, yvar, main_iv, date_var="Year", outpath=None, clear_figure=True ): """ Creates a plot of effect of main_iv on yvar at different quantiles. To be used after reg_for_each_quantile_produce_result_df :param result_df: pd.DataFrame, result from reg_for_each_quantile_produce_result_df :param yvar: str, label of dependent variable :param main_iv: str, label of independent variable of interest :param outpath: str, filepath to output figure. must include matplotlib supported extension such as .pdf or .png :param clear_figure: bool, True wipe memory of matplotlib figure after running function :return: """ p1 = plt.plot(result_df.q, result_df.b, color="black", label="Lag Regression Slope") p2 = plt.fill_between( result_df.q, result_df.ub, result_df.lb, color="gray", label="95% Confidence Interval Bound", ) plt.title(f"Effect of {main_iv} on {yvar} over Time") plt.ylabel(r"Lag Coefficient") plt.xlabel(f"Number of {date_var}s Lagged") plt.legend() if outpath: plt.savefig(outpath) else: plt.show() if clear_figure: plt.clf()
[docs]def produce_simplified_result_df( results, lag_tuple, main_iv, interaction_tuples, interaction_var_value=13.27 ): simple_results = _produce_simplified_results( results, lag_tuple, main_iv, interaction_tuples, interaction_var_value=interaction_var_value, ) return pd.DataFrame(simple_results, columns=["q", "a", "b", "lb", "ub"])
def _produce_simplified_results( results, lag_tuple, main_iv, interaction_tuples, interaction_var_value=13.27 ): interaction_tuples = _set_interaction_tuples(interaction_tuples, len(results)) return [ _produce_simplified_result_list_from_one_result( result, lag_tuple[i], main_iv, interaction_tuples[i], interaction_var_value=interaction_var_value, ) for i, result in enumerate(results) ] def _produce_simplified_result_list_from_one_result( res, lag, main_iv, interaction_tuple, interaction_var_value=13.27 ): # Handle possibility of dummy cols dict coming through with result as tuple if isinstance(res, tuple): res = res[0] interaction_name = _interaction_tuple_to_var_name(interaction_tuple) # Just reassign names for brevity b_iv = res.params[main_iv] b_interact = res.params[interaction_name] # 95% confidence interval +/- amount # Get standard error of linear combination col_coef_dict = {main_iv: 1, interaction_name: interaction_var_value} hypothesis_result = hypothesis_test(res, col_coef_dict=col_coef_dict) return [ lag, res.params["const"], hypothesis_result.effect, hypothesis_result.conf_int()[0][0], # lower 95% confidence interval hypothesis_result.conf_int()[0][1], # upper 95% confidence interval ]