Source code for pd_utils.corr

from typing import Sequence, Optional

import pandas as pd
import numpy as np


def formatted_corr_df(df: pd.DataFrame, cols: Optional[Sequence[str]] = None) -> pd.DataFrame:
    """
    Calculates correlations on a DataFrame and displays only the lower
    triangular of the resulting correlation DataFrame.

    :param df: DataFrame holding the data to correlate
    :param cols: subset of column names on which to calculate correlations;
        when None or empty, all columns of ``df`` are used
    :return: DataFrame of strings in which every non-string cell of the lower
        triangular correlation table is formatted to two decimal places and
        cells that are already strings (the blanks) are left as they are
    """
    # Explicit None/len check instead of ``not cols``: truth-testing a pandas
    # Index or numpy array raises, while an empty sequence must still mean
    # "use every column".
    if cols is None or len(cols) == 0:
        use_cols = list(df.columns)
    else:
        use_cols = list(cols)

    corr_df = _lower_triangular_of_df(df[use_cols].corr())

    def _format_cell(x):
        # Blanked cells are already strings; only numbers get formatted.
        return x if isinstance(x, str) else f'{x:.2f}'

    # DataFrame.applymap is deprecated in favour of DataFrame.map (pandas 2.1+).
    # Check the class rather than the instance so a column that happens to be
    # named "map" cannot be picked up by attribute access on older versions.
    if hasattr(pd.DataFrame, 'map'):
        return corr_df.map(_format_cell)
    return corr_df.applymap(_format_cell)
def _lower_triangular_of_df(df): return pd.DataFrame(np.tril(df), index=df.index, columns=df.columns).replace(0, '')