Source code for finstmt.findata.period_data

import json
import warnings
from copy import deepcopy
from typing import Dict, List, Optional, cast

import numpy as np
import pandas as pd

from finstmt.clean.name import standardize_names_in_series_index
from finstmt.config_manage.data import DataConfigManager
from finstmt.exc import CouldNotParseException
from finstmt.findata.statement_item import StatementItem


class PeriodFinancialData:
    """
    Base class for financial statement data. Should not be used directly.

    One ``StatementItem`` is created for every item yielded by the config
    manager and stored in ``statement_items`` under the item's key. The
    value of each item is exposed as an attribute of the same name via
    ``__getattr__``.
    """

    config_manager: "DataConfigManager"
    prior_statement: Optional["PeriodFinancialData"]
    unextracted_names: List[str]
    statement_items: Dict[str, "StatementItem"]

    # TODO: Set this via user config
    maximum_display_verbosity = 1

    def __init__(
        self,
        data_dict: Dict[str, float],
        config_manager: "DataConfigManager",
        unextracted_names: List[str],
        prior_statement: Optional["PeriodFinancialData"] = None,
    ):
        """
        Build the statement from already-extracted values.

        :param data_dict: values keyed by item key; keys missing from the
            dict give the corresponding item a value of ``None``
        :param config_manager: source of the item configs; its ``configs``
            are deep-copied into a new ``DataConfigManager`` owned by this
            instance
        :param unextracted_names: stored as-is on the instance
        :param prior_statement: stored as-is on the instance
        """
        self.config_manager = DataConfigManager(deepcopy(config_manager.configs))
        self.prior_statement = prior_statement
        self.unextracted_names = unextracted_names
        self.statement_items = {}
        for item in self.config_manager:
            self.statement_items[item.key] = StatementItem(
                item_config=deepcopy(item),
                value=data_dict.get(item.key, None),
            )

    def _repr_html_(self):
        """
        Return the HTML of a one-column table of ``to_series()``.

        Non-zero values are formatted as ``$1,234``; zeros as `` - ``.
        """
        # Series.map is used rather than DataFrame.applymap, which newer
        # pandas releases deprecate; the resulting single-column frame is
        # the same.
        formatted = self.to_series().map(
            lambda x: f"${x:,.0f}" if x != 0 else " - "
        )
        return formatted.to_frame()._repr_html_()

    def __repr__(self) -> str:
        """Return the displayable, non-zero items as indented JSON."""
        results = {}
        for key, statement_item in self.statement_items.items():
            value = statement_item.get_value(self)
            # Some properties, e.g., nwc and effective tax rate, may be
            # associated with a statement, but we don't necessarily want to
            # display them on the print-out
            if (value != 0) and (
                statement_item.item_config.display_verbosity
                <= self.maximum_display_verbosity
            ):
                results[key] = value
        return json.dumps(results, indent=2)

    def __dir__(self):
        """List the fixed attribute names followed by every statement item key."""
        normal_attrs = [
            "config_manager",
            "prior_statement",
            "unextracted_names",
            "statement_items",
            "from_series",
            "to_series",
            "dict",
        ]
        return normal_attrs + list(self.statement_items.keys())

    @classmethod
    def from_series(
        cls,
        series: pd.Series,
        config_manager: "DataConfigManager",
        prior_statement: Optional["PeriodFinancialData"] = None,
    ):
        """
        Create a statement from a Series whose index holds item names.

        The index of a deep copy of ``series`` is passed through
        ``standardize_names_in_series_index``; each resulting name is then
        compared against the ``extract_names`` of every item config. When
        several names match one item with different values, the name that
        appears earliest in ``extract_names`` wins and a warning is issued.

        :param series: values indexed by statement item names
        :param config_manager: iterable of item configs to match against
        :param prior_statement: forwarded to the constructor
        :raises CouldNotParseException: if no index entry matched any item
        :return: a new instance of ``cls``
        """
        for_lookup = deepcopy(series)
        standardize_names_in_series_index(for_lookup)

        data_dict: Dict[str, float] = {}
        # item key -> standardized name / original name it was extracted from
        extracted_name_dict: Dict[str, str] = {}
        original_name_dict: Dict[str, str] = {}
        unextracted_names: List[str] = []

        # for_lookup is a copy of series, so both indexes line up by position
        for name, orig_name in zip(for_lookup.index, series.index):
            for item_config in config_manager:
                if item_config.extract_names is None:
                    # Not an extractable item, must be a calculated item
                    continue
                if name in item_config.extract_names:
                    # Got a match for series name to allowed names
                    if item_config.key in data_dict:
                        # Multiple matches for data item.
                        # First see if data is the same, then just skip
                        if for_lookup[name] == data_dict[item_config.key]:
                            continue
                        # Data is not the same, so take the one which is
                        # earliest in extract_names
                        current_match_idx = item_config.extract_names.index(name)
                        existing_match_idx = item_config.extract_names.index(
                            extracted_name_dict[item_config.key]
                        )
                        current_match_is_preferred = (
                            current_match_idx < existing_match_idx
                        )
                        if current_match_is_preferred:
                            warnings.warn(
                                f"Previously had {item_config.key} "
                                f'extracted from "{original_name_dict[item_config.key]}". Replacing with '
                                f'value from "{orig_name}"'
                            )
                        else:
                            warnings.warn(
                                f'Found {item_config.key} from "{orig_name}" but already '
                                f"had extracted from "
                                f'"{original_name_dict[item_config.key]}" which has higher priority, '
                                f'keeping value from "{original_name_dict[item_config.key]}"'
                            )
                            continue
                    data_dict[item_config.key] = for_lookup[name]
                    extracted_name_dict[item_config.key] = name
                    original_name_dict[item_config.key] = orig_name
            if name not in extracted_name_dict.values():
                unextracted_names.append(orig_name)

        if not data_dict:
            raise CouldNotParseException(
                "Passed Series did not have any statement items in the index. \n"
                "Got index:",
                series.index,
            )

        return cls(
            data_dict=data_dict,
            config_manager=config_manager,
            unextracted_names=unextracted_names,
            prior_statement=prior_statement,
        )

    def to_series(self) -> pd.Series:
        """Return all item values indexed by display name, with NaN replaced by 0."""
        data_dict = {}
        for item_config in self.config_manager:
            data_dict[item_config.display_name] = getattr(self, item_config.key)
        return pd.Series(data_dict).fillna(0)

    def __getattr__(self, key: str):
        """
        Resolve statement item keys as attributes.

        Only called when normal attribute lookup fails.

        :raises AttributeError: if ``key`` is not a statement item, or the
            instance has no ``statement_items`` yet
        :return: the item's value converted to ``np.float64``
        """
        # Go through __dict__ instead of ``self.statement_items``: on an
        # instance that has not been populated yet (copy and pickle probe
        # such instances with hasattr(obj, "__setstate__")) the attribute
        # access would re-enter __getattr__ and recurse without end.
        try:
            statement_item = self.__dict__["statement_items"][key]
        except KeyError:
            raise AttributeError(key) from None
        return np.float64(statement_item.get_value(self))