Source code for finstmt.loaders.capiq

import datetime
import re
from typing import Optional, Union

import numpy as np
import pandas as pd

COLUMN_NAME_PATTERN = re.compile(
    r"((Restated)|(Reclassified)|(12 months)|(3 months)|(Q\d)|(\n))*(?P<date>\w\w\w-\d\d-\d\d\d\d)"
)


[docs]def load_capiq_df(file_path: str, sheet_name: str) -> pd.DataFrame: """ Loads financial statements downloaded from Capital IQ into a DataFrame which can be passed into IncomeStatements or BalanceSheets """ df = pd.read_excel(file_path, index_col=0, sheet_name=sheet_name, skiprows=14) # Rename columns, extracting date col_names = [_extract_date(col) for col in df.columns] df.columns = col_names # Drop non period ends such as LTM valid_col_names = [col for col in col_names if col is not None] df = df[valid_col_names] # Fill in - with mising df = df.replace("-", np.nan) return df
def _extract_date( column_name: Union[pd.Timestamp, datetime.datetime, str] ) -> Optional[Union[pd.Timestamp, datetime.datetime]]: """ Extracts column date from Capital IQ columns. Returns None for LTM columns """ if isinstance(column_name, (datetime.datetime, pd.Timestamp)): return column_name match = COLUMN_NAME_PATTERN.match(column_name) if not match: return None date_str = match.group("date") return pd.to_datetime(date_str)