Source code for capiq_excel.ids
from typing import Sequence, List
import pandas as pd
from exceldriver.tools import _start_excel_with_addins_and_attach
from processfiles.files import FileProcessTracker
from .workbook.populate import populate_capiq_ids_for_file
from capiq_excel.workbook.create import create_all_xlsx_with_id_commands
[docs]def download_capiq_ids(ids: Sequence[str], outpath: str = 'capiq ids.csv', folder: str = 'in_process_ids') -> List[str]:
"""
Downloads Capital IQ identifiers when passed other identifiers such as CUSIP,
ISIN, ticker, name, etc.
Stores in a CSV with matched names included and also returns capiq ids as a list
:param ids: identifiers such as CUSIP, ISIN, ticker, name. Can be a mixture.
:param folder: folder which will hold in process files
:param outpath: filepath to output csv, including file extension
:return: capiq ids
"""
print('Creating XLSX files with commands to get ids')
create_all_xlsx_with_id_commands(ids, folder, num_files=100)
print('Populating XLSX files for ids')
populate_all_ids_in_folder(folder)
print('Combining all ids into a single CSV file')
combine_all_capiq_ids_xlsx(folder, outpath)
return _get_ids_from_csv_path(outpath)
[docs]def populate_all_ids_in_folder(folder, restart=True):
excel = _start_excel_with_addins_and_attach()
file_tracker = FileProcessTracker(folder=folder, restart=restart, file_types=('xlsx',))
for file in file_tracker.file_generator():
populate_capiq_ids_for_file(file, excel)
[docs]def combine_all_capiq_ids_xlsx(infolder, outpath, restart=True):
df = pd.DataFrame()
file_tracker = FileProcessTracker(folder=infolder, restart=restart, file_types=('xlsx',))
for file in file_tracker.file_generator():
df = _append_capiq_xlsx_to_df(df, file)
_remove_useless_cols(df)
df.to_csv(outpath, index=False)
return df
def _append_capiq_xlsx_to_df(df, filepath):
temp_df = pd.read_excel(filepath)
df = df.append(temp_df)
return df
def _remove_useless_cols(df):
"""
NOTE: inplace
"""
blank_cols = [col for col in df.columns if 'blank' in col.lower()]
useless_cols = ['ID'] + blank_cols
df.drop(useless_cols, axis=1, inplace=True)
def _get_ids_from_csv_path(csv_path: str) -> List[str]:
df = pd.read_csv(csv_path)
return df['IQID'].tolist()