Source code for capiq_excel.downloader.tools
import pandas as pd
import warnings
from capiq_excel.downloader.timeout import TimeoutWrapper
from capiq_excel.fileops import get_path_of_failed_folder_add_if_necessary, move_file_to_failed_folder, get_path_of_additional_failed_folder_add_if_necessary
from capiq_excel.workbook.populate import populate_capiq_for_file
from exceldriver.tools import _start_excel_with_addins_and_attach, _get_excel_running_workbook, _restart_excel_with_addins_and_attach, NoExcelWorkbookException
from processfiles.files import FileProcessTracker
def populate_all_files_in_folder(folder, restart=True, timeout=240, run_failed=False):
    """
    Populate every ``xlsx`` file tracked in a folder, moving the ones that do not succeed.

    Each file yielded by the ``FileProcessTracker`` is handed to
    ``_populate_capiq_for_multiprocess`` through a ``TimeoutWrapper``. When the wrapped
    call returns a falsy value (the timeout callback used here, ``_return_false``,
    returns ``False``), the file is moved to the failed folder.

    :param folder: folder containing the workbooks to populate
    :param restart: forwarded unchanged to ``FileProcessTracker``; must be falsy when
        ``run_failed`` is set (enforced by ``_validate_populate_inputs``)
    :param timeout: forwarded to ``TimeoutWrapper`` as its first argument
        (presumably seconds per file - confirm against ``TimeoutWrapper``)
    :param run_failed: when truthy, process the failed folder of ``folder`` instead of
        ``folder`` itself, and move new failures to an additional failed folder
    :return: None
    """
    _validate_populate_inputs(folder, restart, run_failed)

    # The handle is not used again in this function, but it stays bound for the whole run.
    # NOTE(review): presumably this keeps the started Excel instance referenced while the
    # files are processed - confirm before removing the binding.
    excel = _start_excel_with_addins_and_attach()

    if run_failed:
        # Re-run mode: the failed folder becomes the folder being processed, and files that
        # fail again go to an additional failed folder obtained from that failed folder.
        folder = get_path_of_failed_folder_add_if_necessary(folder)
        failed_folder = get_path_of_additional_failed_folder_add_if_necessary(folder)
    else:
        failed_folder = get_path_of_failed_folder_add_if_necessary(folder)

    tracker = FileProcessTracker(folder=folder, restart=restart, file_types=('xlsx',))
    populate_with_timeout = TimeoutWrapper(
        timeout, _populate_capiq_for_multiprocess, timeout_callback=_return_false
    )

    # ``index`` is 1-based: the first file is passed index=1.
    for position, path in enumerate(tracker.file_generator(), start=1):
        if populate_with_timeout(path, index=position):
            continue
        move_file_to_failed_folder(path, failed_folder)
def _get_company_id_list(id_filepath, id_col='IQID'):
    """
    Read one column of a CSV file and return its distinct non-null values.

    :param id_filepath: path of the CSV file to read
    :param id_col: name of the column holding the ids; only this column is loaded
    :return: list of the unique values of ``id_col`` in order of first appearance,
        with missing values removed
    """
    id_frame = pd.read_csv(id_filepath, usecols=[id_col])
    # Discard missing entries first, then de-duplicate what is left.
    present_ids = id_frame[id_col].dropna()
    return present_ids.unique().tolist()
def _validate_populate_inputs(folder, restart, run_failed):
    """
    Check the flag combination passed to ``populate_all_files_in_folder``.

    :param folder: folder the caller asked to process; only used in the warning text
    :param restart: restart flag as passed by the caller
    :param run_failed: whether the caller asked to run the failed folder instead
    :raises AssertionError: if both ``restart`` and ``run_failed`` are truthy
    :return: None
    """
    if restart and run_failed:
        # Raised explicitly instead of through an ``assert`` statement so the check is not
        # stripped when Python runs with -O. AssertionError is kept so the exception type
        # seen by existing callers does not change.
        raise AssertionError(
            'restart and run_failed cannot both be set; '
            'pass restart=False when using run_failed=True'
        )
    if run_failed:
        warnings.warn(f'run_failed flag passed. Folder {folder}, will not be run, instead the failed folder will')
### Functions below to assist with multiprocessing/timeout handling
def _populate_capiq_for_multiprocess(file, **kwargs):
    """
    Populate a single file, obtaining an Excel handle first.

    Tries to get the running workbook named ``Book1.xlsx``; if that raises
    ``NoExcelWorkbookException``, Excel is restarted through
    ``_restart_excel_with_addins_and_attach`` and the handle it returns is used instead.

    :param file: file to populate, passed on to ``populate_capiq_for_file``
    :param kwargs: extra keyword arguments forwarded to ``populate_capiq_for_file``
    :return: the second element of the pair returned by ``populate_capiq_for_file``
    """
    try:
        excel_handle = _get_excel_running_workbook('Book1.xlsx')
    except NoExcelWorkbookException:
        excel_handle = _restart_excel_with_addins_and_attach()

    # populate_capiq_for_file returns a pair; only the success flag is needed here.
    _, was_successful = populate_capiq_for_file(file, excel_handle, **kwargs)
    return was_successful
def _restart_excel_and_return_false():
    """
    Restart Excel (with ``max_retries=10``) and report failure.

    :return: always ``False``
    """
    # The handle returned by the restart is not needed; only the restart itself matters.
    _restart_excel_with_addins_and_attach(max_retries=10)
    return False
def _return_false():
    """
    Report failure without doing anything else.

    Passed as ``timeout_callback`` to ``TimeoutWrapper`` in ``populate_all_files_in_folder``.

    :return: always ``False``
    """
    return False
## END TEMP