Source code for repo_splitter.git_tools.files.wanted
import os
from typing import Sequence, List
import glob
from git import Repo
from repo_splitter.git_tools.files.renames import all_file_names_which_have_contained_the_lines_in_multiple_files
def get_desired_files_from_patterns(repo: Repo, file_patterns: Sequence[str],
                                    follow_renames: bool = True) -> List[str]:
    """
    Expand glob file patterns, given relative to the repo root, into concrete file paths.

    Pass glob file patterns relative to repo root such as data/** or code/* or code/my_module.py
    Handles resolving within the repo, and expanding globs into full relative file paths

    :param repo: Repository whose ``working_tree_dir`` is used as the directory the
        patterns are resolved against
    :param file_patterns: A sequence of glob file patterns relative to repo root such as
        data/** or code/* or code/my_module.py
    :param follow_renames: Whether to track previous names of files from the history and also include those
    :return: The paths matched by the patterns, as returned by ``glob.glob`` while inside the
        repo root. When ``follow_renames`` is True the result is additionally merged with the
        names found by the rename tracking, de-duplicated, and returned in unspecified order
    """
    # TODO: needs to handle passing patterns which match old files not in the current working directory

    # glob resolves relative patterns against the process working directory, so
    # temporarily switch into the repo root. The finally clause guarantees the caller's
    # working directory is restored even if globbing raises.
    current_dir = os.getcwd()
    os.chdir(repo.working_tree_dir)
    try:
        all_files = []
        for file_pattern in file_patterns:
            all_files.extend(glob.glob(file_pattern, recursive=True))
    finally:
        os.chdir(current_dir)

    if follow_renames:
        print(f'Following renames for {all_files}')
        all_files_set = set(all_files)
        # NOTE(review): the helper is defined elsewhere; it is used here as returning a
        # set-like collection of file names (it must support ``.difference``) - confirm.
        new_files = all_file_names_which_have_contained_the_lines_in_multiple_files(all_files, repo)
        print(f'After tracking renames, added {new_files.difference(all_files_set)} to file list.')
        all_files_set.update(new_files)
        all_files = list(all_files_set)

    return all_files