Source code for repo_splitter.git_tools.files.renames

import os
import re
from typing import Set, Sequence

from git import Repo


def all_file_names_which_have_contained_the_lines_in_multiple_files(file_paths: Sequence[str],
                                                                    repo: Repo) -> Set[str]:
    """
    Collects, across several files, every file path that has held the lines
    of those files according to the git log. Useful for tracking renames in
    a repo.

    Each path is handed to
    all_file_names_which_have_contained_the_lines_in_a_file and the
    per-file results are merged into a single set.

    :param file_paths: Relative paths to files within repo
    :param repo: Repository whose history is searched
    :return: Union of the historical file names found for every given path
    """
    return {
        historical_name
        for in_file in file_paths
        for historical_name in all_file_names_which_have_contained_the_lines_in_a_file(in_file, repo)
    }
def all_file_names_which_have_contained_the_lines_in_a_file(file_path: str, repo: Repo) -> Set[str]:
    """
    Finds every file path that has held the lines of one file, by parsing
    the git log of those lines. Useful for tracking renames in a repo.

    :param file_path: Relative path to file within repo
    :param repo: Repository whose working tree contains the file
    :return: File names found in the log; an empty set when the path is a
        directory or the file is empty
    """
    absolute_path = os.path.join(repo.working_tree_dir, file_path)

    # Line history cannot be requested for a directory, so there is nothing to report
    if os.path.isdir(absolute_path):
        return set()

    try:
        history = full_git_history_for_contents_of_file(absolute_path, repo)
    except EmptyFileException:
        # An empty file has no lines whose history could be followed
        return set()
    else:
        return get_filenames_from_git_log(history)
def full_git_history_for_contents_of_file(file_path: str, repo: Repo) -> str:
    """
    Runs git log with a -L range covering every line of a file

    :param file_path: Path of the file, used both to count its lines and
        in the -L range passed to git
    :param repo: Repository on which git log is run
    :return: Output of git log for the range from line 1 to the last line
    :raises EmptyFileException: If the file contains no lines
    """
    line_count = file_length(file_path)
    if not line_count:
        raise EmptyFileException('could not track history of lines in an empty file')

    # -L takes <start>,<end>:<file>; the range spans the first through the last line
    line_range = f'1,{line_count}:{file_path}'
    git_args = ('--format=oneline', '--compact-summary', '-L', line_range)
    return repo.git.log(*git_args)
def get_filenames_from_git_log(git_log: str) -> Set[str]:
    """
    Extracts every file path named in the diff headers of a git log.

    A diff header is a ``--- a/<old path>`` line immediately followed by a
    ``+++ b/<new path>`` line. Either side may instead be ``/dev/null``
    (the file is created or deleted by that diff); the path on the other
    side is still collected, so a file that was added and never modified
    afterwards is not lost.

    :param git_log: Text of a git log that includes patch headers
    :return: Unique file paths found on either side of the diff headers;
        an empty set if the log contains no diff headers
    """
    pattern = re.compile(
        # Anchored to line starts so that diff content which merely contains
        # '--- a/' somewhere inside a line is not mistaken for a header.
        # [^\r\n]+ together with the optional \r keeps a carriage return
        # out of the captured path when the log uses CRLF line endings.
        r'^--- (?:a/([^\r\n]+)|/dev/null)\r?\n'
        r'\+\+\+ (?:b/([^\r\n]+)|/dev/null)\r?$',
        re.MULTILINE,
    )
    # findall yields '' for a group that did not take part in the match
    # (the /dev/null side), so empty entries are dropped.
    return {
        file
        for match_tup in pattern.findall(git_log)
        for file in match_tup
        if file
    }
def file_length(file_path: str) -> int:
    """
    Returns the number of lines in a file

    The file is read in binary mode, so its content is never decoded: files
    that are not valid text in the default encoding are counted instead of
    raising UnicodeDecodeError. Lines are delimited by ``\\n`` only; a final
    line without a trailing newline still counts as a line.

    :param file_path: Path of the file to count
    :return: Number of lines in the file, 0 for an empty file
    """
    # NOTE(review): splitting on '\n' only is presumably what `git log -L`
    # uses for its line range - confirm against files with lone '\r' endings.
    with open(file_path, 'rb') as f:
        return sum(1 for _ in f)
class EmptyFileException(Exception):
    """Raised when the line history of a file is requested but the file has no lines."""