Source code for repo_splitter.git_tools.history

from typing import Sequence
import re

from git import Repo, GitCommandError

from repo_splitter.git_tools.files.unwanted import get_unwanted_files_from_repo
from repo_splitter.git_tools.files.wanted import get_desired_files_from_patterns


# Regex for the marker line that index_filter_branch echoes before running the filter command for a commit.
# GitFilterBranchException searches for it to find where the output of the last processed commit begins.
START_COMMIT_PATTERN = r"Start of output for commit: [\d\w]+"


def remove_history_for_files_not_matching(repo: Repo, file_patterns: Sequence[str], follow_renames: bool = True):
    """
    Rewrite the history of ``repo`` so that only the files selected by ``file_patterns`` remain.

    The files to keep are resolved with ``get_desired_files_from_patterns`` and every other
    file is removed from history by ``_remove_history_except_for_files``.

    :param repo: repository whose history is rewritten
    :param file_patterns: patterns identifying the files to keep
    :param follow_renames: forwarded unchanged to ``get_desired_files_from_patterns``
    """
    _remove_history_except_for_files(
        repo,
        get_desired_files_from_patterns(repo, file_patterns, follow_renames=follow_renames),
    )
def remove_history_for_files_matching(repo: Repo, file_patterns: Sequence[str], follow_renames: bool = True):
    """
    Rewrite the history of ``repo`` so that the files selected by ``file_patterns`` are dropped.

    The files to keep are whatever ``get_unwanted_files_from_repo`` returns for the patterns
    (presumably the repository files that do not match them -- confirm against that helper);
    every other file is removed from history by ``_remove_history_except_for_files``.

    :param repo: repository whose history is rewritten
    :param file_patterns: patterns identifying the files whose history should be removed
    :param follow_renames: forwarded unchanged to ``get_unwanted_files_from_repo``
    """
    files_to_keep = get_unwanted_files_from_repo(
        repo,
        file_patterns,
        follow_renames=follow_renames,
    )
    _remove_history_except_for_files(repo, files_to_keep)
def _remove_history_except_for_files(repo: Repo, files: Sequence[str]) -> str:
    """
    Remove every file other than ``files`` from the index of each rewritten commit.

    Builds a shell snippet and runs it as the index filter through ``index_filter_branch``.

    :param repo: repository whose history is rewritten
    :param files: paths to keep; each is turned into a ``^``-anchored pattern for ``grep -E``,
        so it matches any path that starts with it
    :return: whatever ``index_filter_branch`` returns for the run
    :raises GitFilterBranchException: propagated from ``index_filter_branch`` when git fails
    """
    # Regex match for grep. Need to include ^$ as git log sends back one empty line, this will remove it.
    # ``files`` is unpacked instead of concatenated with ``+`` so that any sequence (e.g. a tuple)
    # is accepted, not only a list.
    # NOTE(review): paths are inserted unescaped, so regex or shell metacharacters in a file name
    # are interpreted by grep and the shell.
    keep_patterns = ['^' + file for file in [*files, '$']]
    keep_regex = '|'.join(keep_patterns)

    # Now form ALL_FILES in bash as the files which should be removed. git log will return all the files
    # which were ever added (A), renamed (R), or copied (C). Then using grep with the -v flag means take
    # the files not matching the passed files. The if condition is there because if ALL_FILES is empty,
    # the only remaining files are the desired files, which means that nothing should be done.
    # printf gets an explicit '%s' format so that '%' or backslashes inside file names are passed
    # to xargs literally instead of being interpreted as printf directives.
    index_filter_cmd = f"""
    ALL_FILES=$(git log --pretty=format: --name-only --diff-filter=ARC | sort -u | grep -vE "{keep_regex}");
    if [ -n "$ALL_FILES" ]; then
        printf '%s' "$ALL_FILES" | xargs --delimiter="\\n" git rm -rf --cached --ignore-unmatch;
    fi
    """.strip()
    return index_filter_branch(repo, index_filter_cmd)
def index_filter_branch(repo: Repo, index_filter_cmd: str) -> str:
    """
    Run ``git filter-branch --prune-empty --index-filter <cmd> -- --all`` on ``repo``.

    ``index_filter_cmd`` is wrapped in shell debugging statements: tracing is switched on,
    start and end markers containing ``$GIT_COMMIT`` are echoed around it, the environment is
    printed, and the command's own exit code is re-raised after the end marker is echoed.

    :param repo: repository whose history is rewritten
    :param index_filter_cmd: shell command to run as the index filter
    :return: whatever ``_filter_branch`` returns for the run
    """
    # Add debug info
    debug_wrapped_cmd = f"""
    set -x;
    echo "\n\n\nStart of output for commit: $GIT_COMMIT";
    echo $(printenv);
    {index_filter_cmd};
    EXIT_CODE=$?;
    echo "End of output for commit: $GIT_COMMIT \n\n\n";
    (exit $EXIT_CODE);
    """
    filter_branch_args = ('--prune-empty', '--index-filter', debug_wrapped_cmd, '--', '--all')
    return _filter_branch(repo, *filter_branch_args)
def _filter_branch(repo: Repo, *args, **kwargs):
    """
    Call ``repo.git.filter_branch`` with the given arguments and return its output.

    :param repo: repository on which filter-branch is run
    :param args: positional arguments forwarded to ``repo.git.filter_branch``
    :param kwargs: keyword arguments forwarded to ``repo.git.filter_branch``
    :raises GitFilterBranchException: built from the ``GitCommandError`` when the git call fails
    """
    try:
        return repo.git.filter_branch(*args, **kwargs)
    except GitCommandError as git_error:
        # "from None" suppresses the chained GitCommandError in the traceback; its status,
        # stdout and stderr are carried over onto the new exception instead.
        raise GitFilterBranchException.from_git_command_error(git_error) from None
class GitFilterBranchException(Exception):
    """
    Error carrying the exit status, stdout and stderr of a failed git filter-branch call.

    ``str(exc)`` shows only the output belonging to the last commit that was being processed
    (located through ``START_COMMIT_PATTERN``); the complete output stays available on
    ``exc.stdout`` and ``exc.stderr``.
    """

    def __init__(self, status: int, stdout: str, stderr: str, *args, **kwargs):
        """
        :param status: exit status of the failed command
        :param stdout: full captured standard output
        :param stderr: full captured standard error
        :param args: accepted for compatibility; not used
        :param kwargs: accepted for compatibility; not used
        """
        self.status = status
        self.stdout = stdout
        self.stderr = stderr

    @classmethod
    def from_git_command_error(cls, git_error: GitCommandError):
        """Build the exception from the ``status``, ``stdout`` and ``stderr`` of ``git_error``."""
        return cls(
            git_error.status,
            git_error.stdout,
            git_error.stderr,
        )

    @staticmethod
    def _extract_from_last_commit(output: str) -> str:
        """
        Return the part of ``output`` starting at the last start-of-commit marker.

        :raises IndexError: if ``output`` contains no marker
        """
        commit_markers = list(re.finditer(START_COMMIT_PATTERN, output))
        # Indexing [-1] on an empty list is what raises IndexError when no marker is present.
        last_commit_start = commit_markers[-1].start(0)
        return output[last_commit_start:]

    @classmethod
    def _last_commit_output_or_all(cls, output: str) -> str:
        """Return the output of the last commit, or all of ``output`` if it has no marker."""
        try:
            return cls._extract_from_last_commit(output)
        except IndexError:
            return output

    def __str__(self):
        # Trim each stream on its own: a stream without a marker (for example an empty stderr)
        # must not prevent the other stream from being shortened to the last commit.
        stdout = self._last_commit_output_or_all(self.stdout)
        stderr = self._last_commit_output_or_all(self.stderr)
        message = f"""
        Stdout:
        {stdout}
        Stderr:
        {stderr}
        Note: Full stdout and stderr available in exception exc.stdout and exc.stderr
        """
        return message