Source code for treecomp.main

import filecmp
import json
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Iterator, List, Optional, Sequence, Set, Union

from treecomp.fs_utils import list_path_filter_by_matchers
from treecomp.ignore import parse_ignore_list_into_matcher
from treecomp.target import parse_target_list_into_matcher
from treecomp.unidiff import (
    _create_unified_diff_of_binary_files,
    _create_unified_diff_of_file_added,
    _create_unified_diff_of_file_removed,
    _create_unified_diff_of_files,
)


@dataclass(frozen=True)
class FileDiff:
    """Diff record for a single file produced by a tree comparison."""

    # Location of the file this record describes.
    path: Path
    # Flags recording on which side(s) of the comparison the file was found.
    left: bool
    right: bool
    # Diff text for the file.
    diff: str

    def dict(self) -> dict:
        """Return the fields as a plain mapping.

        Boolean values are kept as booleans; every other value is converted
        with ``str`` so the result can be serialised directly.
        """
        return {
            name: value if isinstance(value, bool) else str(value)
            for name, value in vars(self).items()
        }

    def json(self, indent: Optional[int] = None) -> str:
        """Return the mapping from ``dict()`` encoded as a JSON string.

        ``indent`` is forwarded unchanged to ``json.dumps``.
        """
        payload = self.dict()
        return json.dumps(payload, indent=indent)
@dataclass(frozen=True)
class _FileDiffWithDirs:
    """Pair a ``FileDiff`` with the two root directories it relates to."""

    diff: FileDiff
    dir1: Path
    dir2: Path

    @property
    def dir_1_path(self) -> Path:
        """The diff's path joined onto ``dir1``."""
        relative = self.diff.path
        return self.dir1 / relative

    @property
    def dir_2_path(self) -> Path:
        """The diff's path joined onto ``dir2``."""
        relative = self.diff.path
        return self.dir2 / relative


@dataclass(frozen=True)
class _FolderDiffResults:
    """Container for the file diffs collected while comparing folders."""

    file_diffs: List[FileDiff]
@dataclass(frozen=True)
class FileTreeComparison:
    """Outcome of comparing two directory trees.

    Holds the two root directories and the list of per-file diffs, and
    behaves like a read-only sequence of those diffs.
    """

    dir1: Path
    dir2: Path
    diffs: List[FileDiff]

    def __str__(self) -> str:
        """Return every diff's text, separated by newlines."""
        texts = [entry.diff for entry in self.diffs]
        return "\n".join(texts)

    def __iter__(self) -> Iterator[FileDiff]:
        """Iterate over the stored diffs."""
        return iter(self.diffs)

    def __getitem__(self, item) -> FileDiff:
        """Index into the stored diffs."""
        return self.diffs[item]

    def __len__(self) -> int:
        """Number of stored diffs."""
        return len(self.diffs)

    def diff_for(self, path: Union[str, Path]) -> Optional[FileDiff]:
        """Return the first diff that matches ``path``, or ``None``.

        A diff matches when ``path`` equals the diff's own path, or that
        path joined onto ``dir1`` or onto ``dir2``.
        """
        for entry in self.diffs:
            located = _FileDiffWithDirs(diff=entry, dir1=self.dir1, dir2=self.dir2)
            wanted = Path(path)
            candidates = (entry.path, located.dir_1_path, located.dir_2_path)
            if wanted in candidates:
                return entry
        return None

    def json(self, indent: Optional[int] = None) -> str:
        """Return all diffs as a JSON array string.

        Each element is the mapping produced by the diff's ``dict()``;
        ``indent`` is forwarded unchanged to ``json.dumps``.
        """
        records = [entry.dict() for entry in self]
        return json.dumps(records, indent=indent)
def diff_file_trees(
    dir1: Union[str, Path],
    dir2: Union[str, Path],
    ignore: Optional[Sequence[str]] = None,
    target: Optional[Sequence[str]] = None,
) -> FileTreeComparison:
    """
    Recursively compare two folders and collect a diff for every file
    whose content differs between them.

    :param dir1: root of the first ("left") tree
    :param dir2: root of the second ("right") tree
    :param ignore: optional list forwarded to the recursive comparison,
        where it is parsed into an ignore matcher
    :param target: optional list forwarded to the recursive comparison,
        where it is parsed into a target matcher
    :return: a ``FileTreeComparison`` holding both roots as ``Path``
        objects together with the collected file diffs
    """
    results = _diff_file_trees(dir1, dir2, ignore=ignore, target=target)
    left_root = Path(dir1)
    right_root = Path(dir2)
    return FileTreeComparison(
        dir1=left_root,
        dir2=right_root,
        diffs=results.file_diffs,
    )
def _diff_file_trees(
    dir1: Union[str, Path],
    dir2: Union[str, Path],
    ignore: Optional[Sequence[str]] = None,
    target: Optional[Sequence[str]] = None,
    relative_root: Path = Path("."),
) -> _FolderDiffResults:
    """
    Compare one directory level of two trees, then recurse into subdirectories.

    Names at every level are processed in sorted order so the resulting list
    of diffs is the same from run to run (iterating the underlying sets
    directly would make the order depend on string hashing).

    :param dir1: directory on the left side of the comparison
    :param dir2: directory on the right side of the comparison
    :param ignore: list parsed by ``parse_ignore_list_into_matcher``
    :param target: list parsed by ``parse_target_list_into_matcher``
    :param relative_root: prefix joined onto each name to form
        ``FileDiff.path``; extended with the subdirectory name on recursion
    :return: the diffs for this level followed by those of all
        subdirectories
    """
    file_diffs: List[FileDiff] = []
    left_only: Set[str] = set()
    right_only: Set[str] = set()
    common_files: Set[str] = set()

    dir1 = Path(dir1)
    dir2 = Path(dir2)

    ignore_matcher = parse_ignore_list_into_matcher(ignore)
    target_matcher = parse_target_list_into_matcher(target)

    def _get_files(directory: Path) -> Set[str]:
        # File names directly under ``directory`` that pass both matchers.
        return set(
            list_path_filter_by_matchers(
                directory,
                ignore_matcher,
                target_matcher,
                include_dirs=False,
                root=relative_root,
            )
        )

    def _get_dirs(directory: Path) -> Set[str]:
        # Subdirectory names directly under ``directory`` that pass both matchers.
        return set(
            list_path_filter_by_matchers(
                directory,
                ignore_matcher,
                target_matcher,
                include_files=False,
                root=relative_root,
            )
        )

    left_files = _get_files(dir1)
    right_files = _get_files(dir2)

    # When one side does not exist, everything on the other side is
    # one-sided and there is nothing in common to compare.
    if not dir1.exists():
        right_only = right_files
    elif not dir2.exists():
        left_only = left_files
    else:
        left_only = left_files - right_files
        right_only = right_files - left_files
        common_files = left_files & right_files

    # Handle files that are only in one of the directories
    file_diffs.extend(
        FileDiff(
            path=relative_root / file,
            left=True,
            right=False,
            diff=_create_unified_diff_of_file_removed(dir1 / file),
        )
        for file in sorted(left_only)
        if (dir1 / file).is_file()
    )
    file_diffs.extend(
        FileDiff(
            path=relative_root / file,
            left=False,
            right=True,
            diff=_create_unified_diff_of_file_added(dir2 / file),
        )
        for file in sorted(right_only)
        if (dir2 / file).is_file()
    )

    # shallow=False makes cmpfiles compare file contents rather than only
    # os.stat() signatures; passing a sorted list keeps the order of
    # ``mismatch`` and ``errors`` stable.
    _, mismatch, errors = filecmp.cmpfiles(
        dir1, dir2, sorted(common_files), shallow=False
    )

    for file in mismatch:
        # NOTE(review): both label arguments are built from dir2 (kept
        # exactly as before) -- confirm whether the first label was meant
        # to be the dir1 path.
        diff = _create_unified_diff_of_files(
            dir1 / file, dir2 / file, str(dir2 / file), str(dir2 / file)
        )
        file_diffs.append(
            FileDiff(
                path=relative_root / file,
                left=True,
                right=True,
                diff=diff,
            )
        )

    # Files that cmpfiles could not compare are reported through the
    # binary-file diff helper.
    # NOTE(review): as above, both arguments are the dir2 path -- confirm.
    file_diffs.extend(
        FileDiff(
            path=relative_root / file,
            left=True,
            right=True,
            diff=_create_unified_diff_of_binary_files(
                str(dir2 / file), str(dir2 / file)
            ),
        )
        for file in errors
    )

    # Find all directories at this level in both trees
    all_dir_names: Set[str] = _get_dirs(dir1) | _get_dirs(dir2)
    for dir_name in sorted(all_dir_names):
        nested_result = _diff_file_trees(
            dir1 / dir_name,
            dir2 / dir_name,
            ignore=ignore,
            target=target,
            relative_root=relative_root / dir_name,
        )
        file_diffs.extend(nested_result.file_diffs)

    return _FolderDiffResults(file_diffs=file_diffs)