from git import Repo, Diff, Blob
from typing import List, Union, Iterator, Tuple, Any
from itemchange import ItemChange, get_changed
from pathlib import Path
from propentry import PropEntry
from propsutil import DEFAULT_PROPS_EXTENSION, get_entry_dict


def get_text(blob: Blob) -> str:
    """Reads the full contents of a git blob and decodes them as UTF-8 text.

    Args:
        blob (Blob): The git python blob to read.

    Returns:
        str: The decoded contents of the blob.
    """
    return blob.data_stream.read().decode('utf-8')


def _has_extension(path: str, extension: str) -> bool:
    """Determines whether a path ends with the given file extension.

    The extension may be given with or without a leading dot ('properties'
    and '.properties' are treated the same); the dot separator is always
    required in the path, so 'fooproperties' does not match 'properties'.

    Args:
        path (str): The path to check.
        extension (str): The file extension to look for.

    Returns:
        bool: True if the path ends with '.<extension>'.
    """
    return path.endswith('.' + extension.lstrip('.'))


def get_changed_from_diff(rel_path: str, diff: Diff) -> List[ItemChange]:
    """Determines changes from a git python diff.

    Only added ('A'), deleted ('D') and modified ('M') diffs produce changes;
    any other change type results in an empty list.

    Args:
        rel_path (str): The relative path for the properties file.
        diff (Diff): The git python diff.

    Returns:
        List[ItemChange]: The changes in properties.
    """
    change_type = diff.change_type

    # an item was added: compare empty content against the new blob
    if change_type == 'A':
        return get_changed(rel_path, '', get_text(diff.b_blob))

    # an item was deleted: compare the old blob against empty content
    if change_type == 'D':
        return get_changed(rel_path, get_text(diff.a_blob), '')

    # an item was modified: compare the old blob against the new blob
    if change_type == 'M':
        return get_changed(rel_path, get_text(diff.a_blob), get_text(diff.b_blob))

    return []


def get_rel_path(diff: Diff) -> Union[str, None]:
    """Determines the relative path based on the git python diff.

    The path on the 'b' side of the diff is preferred; the 'a' side is used
    only when the 'b' side has no path.

    Args:
        diff: The git python diff.

    Returns:
        str: The determined relative path or None if neither side has a path.
    """
    if diff.b_path is not None:
        return diff.b_path
    return diff.a_path


def get_diff(repo_path: str, commit_1_id: str, commit_2_id: str) -> Any:
    """Determines the diff between two commits.

    Args:
        repo_path (str): The local path to the git repo.
        commit_1_id (str): The initial commit for the diff.
        commit_2_id (str): The latest commit for the diff.

    Returns:
        The determined diff.
    """
    repo = Repo(repo_path)
    commit_1 = repo.commit(commit_1_id)
    commit_2 = repo.commit(commit_2_id)
    return commit_1.diff(commit_2)


def get_commit_id(repo_path: str, commit_id: str) -> str:
    """Determines the hash for the given commit.

    This does things like fetch the id of head if 'HEAD' is provided.

    Args:
        repo_path: The path to the repo.
        commit_id: The id for the commit.

    Returns:
        The hash for the commit in the repo.
    """
    repo = Repo(repo_path)
    commit = repo.commit(commit_id)
    return str(commit.hexsha)


def get_property_files_diff(repo_path: str, commit_1_id: str, commit_2_id: str,
                            property_file_extension: str = DEFAULT_PROPS_EXTENSION) -> Iterator[ItemChange]:
    """Determines the item changes within property files as a diff between two commits.

    Args:
        repo_path (str): The repo path.
        commit_1_id (str): The first git commit.
        commit_2_id (str): The second git commit.
        property_file_extension (str): The extension for properties files to gather.

    Returns:
        All found item changes in values of keys between the property files.
    """
    for diff in get_diff(repo_path, commit_1_id, commit_2_id):
        rel_path = get_rel_path(diff)
        # skip diffs with no usable path and files that are not property files
        if rel_path is None or not _has_extension(rel_path, property_file_extension):
            continue

        yield from get_changed_from_diff(rel_path, diff)


def list_paths(root_tree, path: Path = Path('.')) -> Iterator[Tuple[str, Blob]]:
    """
    Given the root path to serve as a prefix, walks the tree of a git commit returning all files and blobs.

    Args:
        root_tree: The tree of the commit to walk.
        path: The path to use as a prefix.

    Returns: A tuple iterator where each tuple consists of the path as a string and a blob of the file.
    """
    # files directly in this tree come first
    for blob in root_tree.blobs:
        yield str(path / blob.name), blob

    # then descend into each subtree, extending the prefix with its name
    for tree in root_tree.trees:
        yield from list_paths(tree, path / tree.name)


def get_property_file_entries(repo_path: str, at_commit: str = 'HEAD',
                              property_file_extension: str = DEFAULT_PROPS_EXTENSION) -> Iterator[PropEntry]:
    """
    Retrieves all property files entries returning as an iterator of PropEntry objects.

    Files are selected with the same '.<extension>' rule that
    get_property_files_diff uses.

    Args:
        repo_path: The path to the git repo.
        at_commit: The commit to use.
        property_file_extension: The extension to use for scanning for property files.

    Returns: An iterator of PropEntry objects.
    """
    repo = Repo(repo_path)
    commit = repo.commit(at_commit)
    for path, blob in list_paths(commit.tree):
        if not _has_extension(path, property_file_extension):
            continue

        for key, val in get_entry_dict(get_text(blob)).items():
            yield PropEntry(path, key, val)