2020-06-30 14:54:25 -04:00

221 lines
6.8 KiB
Python

"""This script determines the updated, added, and deleted properties from the '.properties-MERGED' files
and generates a csv file containing the items changed. This script requires the python libraries:
gitpython and jproperties. As a consequence, it also requires git >= 1.7.0 and python >= 3.4.
"""
from git import Repo
from typing import List, Dict, Tuple
import re
import csv
from jproperties import Properties
import sys
class ItemChange:
    """A single key-level difference between two versions of a properties file.

    On construction the difference is classified as 'ADDITION', 'DELETION'
    or 'CHANGE' and stored in the ``type`` attribute.
    """

    def __init__(self, rel_path: str, key: str, prev_val: str, cur_val: str):
        """Records the change and classifies it.

        Args:
            rel_path (str): The relative path of the properties file.
            key (str): The key in the properties file.
            prev_val (str): The value the key had before (may be None).
            cur_val (str): The value the key has now (may be None).
        """
        self.rel_path = rel_path
        self.key = key
        self.prev_val = prev_val
        self.cur_val = cur_val

        had_content = ItemChange.has_str_content(prev_val)
        has_content = ItemChange.has_str_content(cur_val)
        if has_content and not had_content:
            # Content appeared where there was none before.
            self.type = 'ADDITION'
        elif had_content and not has_content:
            # Content that existed before is now gone.
            self.type = 'DELETION'
        else:
            # Everything else, including "both sides blank", counts as a change.
            self.type = 'CHANGE'

    @staticmethod
    def has_str_content(content: str):
        """Tells whether the text holds anything besides whitespace.

        Args:
            content (str): The text to inspect; None is allowed.

        Returns:
            bool: False for None, empty or whitespace-only text, True otherwise.
        """
        if content is None:
            return False
        return bool(content.strip())

    @staticmethod
    def get_headers() -> List[str]:
        """Returns the column headers used when writing ItemChange objects to csv.

        Returns:
            List[str]: The column headers, in the same order as get_row().
        """
        headers = [
            'Relative Path',
            'Key',
            'Change Type',
            'Previous Value',
            'Current Value',
        ]
        return headers

    def get_row(self) -> List[str]:
        """Returns this change as a csv row.

        Returns:
            List[str]: Path, key, change type, previous value and current value.
        """
        row = [self.rel_path, self.key, self.type]
        row.append(self.prev_val)
        row.append(self.cur_val)
        return row
def get_entry_dict(diff_str: str) -> Dict[str, str]:
    """Parses the text of a properties file into a key/value mapping.

    Args:
        diff_str (str): The full text of the properties file.

    Returns:
        Dict[str,str]: The ``properties`` mapping exposed by the jproperties
        parser after loading the text.
    """
    parser = Properties()
    # "utf-8" is handed to the parser as the encoding argument of load().
    parser.load(diff_str, "utf-8")
    entries = parser.properties
    return entries
def get_item_change(rel_path: str, key: str, prev_val: str, cur_val: str) -> ItemChange:
    """Builds an ItemChange for a key, unless its value did not change.

    Args:
        rel_path (str): The relative path for the properties file.
        key (str): The key within the properties file being compared.
        prev_val (str): The previous value.
        cur_val (str): The current value.

    Returns:
        ItemChange: The change description, or None when both values are equal.
    """
    unchanged = prev_val == cur_val
    return None if unchanged else ItemChange(rel_path, key, prev_val, cur_val)
def get_changed(rel_path: str, a_str: str, b_str: str) -> List[ItemChange]:
    """Compares two versions of a properties file and lists the keys that differ.

    Every key present in either version is examined; a key missing from one
    side is compared as None on that side.

    Args:
        rel_path (str): The relative path for the properties file.
        a_str (str): The string representing the original state of the file.
        b_str (str): The string representing the current state of the file.

    Returns:
        List[ItemChange]: The changes determined, ordered by key.
    """
    print('Retrieving changes for {}...'.format(rel_path))
    a_dict = get_entry_dict(a_str)
    b_dict = get_entry_dict(b_str)

    changes = []
    # Sort the combined keys: iterating a bare set of strings can yield a
    # different order on every run, which would shuffle the csv rows.
    for key in sorted(set(a_dict.keys()).union(b_dict.keys())):
        item_change = get_item_change(
            rel_path, key, a_dict.get(key), b_dict.get(key))
        if item_change is not None:
            changes.append(item_change)
    # A real list (not a one-shot filter iterator) so the result matches the
    # annotation and can be iterated, measured or indexed more than once.
    return changes
def get_text(blob) -> str:
    """Reads the whole content of a blob and decodes it as UTF-8 text.

    Args:
        blob: An object whose ``data_stream`` attribute offers ``read()``
            returning bytes (here: the blobs attached to a git python diff).

    Returns:
        str: The decoded text.
    """
    raw_bytes = blob.data_stream.read()
    return raw_bytes.decode('utf-8')
def get_changed_from_diff(rel_path: str, diff) -> List[ItemChange]:
    """Determines changes from a git python diff.

    Args:
        rel_path (str): The relative path for the properties file.
        diff: The git python diff.

    Returns:
        List[ItemChange]: The changes in properties; empty for change types
        that are not handled.
    """
    change_type = diff.change_type
    # the file was added: every property is compared against an empty file
    if change_type == 'A':
        return get_changed(rel_path, '', get_text(diff.b_blob))
    # the file was deleted: every property is compared against an empty file
    if change_type == 'D':
        return get_changed(rel_path, get_text(diff.a_blob), '')
    # The file was modified in place ('M') or renamed ('R'). A renamed file
    # can carry edits too, so both sides are compared rather than dropping
    # the file; a rename with no edits simply yields no changes.
    if change_type in ('M', 'R'):
        return get_changed(
            rel_path, get_text(diff.a_blob), get_text(diff.b_blob))
    # any other change type is not handled
    return []
def get_rel_path(diff) -> str:
    """Picks the relative path to report for a git python diff.

    The path on the 'b' side wins; the 'a' side path is the fallback.

    Args:
        diff: The git python diff.

    Returns:
        str: The chosen relative path, or a placeholder when neither side has one.
    """
    newer_path = diff.b_path
    if newer_path is not None:
        return newer_path

    older_path = diff.a_path
    return '<Uknown Path>' if older_path is None else older_path
def write_diff_to_csv(repo_path: str, output_path: str, commit_1_id: str, commit_2_id: str):
    """Determines the changes made in '.properties-MERGED' files from one commit to another commit.

    A header row is written first, followed by one row per changed property.
    Files whose path does not end with '.properties-MERGED' are skipped.

    Args:
        repo_path (str): The local path to the git repo.
        output_path (str): The output path for the csv file.
        commit_1_id (str): The initial commit for the diff.
        commit_2_id (str): The latest commit for the diff.
    """
    repo = Repo(repo_path)
    commit_1 = repo.commit(commit_1_id)
    commit_2 = repo.commit(commit_2_id)
    diffs = commit_1.diff(commit_2)

    # The encoding is explicit because property values may contain non-ASCII
    # text, and the default for open() depends on the platform locale.
    # newline='' is what the csv module expects for files it writes to.
    with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(ItemChange.get_headers())

        for diff in diffs:
            rel_path = get_rel_path(diff)
            if not rel_path.endswith('.properties-MERGED'):
                continue

            changes = get_changed_from_diff(rel_path, diff)
            writer.writerows(item_change.get_row() for item_change in changes)
def print_help():
    """Writes a one-line usage summary to standard output."""
    usage = (
        "diffscript.py [path to repo] [csv output path] "
        "[commit for previous release] "
        "[commit for current release (optional; defaults to 'HEAD')]"
    )
    print(usage)
def main():
    """Command-line entry point.

    Expects the repo path, the csv output path and the previous-release
    commit as arguments; the current-release commit is optional and falls
    back to 'HEAD'. Prints the help text and exits with status 1 when fewer
    than three arguments are given, otherwise writes the csv and exits with
    status 0.
    """
    args = sys.argv[1:]
    if len(args) < 3:
        print_help()
        sys.exit(1)

    repo_path, output_path, commit_1_id = args[:3]
    if len(args) > 3:
        commit_2_id = args[3]
    else:
        commit_2_id = 'HEAD'

    write_diff_to_csv(repo_path, output_path, commit_1_id, commit_2_id)
    sys.exit(0)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()