From 2a23c34b7cbeed082c2cb032a0722aaecce858e7 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Fri, 17 Jul 2020 10:06:50 -0400 Subject: [PATCH] utf-8-sig now for csv read & write; value regex (which needs to be fully implemented) --- .../localization_scripts/allbundlesscript.py | 15 +++++++++--- .../localization_scripts/csvutil.py | 6 ++--- .../localization_scripts/diffscript.py | 24 +++++++++++++++---- 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/release_scripts/localization_scripts/allbundlesscript.py b/release_scripts/localization_scripts/allbundlesscript.py index b9efd7b0b3..3f062424b8 100644 --- a/release_scripts/localization_scripts/allbundlesscript.py +++ b/release_scripts/localization_scripts/allbundlesscript.py @@ -9,10 +9,12 @@ import sys from envutil import get_proj_dir from gitutil import get_property_file_entries, get_commit_id, get_git_root from csvutil import records_to_csv +from typing import Union +import re import argparse -def write_items_to_csv(repo_path: str, output_path: str, show_commit: bool): +def write_items_to_csv(repo_path: str, output_path: str, show_commit: bool, value_regex: Union[str, None]): """Determines the contents of '.properties-MERGED' files and writes to a csv file. Args: @@ -28,7 +30,8 @@ def write_items_to_csv(repo_path: str, output_path: str, show_commit: bool): rows = [row_header] for entry in get_property_file_entries(repo_path): - rows.append([entry.rel_path, entry.key, entry.value]) + if value_regex is None or re.match(value_regex, entry.value): + rows.append([entry.rel_path, entry.key, entry.value]) records_to_csv(output_path, rows) @@ -42,13 +45,19 @@ def main(): help='The path to the repo. If not specified, path of script is used.') parser.add_argument('-nc', '--no_commit', dest='no_commit', action='store_true', default=False, required=False, help="Suppresses adding commits to the generated csv header.") + parser.add_argument('-vr', '--value-regex', dest='value_regex', type=str, default=None, required=False, + help='Specify the regex for the property value where a regex match against the property value ' + 'will display the key value pair in csv output (i.e. \'[a-zA-Z]\' or \'\\S\' for removing ' + 'just whitespace items). If this option is not specified, all key value pairs will be ' + 'accepted.') args = parser.parse_args() repo_path = args.repo_path if args.repo_path is not None else get_git_root(get_proj_dir()) output_path = args.output_path show_commit = not args.no_commit + value_regex = args.value_regex - write_items_to_csv(repo_path, output_path, show_commit) + write_items_to_csv(repo_path, output_path, show_commit, value_regex) sys.exit(0) diff --git a/release_scripts/localization_scripts/csvutil.py b/release_scripts/localization_scripts/csvutil.py index aa382944e1..acfe8e911f 100644 --- a/release_scripts/localization_scripts/csvutil.py +++ b/release_scripts/localization_scripts/csvutil.py @@ -4,6 +4,7 @@ from typing import List, Iterable, Tuple import csv import os +import codecs def records_to_csv(output_path: str, rows: Iterable[List[str]]): @@ -20,9 +21,8 @@ def records_to_csv(output_path: str, rows: Iterable[List[str]]): if not os.path.exists(parent_dir): os.makedirs(parent_dir) - with open(output_path, 'w', encoding="utf-8", newline='') as csvfile: + with open(output_path, 'w', encoding="utf-8-sig", newline='') as csvfile: writer = csv.writer(csvfile) - for row in rows: writer.writerow(row) @@ -35,7 +35,7 @@ def csv_to_records(input_path: str, header_row: bool) -> Tuple[List[List[str]], header_row (bool): Whether or not there is a header row to be skipped. """ - with open(input_path, encoding='utf-8') as csv_file: + with open(input_path, encoding='utf-8-sig') as csv_file: csv_reader = csv.reader(csv_file, delimiter=',') header = None diff --git a/release_scripts/localization_scripts/diffscript.py b/release_scripts/localization_scripts/diffscript.py index b8dfb522c2..fd4de08093 100644 --- a/release_scripts/localization_scripts/diffscript.py +++ b/release_scripts/localization_scripts/diffscript.py @@ -11,11 +11,14 @@ from itemchange import ItemChange from csvutil import records_to_csv import argparse import pathlib +from typing import Union +import re from langpropsutil import get_commit_for_language, LANG_FILENAME -def write_diff_to_csv(repo_path: str, output_path: str, commit_1_id: str, commit_2_id: str, show_commits: bool): +def write_diff_to_csv(repo_path: str, output_path: str, commit_1_id: str, commit_2_id: str, show_commits: bool, + value_regex: Union[str, None]): """Determines the changes made in '.properties-MERGED' files from one commit to another commit. Args: @@ -23,7 +26,9 @@ def write_diff_to_csv(repo_path: str, output_path: str, commit_1_id: str, commit output_path (str): The output path for the csv file. commit_1_id (str): The initial commit for the diff. commit_2_id (str): The latest commit for the diff. - show_commits (bool): show commits in the header row. + show_commits (bool): Show commits in the header row. + value_regex (Union[str, None]): If non-none, only key value pairs where the value is a regex match with this + value will be included. """ row_header = ItemChange.get_headers() @@ -32,8 +37,11 @@ def write_diff_to_csv(repo_path: str, output_path: str, commit_1_id: str, commit rows = [row_header] - rows += map(lambda item_change: item_change.get_row(), - get_property_files_diff(repo_path, commit_1_id, commit_2_id)) + item_changes = get_property_files_diff(repo_path, commit_1_id, commit_2_id) + if value_regex is not None: + item_changes = filter(lambda item_change: re.match(value_regex, item_change.cur_val) is not None, item_changes) + + rows += map(lambda item_change: item_change.get_row(), item_changes) records_to_csv(output_path, rows) @@ -57,11 +65,17 @@ def main(): parser.add_argument('-l', '--language', dest='language', type=str, default='HEAD', required=False, help='Specify the language in order to determine the first commit to use (i.e. \'ja\' for ' 'Japanese. This flag overrides the first-commit flag.') + parser.add_argument('-vr', '--value-regex', dest='value_regex', type=str, default=None, required=False, + help='Specify the regex for the property value where a regex match against the property value ' + 'will display the key value pair in csv output (i.e. \'[a-zA-Z]\' or \'\\S\' for removing ' + 'just whitespace items). If this option is not specified, all key value pairs will be ' + 'accepted.') args = parser.parse_args() repo_path = args.repo_path if args.repo_path is not None else get_git_root(get_proj_dir()) output_path = args.output_path commit_1_id = args.commit_1_id + value_regex = args.value_regex if args.language is not None: commit_1_id = get_commit_for_language(args.language) @@ -74,7 +88,7 @@ def main(): commit_2_id = args.commit_2_id show_commits = not args.no_commits - write_diff_to_csv(repo_path, output_path, commit_1_id, commit_2_id, show_commits) + write_diff_to_csv(repo_path, output_path, commit_1_id, commit_2_id, show_commits, value_regex) sys.exit(0)