working on translation dictionary implementation

2025-07-06 21:00:22 +00:00 · 2021-01-05 10:18:59 -05:00 · 2021-01-05 10:18:59 -05:00 · 48cdcdd602
commit 48cdcdd602
parent d84da21363
13 changed files with 89 additions and 26 deletions
--- a/release_scripts/localization_scripts/csvutil.py
+++ b/release_scripts/localization_scripts/csvutil.py
@ -4,7 +4,7 @@ from typing import List, Iterable, Tuple
 import csv
 import os
-from fileutil import OMITTED_ADDITION, get_filename_addition, DELETED_ADDITION, FOUND_ADDITION
+from fileutil import get_filename_addition
 from outputresult import OutputResult
@ -54,6 +54,16 @@ def csv_to_records(input_path: str, header_row: bool) -> Tuple[List[List[str]],
        return results, header
 # For use with creating csv filenames for entries that have been omitted.
 OMITTED_ADDITION = '-omitted'
 # For use with creating csv filenames for entries that have been deleted.
 DELETED_ADDITION = '-deleted'
 # For use with creating csv filenames for entries for which a translation was found.
 FOUND_ADDITION = '-found'
 def write_results_to_csv(results: OutputResult, output_path: str):
    """
    Writes the result of processing to the output path as a csv file.  If omitted values are present, for output_path of
--- a/release_scripts/localization_scripts/diffscript.py
+++ b/release_scripts/localization_scripts/diffscript.py
@ -5,12 +5,14 @@ As a consequence, it also requires git >= 1.7.0 and python >= 3.4.
 import sys
 from envutil import get_proj_dir
 from excelutil import write_results_to_xlsx
-from gitutil import get_property_files_diff, get_git_root, get_commit_id
+from gitutil import get_property_files_diff, get_git_root, get_commit_id, get_tree
 from itemchange import convert_to_output
 from csvutil import write_results_to_csv
 import argparse
 from langpropsutil import get_commit_for_language, LANG_FILENAME
 from outputtype import OutputType
 from languagedictutil import extract_translations
 from propsutil import get_lang_bundle_name, DEFAULT_PROPS_FILENAME
 def main():
@ -41,7 +43,7 @@ def main():
                        help='Specify the path to the properties file containing key value pairs of language mapped to '
                             'the commit of when bundles for that language were most recently updated.')
-    parser.add_argument('-t', '--translation-dict', dest='translation_dict', type=bool, required=False, default=False,
+    parser.add_argument('-td', '--translation-dict', dest='translation_dict', type=bool, required=False, default=False,
                        help='If this flag is specified, a dictionary mapping original prop key values to translated '
                             'values.  If this flag is specified, it will ')
@ -55,7 +57,7 @@ def main():
    output_type = args.output_type
    show_translated_col = not args.no_translated_col
    language_updates_file = args.language_file
-
+    use_translation_dict = args.translation_dict
    lang = args.language
    if lang is not None:
        commit_1_id = get_commit_for_language(lang, language_updates_file)
@ -66,6 +68,13 @@ def main():
        parser.print_help(sys.stderr)
        sys.exit(1)
    translation_dict = None
    if use_translation_dict and lang:
        translation_dict = extract_translations(
            file_iter=get_tree(repo_path, commit_1_id),
            orig_filename=DEFAULT_PROPS_FILENAME,
            translated_filename=get_lang_bundle_name(lang))
    commit_2_id = args.commit_2_id
    show_commits = not args.no_commits
@ -73,6 +82,7 @@ def main():
    processing_result = convert_to_output(changes,
                                          commit1_id=get_commit_id(repo_path, commit_1_id) if show_commits else None,
                                          commit2_id=get_commit_id(repo_path, commit_2_id) if show_commits else None,
                                          translation_dict=translation_dict,
                                          show_translated_col=show_translated_col,
                                          separate_deleted=True)
--- a/release_scripts/localization_scripts/envutil.py
+++ b/release_scripts/localization_scripts/envutil.py
@ -8,6 +8,7 @@ from typing import Union
 def get_proj_dir(path: Union[pathlib.PurePath, str] = __file__) -> str:
    """
    Gets parent directory of this file (and subsequently, the project).
    Args:
        path: Can be overridden to provide a different file.  This will return the parent of that file in that instance.
--- a/release_scripts/localization_scripts/fileutil.py
+++ b/release_scripts/localization_scripts/fileutil.py
@ -6,6 +6,7 @@ from pathlib import Path
 def get_path_pieces(orig_path: str) -> Tuple[str, Union[str, None], Union[str, None]]:
    """Retrieves path pieces.  This is a naive approach as it determines if a file is present based on the
    presence of an extension.
    Args:
        orig_path:  The original path to deconstruct.
@ -27,6 +28,7 @@ def get_path_pieces(orig_path: str) -> Tuple[str, Union[str, None], Union[str, N
 def get_joined_path(folder: str, file_name: str) -> str:
    """
    Gets a joined folder and filename.
    Args:
        folder: The folder.
        file_name: The filename.
@ -53,16 +55,6 @@ def get_new_path(orig_path: str, new_filename: str) -> str:
    return str(Path(parent_dir) / Path(new_filename))
 # For use with creating csv filenames for entries that have been omitted.
 OMITTED_ADDITION = '-omitted'
 # For use with creating csv filenames for entries that have been deleted.
 DELETED_ADDITION = '-deleted'
 # For translations where
 FOUND_ADDITION = '-found'
 def get_filename_addition(orig_path: str, filename_addition: str) -> str:
    """Gets filename with addition.  So if item is '/path/name.ext' and the filename_addition is '-add', the new result
    would be '/path/name-add.ext'.
--- a/release_scripts/localization_scripts/gitutil.py
+++ b/release_scripts/localization_scripts/gitutil.py
@ -1,7 +1,7 @@
 """Functions relating to using git and GitPython with an existing repo.
 """
-from git import Repo, Diff, Blob
+from git import Repo, Diff, Blob, Tree
 from typing import List, Union, Iterator, Tuple, Any
 from itemchange import ItemChange, get_changed
 from pathlib import Path
@ -17,6 +17,7 @@ def get_git_root(child_path: str) -> str:
    """
    Taken from https://stackoverflow.com/questions/22081209/find-the-root-of-the-git-repository-where-the-file-lives,
    this obtains the root path of the git repo in which this file exists.
    Args:
        child_path:  The path of a child within the repo.
@ -146,6 +147,21 @@ def list_paths(root_tree, path: Path = Path('.')) -> Iterator[Tuple[str, Blob]]:
        yield from list_paths(tree, path / tree.name)
 def get_tree(repo_path: str, commit_id: str) -> Tree:
    """
    Opens the git repo found at (or above) the given path and returns the tree of the requested commit, which can
    then be walked for files and file content.
    Args:
        repo_path: The path to the repo or a child directory of the repo.
        commit_id: The commit id; surrounding white space is stripped before the lookup.
    Returns: The tree of the specified commit.
    """
    # The repo is opened before the commit id is touched, so failures surface in the same order as before.
    git_repo = Repo(repo_path, search_parent_directories=True)
    return git_repo.commit(commit_id.strip()).tree
 def get_property_file_entries(repo_path: str, at_commit: str = 'HEAD',
                              property_file_extension: str = DEFAULT_PROPS_EXTENSION) -> Iterator[PropEntry]:
    """
@ -157,11 +173,8 @@ def get_property_file_entries(repo_path: str, at_commit: str = 'HEAD',
        property_file_extension: The extension to use for scanning for property files.
    Returns: An iterator of PropEntry objects.
    """
-    repo = Repo(repo_path, search_parent_directories=True)
+    for item in get_tree(repo_path, at_commit):
    commit = repo.commit(at_commit.strip())
    for item in list_paths(commit.tree):
        path, blob = item
        if path.endswith(property_file_extension):
            for key, val in get_entry_dict(get_text(blob)).items():
--- a/release_scripts/localization_scripts/itemchange.py
+++ b/release_scripts/localization_scripts/itemchange.py
@ -58,6 +58,7 @@ class ItemChange:
    def get_row(self, show_translated_col: bool) -> List[str]:
        """Returns the list of values to be entered as a row in csv serialization.
        Args:
            show_translated_col (bool): Whether or not the translated columns are showing; otherwise use default.
@ -92,6 +93,7 @@ def convert_to_output(items: Iterator[ItemChange],
                      separate_deleted: bool = True) -> OutputResult:
    """
    Converts ItemChange objects to an output result to be written to a tabular datasource.
    Args:
        items: The ItemChange items.
        commit1_id: The first commit id to be shown in the header or None.
--- a/release_scripts/localization_scripts/langpropsutil.py
+++ b/release_scripts/localization_scripts/langpropsutil.py
@ -23,6 +23,7 @@ def _get_props_path(language_updates_file: Union[str, None]):
 def get_commit_for_language(language: str, language_updates_file: Union[str, None] = None) -> Union[str, None]:
    """
    Retrieves the latest commit for a particular language.
    Args:
        language: The language key.
        language_updates_file: The file containing the most recent updates.  If not provided, the default file located
@ -45,6 +46,7 @@ def get_commit_for_language(language: str, language_updates_file: Union[str, Non
 def set_commit_for_language(language: str, latest_commit: str, language_updates_file: Union[str, None] = None):
    """
    Sets the most recent update for a language within the language updates file.
    Args:
        language: The language key.
        latest_commit: The commit for how recent the language is.
--- a/release_scripts/localization_scripts/languagedictutil.py
+++ b/release_scripts/localization_scripts/languagedictutil.py
@ -19,6 +19,7 @@ class FoundValue:
    def __init__(self, common_path, original_file, translated_file, key, orig_val, translated_val):
        """
        Constructor.
        Args:
            common_path: The folder common to both files.
            original_file: The original file path.
@ -38,15 +39,22 @@ class FoundValue:
 def extract_translations(file_iter: Iterator[Tuple[str, Blob]], orig_filename: str, translated_filename: str) \
        -> Dict[str, FoundValue]:
    """
    Creates a translations dictionary based on comparing the values of keys in an original bundles file and a translated
    bundles file in the same directory.  For instance, if /path/to/original.properties and
    /path/to/translated.properties both exist and in both files, a key-value pairing for keyA exists, the dictionary
    will contain an entry mapping the original value for keyA to the translated value and other metadata for that
    key.
    Args:
-        file_iter:
+        file_iter: An iterator of tuples containing the path and the content of the file.
-        orig_filename:
+        orig_filename: The original file name (e.g. 'Bundle.properties-MERGED').
-        translated_filename:
+        translated_filename: The translated file name (e.g. 'Bundle_ja.properties').
-    Returns:
+    Returns: A dictionary mapping original values to translated values.
    """
    # Create a dictionary mapping parent path to the file content for both original and translated files
    original_files: Dict[str, Tuple[str, Blob]] = dict()
    translated_files: Dict[str, Tuple[str, Blob]] = dict()
@ -57,8 +65,8 @@ def extract_translations(file_iter: Iterator[Tuple[str, Blob]], orig_filename: s
        elif file_name.strip().lower() == translated_filename.strip().lower():
            translated_files[file_name] = (parent_dir, content)
    # determine original and translated files with common parent folders and find common keys
    to_ret: Dict[str, FoundValue] = dict()
    for common_folder, ((original_path, original_blob), (translated_path, translated_blob))\
            in common_entries(original_files, translated_files):
        orig_dict = sanitize_prop_dict_keys(get_entry_dict(original_blob))
@ -77,6 +85,15 @@ def extract_translations(file_iter: Iterator[Tuple[str, Blob]], orig_filename: s
 def sanitize_prop_dict_keys(dct: Dict[str, str]) -> Dict[str, str]:
    """
    Builds a new dictionary whose keys are normalized versions of the original keys: surrounding white space is
    removed and letters are lower-cased.  Values are carried over unchanged.  If several keys normalize to the same
    string, the value belonging to the last such key in iteration order is kept.
    Args:
        dct: The dictionary whose keys should be normalized.
    Returns: A new dictionary with normalized keys.
    """
    sanitized: Dict[str, str] = {}
    for raw_key, value in dct.items():
        sanitized[raw_key.strip().lower()] = value
    return sanitized
@ -89,6 +106,7 @@ def common_entries(*dcts: Dict[K, V]) -> Iterator[Tuple[K, Tuple[V, ...]]]:
    Taken from https://stackoverflow.com/questions/16458340/python-equivalent-of-zip-for-dictionaries,
    creates an iterator of tuples where the left value is the common key value and the right hand value is
    a tuple of all the matching values in order that the dictionaries were ordered in parameters.
    Args:
        *dcts: The dictionaries in order to provide common key/values.
--- a/release_scripts/localization_scripts/outputresult.py
+++ b/release_scripts/localization_scripts/outputresult.py
@ -25,6 +25,7 @@ class OutputResult:
                 style: Union[List[ColumnStyle], None] = None, freeze_first_row: bool = True):
        """
        Constructs a ProcessingResult.
        Args:
            results: Items to be written as results.  Data will be written such that the item at row,cell will be
            located within result at results[row][col].
--- a/release_scripts/localization_scripts/propentry.py
+++ b/release_scripts/localization_scripts/propentry.py
@ -40,6 +40,7 @@ def convert_to_output(items: Iterator[PropEntry], commit_id: Union[str, None] =
                      show_translated_col: bool = True, value_regex: Union[str, None] = None) -> OutputResult:
    """
    Converts PropEntry objects to an output result to be written to a tabular datasource.
    Args:
        items: The PropEntry items.
        commit_id: The commit id to be shown in the header or None.
--- a/release_scripts/localization_scripts/propsutil.py
+++ b/release_scripts/localization_scripts/propsutil.py
@ -5,9 +5,11 @@ from jproperties import Properties
 import os
 # The default extension for property files in autopsy repo
 DEFAULT_PROPS_EXTENSION = 'properties-MERGED'
 # The default filename for property files in autopsy repo
 DEFAULT_PROPS_FILENAME = 'Bundle.{ext}'.format(ext=DEFAULT_PROPS_EXTENSION)
 def get_lang_bundle_name(language: str) -> str:
    """
@ -43,6 +45,7 @@ def get_entry_dict_from_path(props_path: str) -> Union[Dict[str, str], None]:
    """
    Retrieves a dictionary mapping the properties represented in the string or None if no properties file can be found
    at that path.
    Args:
        props_path: The path to the properties file.
--- a/release_scripts/localization_scripts/tabularutil.py
+++ b/release_scripts/localization_scripts/tabularutil.py
@ -32,6 +32,7 @@ def create_output_result(row_header: List[str], results: List[List[str]],
    """
    Creates OutputResult from components.
    Args:
        row_header: The row header.
        results: The results.
@ -47,4 +48,9 @@ def create_output_result(row_header: List[str], results: List[List[str]],
    deleted_result = [row_header] + deleted if deleted else None
    found_result = [row_header] + found_translation if found_translation else None
-    return OutputResult([row_header] + results, omitted_result, deleted_result, style)
+    return OutputResult(
        results=[row_header] + results,
        omitted=omitted_result,
        deleted=deleted_result,
        found=found_result,
        style=style)
--- a/release_scripts/localization_scripts/updatepropsscript.py
+++ b/release_scripts/localization_scripts/updatepropsscript.py
@ -195,6 +195,7 @@ class DataRows:
                 deleted_results: Union[List[List[str]], None] = None):
        """
        Creates a DataRows object.
        Args:
            results: The 2d list of strings representing cells.
            header: The header row if present.
@ -208,6 +209,7 @@ class DataRows:
 def get_csv_rows(input_path: str, has_header: bool) -> DataRows:
    """
    Gets rows of a csv file in a DataRows format.
    Args:
        input_path: The input path of the file.
        has_header: Whether or not it has a header.
@ -222,6 +224,7 @@ def get_csv_rows(input_path: str, has_header: bool) -> DataRows:
 def get_xlsx_rows(input_path: str, has_header: bool, results_sheet: str, deleted_sheet: str) -> DataRows:
    """
    Gets worksheets of an excel workbook in a DataRows format.
    Args:
        input_path: The input path of the file.
        has_header: Whether or not it has a header.
@ -250,6 +253,7 @@ def get_prop_entries_from_data(datarows: DataRows, path_idx: int, key_idx: int,
                               path_converter: Callable) -> List[PropEntry]:
    """
    Converts a DataRows object into PropEntry objects.
    Args:
        datarows: The DataRows object.
        path_idx: The index of the column containing the path.