Mirror of https://github.com/overcuriousity/autopsy-flatpak.git (synced 2025-07-06 21:00:22 +00:00)

Commit: 48cdcdd602 ("working on translation dictionary implementation")
Parent: d84da21363

@ -4,7 +4,7 @@ from typing import List, Iterable, Tuple
import csv
import os

from fileutil import OMITTED_ADDITION, get_filename_addition, DELETED_ADDITION, FOUND_ADDITION
from fileutil import get_filename_addition
from outputresult import OutputResult

@ -54,6 +54,16 @@ def csv_to_records(input_path: str, header_row: bool) -> Tuple[List[List[str]],
    return results, header


# For use with creating csv filenames for entries that have been omitted.
OMITTED_ADDITION = '-omitted'

# For use with creating csv filenames for entries that have been deleted.
DELETED_ADDITION = '-deleted'

# For translations where
FOUND_ADDITION = '-found'


def write_results_to_csv(results: OutputResult, output_path: str):
    """
    Writes the result of processing to the output path as a csv file. If omitted values are present, for output_path of

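Aside (not part of this commit): a minimal sketch of how these suffix constants pair with get_filename_addition, whose documented behaviour is '/path/name.ext' plus '-add' giving '/path/name-add.ext'. The helper name _with_addition below is hypothetical; it only mirrors that documented behaviour.

from pathlib import PurePosixPath

def _with_addition(orig_path: str, addition: str) -> str:
    # Insert the addition between the file stem and its extension.
    p = PurePosixPath(orig_path)
    return str(p.with_name(p.stem + addition + p.suffix))

print(_with_addition('/out/diff.csv', '-omitted'))  # /out/diff-omitted.csv
print(_with_addition('/out/diff.csv', '-deleted'))  # /out/diff-deleted.csv
print(_with_addition('/out/diff.csv', '-found'))    # /out/diff-found.csv
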
@ -5,12 +5,14 @@ As a consequence, it also requires git >= 1.7.0 and python >= 3.4.
import sys
from envutil import get_proj_dir
from excelutil import write_results_to_xlsx
from gitutil import get_property_files_diff, get_git_root, get_commit_id
from gitutil import get_property_files_diff, get_git_root, get_commit_id, get_tree
from itemchange import convert_to_output
from csvutil import write_results_to_csv
import argparse
from langpropsutil import get_commit_for_language, LANG_FILENAME
from outputtype import OutputType
from languagedictutil import extract_translations
from propsutil import get_lang_bundle_name, DEFAULT_PROPS_FILENAME


def main():

@ -41,7 +43,7 @@ def main():
                        help='Specify the path to the properties file containing key value pairs of language mapped to '
                             'the commit of when bundles for that language were most recently updated.')

    parser.add_argument('-t', '--translation-dict', dest='translation_dict', type=bool, required=False, default=False,
    parser.add_argument('-td', '--translation-dict', dest='translation_dict', type=bool, required=False, default=False,
                        help='If this flag is specified, a dictionary mapping original prop key values to translated '
                             'values. If this flag is specified, it will ')

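Aside (not part of this commit): argparse's type=bool does not give an on/off flag; the parser converts the supplied string, so any non-empty value, including 'False', comes out truthy. If the option is meant to be a simple presence flag, the usual pattern is action='store_true', sketched below with the same option names as above.

import argparse

parser = argparse.ArgumentParser()
# store_true turns the option into a flag: absent -> False, present -> True.
parser.add_argument('-td', '--translation-dict', dest='translation_dict',
                    action='store_true', default=False,
                    help='Include a dictionary mapping original prop values to translated values.')

print(parser.parse_args([]).translation_dict)       # False
print(parser.parse_args(['-td']).translation_dict)  # True
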
@ -55,7 +57,7 @@ def main():
    output_type = args.output_type
    show_translated_col = not args.no_translated_col
    language_updates_file = args.language_file

    use_translation_dict = args.translation_dict
    lang = args.language
    if lang is not None:
        commit_1_id = get_commit_for_language(lang, language_updates_file)

@ -66,6 +68,13 @@ def main():
        parser.print_help(sys.stderr)
        sys.exit(1)

    translation_dict = None
    if use_translation_dict and lang:
        translation_dict = extract_translations(
            file_iter=get_tree(repo_path, commit_1_id),
            orig_filename=DEFAULT_PROPS_FILENAME,
            translated_filename=get_lang_bundle_name(lang))

    commit_2_id = args.commit_2_id
    show_commits = not args.no_commits

@ -73,6 +82,7 @@ def main():
    processing_result = convert_to_output(changes,
                                          commit1_id=get_commit_id(repo_path, commit_1_id) if show_commits else None,
                                          commit2_id=get_commit_id(repo_path, commit_2_id) if show_commits else None,
                                          translation_dict=translation_dict,
                                          show_translated_col=show_translated_col,
                                          separate_deleted=True)

@ -8,6 +8,7 @@ from typing import Union
def get_proj_dir(path: Union[pathlib.PurePath, str] = __file__) -> str:
    """
    Gets parent directory of this file (and subsequently, the project).

    Args:
        path: Can be overridden to provide a different file. This will return the parent of that file in that instance.

@ -6,6 +6,7 @@ from pathlib import Path
def get_path_pieces(orig_path: str) -> Tuple[str, Union[str, None], Union[str, None]]:
    """Retrieves path pieces. This is a naive approach as it determines if a file is present based on the
    presence of an extension.

    Args:
        orig_path: The original path to deconstruct.

@ -27,6 +28,7 @@ def get_path_pieces(orig_path: str) -> Tuple[str, Union[str, None], Union[str, N
def get_joined_path(folder: str, file_name: str) -> str:
    """
    Gets a joined folder and filename.

    Args:
        folder: The folder.
        file_name: The filename.

@ -53,16 +55,6 @@ def get_new_path(orig_path: str, new_filename: str) -> str:
    return str(Path(parent_dir) / Path(new_filename))


# For use with creating csv filenames for entries that have been omitted.
OMITTED_ADDITION = '-omitted'

# For use with creating csv filenames for entries that have been deleted.
DELETED_ADDITION = '-deleted'

# For translations where
FOUND_ADDITION = '-found'


def get_filename_addition(orig_path: str, filename_addition: str) -> str:
    """Gets filename with addition. So if item is '/path/name.ext' and the filename_addition is '-add', the new result
    would be '/path/name-add.ext'.

@ -1,7 +1,7 @@
"""Functions relating to using git and GitPython with an existing repo.
"""

from git import Repo, Diff, Blob
from git import Repo, Diff, Blob, Tree
from typing import List, Union, Iterator, Tuple, Any
from itemchange import ItemChange, get_changed
from pathlib import Path

@ -17,6 +17,7 @@ def get_git_root(child_path: str) -> str:
    """
    Taken from https://stackoverflow.com/questions/22081209/find-the-root-of-the-git-repository-where-the-file-lives,
    this obtains the root path of the git repo in which this file exists.

    Args:
        child_path: The path of a child within the repo.

@ -146,6 +147,21 @@ def list_paths(root_tree, path: Path = Path('.')) -> Iterator[Tuple[str, Blob]]:
        yield from list_paths(tree, path / tree.name)


def get_tree(repo_path: str, commit_id: str) -> Tree:
    """
    Retrieves the tree that can be walked for files and file content at the specified commit.

    Args:
        repo_path: The path to the repo or a child directory of the repo.
        commit_id: The commit id.

    Returns: The tree.
    """
    repo = Repo(repo_path, search_parent_directories=True)
    commit = repo.commit(commit_id.strip())
    return commit.tree


def get_property_file_entries(repo_path: str, at_commit: str = 'HEAD',
                              property_file_extension: str = DEFAULT_PROPS_EXTENSION) -> Iterator[PropEntry]:
    """

@ -157,11 +173,8 @@ def get_property_file_entries(repo_path: str, at_commit: str = 'HEAD',
        property_file_extension: The extension to use for scanning for property files.

    Returns: An iterator of PropEntry objects.

    """
    repo = Repo(repo_path, search_parent_directories=True)
    commit = repo.commit(at_commit.strip())
    for item in list_paths(commit.tree):
    for item in get_tree(repo_path, at_commit):
        path, blob = item
        if path.endswith(property_file_extension):
            for key, val in get_entry_dict(get_text(blob)).items():

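Aside (not part of this commit): a hedged usage sketch of how the tree returned by get_tree can be walked with stock GitPython. Tree objects support traverse(), which yields blobs and sub-trees carrying .path and .type; the repo path and commit below are illustrative.

from git import Repo

repo = Repo('.', search_parent_directories=True)  # assumes the cwd is inside a git repo
tree = repo.commit('HEAD').tree                   # the same object get_tree returns
for obj in tree.traverse():
    # Keep only file blobs that look like merged property bundles.
    if obj.type == 'blob' and obj.path.endswith('.properties-MERGED'):
        print(obj.path)
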
@ -58,6 +58,7 @@ class ItemChange:

    def get_row(self, show_translated_col: bool) -> List[str]:
        """Returns the list of values to be entered as a row in csv serialization.

        Args:
            show_translated_col (bool): Whether or not the translated columns are showing; otherwise use default.

@ -92,6 +93,7 @@ def convert_to_output(items: Iterator[ItemChange],
                      separate_deleted: bool = True) -> OutputResult:
    """
    Converts PropEntry objects to an output result to be written to a tabular datasource.

    Args:
        items: The PropEntry items.
        commit1_id: The first commit id to be shown in the header or None.

@ -23,6 +23,7 @@ def _get_props_path(language_updates_file: Union[str, None]):
def get_commit_for_language(language: str, language_updates_file: Union[str, None] = None) -> Union[str, None]:
    """
    Retrieves the latest commit for a particular language.

    Args:
        language: The language key.
        language_updates_file: The file containing the most recent updates. If not provided, the default file located

@ -45,6 +46,7 @@ def get_commit_for_language(language: str, language_updates_file: Union[str, Non
def set_commit_for_language(language: str, latest_commit: str, language_updates_file: Union[str, None] = None):
    """
    Sets the most recent update for a language within the language updates file.

    Args:
        language: The language key.
        latest_commit: The commit for how recent the language is.

@ -19,6 +19,7 @@ class FoundValue:
    def __init__(self, common_path, original_file, translated_file, key, orig_val, translated_val):
        """
        Constructor.

        Args:
            common_path: The folder common to both files.
            original_file: The original file path.

@ -38,15 +39,22 @@ class FoundValue:
def extract_translations(file_iter: Iterator[Tuple[str, Blob]], orig_filename: str, translated_filename: str) \
        -> Dict[str, FoundValue]:
    """
    Creates a translations dictionary based on comparing the values of keys in an original bundles file and a translated
    bundles file in the same directory. For instance, if /path/to/original.properties and
    /path/to/translated.properties both exist and in both files, a key-value pairing for keyA exists, the dictionary
    will contain an entry mapping the original value for keyA to the translated value and other metadata for that
    key.

    Args:
        file_iter:
        orig_filename:
        translated_filename:
        file_iter: An iterator of tuples containing the path and the content of the file.
        orig_filename: The original file name (i.e. 'bundle.properties-MERGED').
        translated_filename: The translated file name (i.e. 'Bundle_ja.properties').

    Returns:
    Returns: A dictionary mapping original values to translated values.

    """

    # Create a dictionary mapping parent path to the file content for both original and translated files
    original_files: Dict[str, Tuple[str, Blob]] = dict()
    translated_files: Dict[str, Tuple[str, Blob]] = dict()

@ -57,8 +65,8 @@ def extract_translations(file_iter: Iterator[Tuple[str, Blob]], orig_filename: s
        elif file_name.strip().lower() == translated_filename.strip().lower():
            translated_files[file_name] = (parent_dir, content)

    # determine original and translated files with common parent folders and find common keys
    to_ret: Dict[str, FoundValue] = dict()

    for common_folder, ((original_path, original_blob), (translated_path, translated_blob))\
            in common_entries(original_files, translated_files):
        orig_dict = sanitize_prop_dict_keys(get_entry_dict(original_blob))

@ -77,6 +85,15 @@ def extract_translations(file_iter: Iterator[Tuple[str, Blob]], orig_filename: s


def sanitize_prop_dict_keys(dct: Dict[str, str]) -> Dict[str, str]:
    """
    Sanitizes all the keys in a dictionary (i.e. strips white space and makes lower case).

    Args:
        dct: The dictionary.

    Returns: The dictionary with sanitized keys.

    """
    return {k.strip().lower(): v for k, v in dct.items()}

@ -89,6 +106,7 @@ def common_entries(*dcts: Dict[K, V]) -> Iterator[Tuple[K, Tuple[V, ...]]]:
    Taken from https://stackoverflow.com/questions/16458340/python-equivalent-of-zip-for-dictionaries,
    creates an iterator of tuples where the left value is the common key value and the right hand value is
    a tuple of all the matching values in order that the dictionaries were ordered in parameters.

    Args:
        *dcts: The dictionaries in order to provide common key/values.

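Aside (not part of this commit): the docstring above points to the "zip for dictionaries" idiom; below is a minimal sketch of that idiom and of how the yielded pairs are unpacked, as an illustration of the idea rather than the exact body used in this module. The sample folder and bundle contents are made up.

from typing import Dict, Iterator, Tuple, TypeVar

K = TypeVar('K')
V = TypeVar('V')

def common_entries(*dcts: Dict[K, V]) -> Iterator[Tuple[K, Tuple[V, ...]]]:
    # Yield (key, (value_from_each_dict, ...)) for keys present in every dict.
    if not dcts:
        return
    for key in set(dcts[0]).intersection(*dcts[1:]):
        yield key, tuple(d[key] for d in dcts)

originals = {'Core/src': ('Bundle.properties-MERGED', 'greeting=Hello')}
translations = {'Core/src': ('Bundle_ja.properties', 'greeting=Konnichiwa')}
for folder, (orig_entry, translated_entry) in common_entries(originals, translations):
    print(folder, orig_entry[1], '->', translated_entry[1])
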
@ -25,6 +25,7 @@ class OutputResult:
                 style: Union[List[ColumnStyle], None] = None, freeze_first_row: bool = True):
        """
        Constructs a ProcessingResult.

        Args:
            results: Items to be written as results. Data will be written such that the item at row,cell will be
                located within result at results[row][col].

@ -40,6 +40,7 @@ def convert_to_output(items: Iterator[PropEntry], commit_id: Union[str, None] =
                      show_translated_col: bool = True, value_regex: Union[str, None] = None) -> OutputResult:
    """
    Converts PropEntry objects to an output result to be written to a tabular datasource.

    Args:
        items: The PropEntry items.
        commit_id: The commit id to be shown in the header or None.

@ -5,9 +5,11 @@ from jproperties import Properties
import os

# The default extension for property files in autopsy repo

DEFAULT_PROPS_EXTENSION = 'properties-MERGED'

# The default filename for property files in autopsy repo
DEFAULT_PROPS_FILENAME = 'Bundle.{ext}'.format(ext=DEFAULT_PROPS_EXTENSION)


def get_lang_bundle_name(language: str) -> str:
    """

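Aside (not part of this commit): what the new constant expands to. The 'Bundle_ja.properties' counterpart mentioned below is taken from the extract_translations docstring earlier in this commit; the body of get_lang_bundle_name is not shown in this diff.

DEFAULT_PROPS_EXTENSION = 'properties-MERGED'
DEFAULT_PROPS_FILENAME = 'Bundle.{ext}'.format(ext=DEFAULT_PROPS_EXTENSION)

print(DEFAULT_PROPS_FILENAME)  # Bundle.properties-MERGED
# For a language such as 'ja', the translated counterpart is expected to be
# named along the lines of 'Bundle_ja.properties' (per the docstring above).
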
@ -43,6 +45,7 @@ def get_entry_dict_from_path(props_path: str) -> Union[Dict[str, str], None]:
    """
    Retrieves a dictionary mapping the properties represented in the string or None if no properties file can be found
    at that path.

    Args:
        props_path: The path to the properties file.

@ -32,6 +32,7 @@ def create_output_result(row_header: List[str], results: List[List[str]],

    """
    Creates OutputResult from components.

    Args:
        row_header: The row header.
        results: The results.

@ -47,4 +48,9 @@ def create_output_result(row_header: List[str], results: List[List[str]],
    deleted_result = [row_header] + deleted if deleted else None
    found_result = [row_header] + found_translation if found_translation else None

    return OutputResult([row_header] + results, omitted_result, deleted_result, style)
    return OutputResult(
        results=[row_header] + results,
        omitted=omitted_result,
        deleted=deleted_result,
        found=found_result,
        style=style)

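Aside (not part of this commit): the rewritten return switches to keyword arguments so the new found section can be passed alongside the existing ones without relying on positional order. A small illustration of the header-prepending pattern used for each optional section; the sample rows are made up and the names simply mirror the function above.

row_header = ['key', 'original value', 'translated value']
found_translation = [['greeting', 'Hello', 'Konnichiwa']]

# Each optional section only gets the header row prepended when it has rows.
found_result = [row_header] + found_translation if found_translation else None
print(found_result)
# [['key', 'original value', 'translated value'], ['greeting', 'Hello', 'Konnichiwa']]
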
@ -195,6 +195,7 @@ class DataRows:
                 deleted_results: Union[List[List[str]], None] = None):
        """
        Creates a DataRows object.

        Args:
            results: The 2d list of strings representing cells.
            header: The header row if present.

@ -208,6 +209,7 @@ class DataRows:
def get_csv_rows(input_path: str, has_header: bool) -> DataRows:
    """
    Gets rows of a csv file in a DataRows format.

    Args:
        input_path: The input path of the file.
        has_header: Whether or not it has a header.

@ -222,6 +224,7 @@ def get_csv_rows(input_path: str, has_header: bool) -> DataRows:
def get_xlsx_rows(input_path: str, has_header: bool, results_sheet: str, deleted_sheet: str) -> DataRows:
    """
    Gets worksheets of an excel workbook in a DataRows format.

    Args:
        input_path: The input path of the file.
        has_header: Whether or not it has a header.

@ -250,6 +253,7 @@ def get_prop_entries_from_data(datarows: DataRows, path_idx: int, key_idx: int,
                               path_converter: Callable) -> List[PropEntry]:
    """
    Converts a DataRows object into PropEntry objects.

    Args:
        datarows: The DataRows object.
        path_idx: The index of the column containing the path.
