"""Updates properties files in a git repo from the rows of a csv or xlsx file.

Each input row supplies the relative path of a properties file, a key, and a
value.  Rows are grouped by properties file and are then either merged into the
existing file contents or used to overwrite the file outright.

This script requires the python libraries: jproperties, pyexcel-xlsx,
xlsxwriter and pyexcel.  It also requires Python 3.x.
"""

from typing import List, Dict, Tuple, Callable, Iterable, Iterator, Union
import sys
import os

from envutil import get_proj_dir
from excelutil import excel_to_records
from fileutil import get_new_path, get_path_pieces
from gitutil import get_git_root
from langpropsutil import set_commit_for_language
from propsutil import set_entry_dict, get_entry_dict_from_path, get_lang_bundle_name
from csvutil import csv_to_records
from propentry import PropEntry
import argparse


def write_prop_entries(entries: Iterable[PropEntry], repo_path: str):
    """Writes property entry items to their expected relative path within the repo path.

    Each file is written with only the key-value pairs provided; keys marked
    for deletion are simply left out and previously existing file contents are
    not consulted.

    Args:
        entries (Iterable[PropEntry]): The prop entry items to write to disk.
        repo_path (str): The path to the git repo.
    """
    items_by_file = get_by_file(entries)
    # The deletion list is irrelevant here: the file content is replaced wholesale.
    for rel_path, (to_set, _to_delete) in items_by_file.items():
        abs_path = os.path.join(repo_path, rel_path)
        set_entry_dict(to_set, abs_path)


def update_prop_entries(entries: Iterable[PropEntry], repo_path: str):
    """Merges property entry items into the files at their relative path within the repo path.

    The result for each file is the union of the previously existing entries
    and the entries provided, minus any keys marked for deletion.  A file is
    only written when its content actually changes.

    Args:
        entries (Iterable[PropEntry]): The prop entry items to write to disk.
        repo_path (str): The path to the git repo.
    """
    items_by_file = get_by_file(entries)
    for rel_path, (to_set, to_delete) in items_by_file.items():
        abs_path = os.path.join(repo_path, rel_path)

        prop_items = get_entry_dict_from_path(abs_path)
        if prop_items is None:
            # No existing entries could be loaded; start from an empty mapping.
            prop_items = {}

        changed = False

        # Deletions are applied before updates so that a key appearing in both
        # lists ends up present with its new value.
        for key_to_delete in to_delete:
            if key_to_delete in prop_items:
                changed = True
                del prop_items[key_to_delete]

        for key, val in to_set.items():
            # Only flag a change when the key is new or its value differs;
            # otherwise an identical file would be rewritten needlessly.
            if key not in prop_items or prop_items[key] != val:
                changed = True
                prop_items[key] = val

        # only write to disk if a change was made
        if changed:
            set_entry_dict(prop_items, abs_path)


def get_by_file(entries: Iterable[PropEntry]) -> Dict[str, Tuple[Dict[str, str], List[str]]]:
    """Groups prop entries by the relative path of their properties file.

    Args:
        entries (Iterable[PropEntry]): The entries to be grouped.

    Returns:
        Dict[str, Tuple[Dict[str,str], List[str]]]: A dictionary mapping the file path to a tuple
        containing the key-value pairs to be updated and a list of keys to be deleted.
    """
    to_ret = {}
    for prop_entry in entries:
        to_update, to_delete = to_ret.setdefault(prop_entry.rel_path, ({}, []))
        if prop_entry.should_delete:
            to_delete.append(prop_entry.key)
        else:
            to_update[prop_entry.key] = prop_entry.value

    return to_ret


def idx_bounded(num: int, max_exclusive: int) -> bool:
    """Determines whether an index is valid for a sequence of the given length.

    Args:
        num (int): The index to check.
        max_exclusive (int): The exclusive upper bound (i.e. the sequence length).

    Returns:
        bool: True if 0 <= num < max_exclusive.
    """
    return 0 <= num < max_exclusive


def get_prop_entry(row: List[str],
                   path_idx: int = 0,
                   key_idx: int = 1,
                   value_idx: int = 3,
                   should_delete_converter: Union[Callable[[List[str]], bool], None] = None,
                   path_converter: Union[Callable[[str], str], None] = None) -> PropEntry:
    """Parses a PropEntry object from a row of values in a csv.

    Any of path, key, or value whose index falls outside of the row is set to
    None.  An entry with a missing or blank value is always marked for deletion.

    Args:
        row (List[str]): The csv file row to parse.
        path_idx (int, optional): The column index for the relative path of the properties file. Defaults to 0.
        key_idx (int, optional): The column index for the properties key. Defaults to 1.
        value_idx (int, optional): The column index for the properties value. Defaults to 3 (note that
        get_prop_entries defaults to 2 and always passes its value explicitly).
        should_delete_converter (Callable[[List[str]], bool], optional): If not None, this determines if the key
        should be deleted from the row values. Defaults to None.
        path_converter (Callable[[str], str], optional): If not None, this determines the relative path to use in the
        created PropEntry given the original relative path. It is not invoked when the row has no path.
        Defaults to None.

    Returns:
        PropEntry: The generated prop entry object.
    """
    row_len = len(row)

    path = row[path_idx] if idx_bounded(path_idx, row_len) else None
    # Skip conversion when the row has no path column; the converter expects a string.
    if path_converter is not None and path is not None:
        path = path_converter(path)

    key = row[key_idx] if idx_bounded(key_idx, row_len) else None
    value = row[value_idx] if idx_bounded(value_idx, row_len) else None
    should_delete = False if should_delete_converter is None else should_delete_converter(row)

    # delete this key if no value provided
    if not value or not value.strip():
        should_delete = True

    return PropEntry(path, key, value, should_delete)


def _has_key_and_path(prop_entry: PropEntry) -> bool:
    """Returns True if the entry has both a non-blank key and a non-blank relative path."""
    # key / rel_path are None when the row was too short for the requested
    # column, so test for truthiness before calling strip().
    return bool(prop_entry
                and prop_entry.key and prop_entry.key.strip()
                and prop_entry.rel_path and prop_entry.rel_path.strip())


def get_prop_entries(rows: List[List[str]],
                     path_idx: int = 0,
                     key_idx: int = 1,
                     value_idx: int = 2,
                     should_delete_converter: Union[Callable[[List[str]], bool], None] = None,
                     path_converter: Union[Callable[[str], str], None] = None) -> Iterator[PropEntry]:
    """Parses PropEntry objects from rows of values in a csv.

    Rows lacking a key or a relative path (missing or blank) are skipped.  Rows
    with a missing or blank value are kept but marked for deletion.

    Args:
        rows (List[List[str]]): The csv file rows to parse.
        path_idx (int, optional): The column index for the relative path of the properties file. Defaults to 0.
        key_idx (int, optional): The column index for the properties key. Defaults to 1.
        value_idx (int, optional): The column index for the properties value. Defaults to 2.
        should_delete_converter (Callable[[List[str]], bool], optional): If not None, this determines if the key
        should be deleted from the row values. Defaults to None.
        path_converter (Callable[[str], str], optional): If not None, this determines the relative path to use in the
        created PropEntry given the original relative path. Defaults to None.

    Returns:
        Iterator[PropEntry]: A lazy iterator over the generated prop entry objects.
    """
    prop_entries = (get_prop_entry(row, path_idx, key_idx, value_idx, should_delete_converter, path_converter)
                    for row in rows)

    # ensure a key and relative path are present
    return filter(_has_key_and_path, prop_entries)


def get_should_deleted(row_items: List[str], requested_idx: int) -> bool:
    """If there is a value at row_items[requested_idx] and that value starts with 'DELET' (ignoring case and
    leading whitespace), then this will return true.

    Args:
        row_items (List[str]): The row items.
        requested_idx (int): The index specifying if the property should be deleted.

    Returns:
        bool: True if the row specifies it should be deleted.
    """
    if not idx_bounded(requested_idx, len(row_items)):
        return False

    cell = row_items[requested_idx]
    # A non-string cell cannot carry the deletion marker.
    return isinstance(cell, str) and cell.strip().upper().startswith('DELET')


class DataRows:
    """
    Defines pieces of an intermediate parsed result from a data source including the header row (if present), results
    as a 2d list, and deleted results as a 2d list.
    """
    header: Union[List[str], None]
    results: List[List[str]]
    deleted_results: Union[List[List[str]], None]

    def __init__(self, results: List[List[str]], header: Union[List[str], None] = None,
                 deleted_results: Union[List[List[str]], None] = None):
        """
        Creates a DataRows object.

        Args:
            results: The 2d list of strings representing cells.
            header: The header row if present.
            deleted_results: The 2d list of strings representing cells or None.
        """
        self.header = header
        self.results = results
        self.deleted_results = deleted_results


def get_csv_rows(input_path: str, has_header: bool) -> DataRows:
    """
    Gets rows of a csv file in a DataRows format.

    Args:
        input_path: The input path of the file.
        has_header: Whether or not it has a header.

    Returns:
        An intermediate result DataRows object for further parsing.  The deleted results are left as None.
    """
    all_items, header = csv_to_records(input_path, has_header)
    return DataRows(header=header, results=all_items)


def get_xlsx_rows(input_path: str, has_header: bool, results_sheet: str, deleted_sheet: str) -> DataRows:
    """
    Gets worksheets of an excel workbook in a DataRows format.

    Args:
        input_path: The input path of the file.
        has_header: Whether or not it has a header.
        results_sheet: The name of the results sheet.  This sheet must be present in the workbook.
        deleted_sheet: The name of the sheet containing deleted items.  If falsy or not present in the workbook,
        no deleted items are returned.

    Returns:
        An intermediate result DataRows object for further parsing.
    """
    workbook = excel_to_records(input_path)

    results_items = workbook[results_sheet]
    header = None
    if has_header and len(results_items) > 0:
        # The first row is the header; the remainder are data rows.
        header = results_items[0]
        results_items = results_items[1:]

    deleted_items = workbook[deleted_sheet] if deleted_sheet and deleted_sheet in workbook else None
    if has_header and deleted_items and len(deleted_items) > 0:
        # The header of the deleted sheet is discarded.
        deleted_items = deleted_items[1:]

    return DataRows(header=header, results=results_items, deleted_results=deleted_items)


def get_prop_entries_from_data(datarows: DataRows, path_idx: int, key_idx: int, value_idx: int,
                               should_delete_converter: Union[Callable[[List[str]], bool], None],
                               path_converter: Callable) -> List[PropEntry]:
    """
    Converts a DataRows object into PropEntry objects.  Every usable row in the deleted results is marked for
    deletion regardless of should_delete_converter.

    Args:
        datarows: The DataRows object.
        path_idx: The index of the column containing the path.
        key_idx: The index of the column containing the key.
        value_idx: The index of the column containing the value.
        should_delete_converter: Given a list of strings representing a row, returns true if the entry should be
        deleted.
        path_converter: Converts the path to the proper format.

    Returns:
        A list of PropEntry items.
    """
    prop_entries = list(get_prop_entries(datarows.results, path_idx, key_idx, value_idx,
                                         should_delete_converter, path_converter))

    if datarows.deleted_results and len(datarows.deleted_results) > 0:
        prop_entries += list(get_prop_entries(datarows.deleted_results, path_idx, key_idx, value_idx,
                                              lambda _row: True, path_converter))

    return prop_entries


def main():
    """Parses command line arguments, reads the csv or xlsx input, and applies it to the properties files.

    Exits the process with status 0 on success.

    Raises:
        ValueError: If the input file is neither a csv nor an xlsx file.
    """
    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(description='Updates properties files in the autopsy git repo.',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument(dest='file', type=str, help='The path to the file (ending in either .xlsx or .csv). '
                                                    'The default format for the file has columns of relative path, '
                                                    'properties file key, properties file value, translated value, '
                                                    'first commit, and commit id for how recent these updates '
                                                    'are. If the key should be deleted, the deletion row should be '
                                                    '\'DELETION.\' A header row is expected by default and the '
                                                    'commit id, if specified, should only be in the first row. The'
                                                    ' input path should be specified as a relative path with the '
                                                    'dot slash notation (i.e. `./inputpath.csv`) or an absolute '
                                                    'path.')

    parser.add_argument('-r', '--repo', dest='repo_path', type=str, required=False,
                        help='The path to the repo. If not specified, parent repo of path of script is used.')
    parser.add_argument('-p', '--path-idx', dest='path_idx', action='store', type=int, default=0, required=False,
                        help='The column index in the csv file providing the relative path to the properties file.')
    parser.add_argument('-k', '--key-idx', dest='key_idx', action='store', type=int, default=1, required=False,
                        help='The column index in the csv file providing the key within the properties file.')
    parser.add_argument('-v', '--value-idx', dest='value_idx', action='store', type=int, default=3, required=False,
                        help='The column index in the csv file providing the value within the properties file.')
    parser.add_argument('-c', '--commit-idx', dest='latest_commit_idx', action='store', type=int, default=5,
                        required=False, help='The column index in the csv file providing the commit for which this '
                                             'update applies. The commit should be located in the header row. ')

    parser.add_argument('-rs', '--results-sheet', dest='results_sheet', action='store', type=str,
                        default='results', required=False,
                        help='In an excel workbook, the sheet that indicates '
                             'results items. This is only used for xlsx files.')

    parser.add_argument('-ds', '--deleted-sheet', dest='deleted_sheet', action='store', type=str,
                        default='deleted', required=False,
                        help='In an excel workbook, the sheet that indicates '
                             'deleted items. This is only used for xlsx files.')

    # The help text reflects get_should_deleted(): only content beginning with
    # 'DELET' marks the row for deletion.
    parser.add_argument('-di', '--should-delete-idx', dest='should_delete_idx', action='store', type=int,
                        default=-1, required=False,
                        help="The column index in the csv file providing whether or not the key "
                             "should be deleted. Content starting with 'DELET' (case-insensitive) "
                             "will be treated as True.")

    parser.add_argument('-z', '--has-no-header', dest='has_no_header', action='store_true', default=False,
                        required=False, help='Specify whether or not there is a header within the csv file.')
    parser.add_argument('-f', '--file-rename', dest='file_rename', action='store', type=str, default=None,
                        required=False, help='If specified, the properties file will be renamed to the argument'
                                             ' preserving the specified relative path.')
    parser.add_argument('-o', '--should-overwrite', dest='should_overwrite', action='store_true', default=False,
                        required=False, help="Whether or not to overwrite the previously existing properties files"
                                             " ignoring previously existing values.")

    parser.add_argument('-l', '--language', dest='language', type=str, default=None, required=False,
                        help='Specify the language in order to update the last updated properties file and rename '
                             'files within directories. This flag overrides the file-rename flag.')

    parser.add_argument('-lf', '--language-updates-file', dest='language_file', type=str, default=None,
                        required=False,
                        help='Specify the path to the properties file containing key value pairs of language mapped to '
                             'the commit of when bundles for that language were most recently updated.')

    args = parser.parse_args()

    repo_path = args.repo_path if args.repo_path is not None else get_git_root(get_proj_dir())
    input_path = args.file
    path_idx = args.path_idx
    key_idx = args.key_idx
    value_idx = args.value_idx
    has_header = not args.has_no_header
    overwrite = args.should_overwrite
    deleted_sheet = args.deleted_sheet
    results_sheet = args.results_sheet

    # means of determining if a key should be deleted from a file
    if args.should_delete_idx < 0:
        should_delete_converter = None
    else:
        def should_delete_converter(row_items: List[str]):
            return get_should_deleted(row_items, args.should_delete_idx)

    # provides the means of renaming the bundle file
    if args.language is not None:
        def path_converter(orig_path: str):
            return get_new_path(orig_path, get_lang_bundle_name(args.language))
    elif args.file_rename is not None:
        def path_converter(orig_path: str):
            return get_new_path(orig_path, args.file_rename)
    else:
        path_converter = None

    # retrieve records from file
    ext = get_path_pieces(input_path)[2]
    if isinstance(ext, str):
        # Accept upper- or mixed-case extensions such as '.XLSX'.
        ext = ext.lower()

    if ext == 'xlsx':
        data_rows = get_xlsx_rows(input_path, has_header, results_sheet, deleted_sheet)
    elif ext == 'csv':
        data_rows = get_csv_rows(input_path, has_header)
    else:
        raise ValueError('Expected either a csv file or xlsx file for input.')

    # convert to PropEntry objects
    prop_entries = get_prop_entries_from_data(data_rows, path_idx, key_idx, value_idx,
                                              should_delete_converter, path_converter)
    header = data_rows.header

    # write to files
    if overwrite:
        write_prop_entries(prop_entries, repo_path)
    else:
        update_prop_entries(prop_entries, repo_path)

    # update the language last update if applicable
    if args.language and header is not None and len(header) > args.latest_commit_idx >= 0:
        set_commit_for_language(args.language, header[args.latest_commit_idx], args.language_file)

    sys.exit(0)


if __name__ == "__main__":
    main()