merge from 6631

Greg DiCristofaro 2020-07-30 12:18:39 -04:00
commit 4e3ee4f01b
36 changed files with 1350 additions and 255 deletions

View File

@ -22,7 +22,6 @@ import java.util.Map;
import org.sleuthkit.autopsy.coreutils.Logger;
import javax.swing.JLabel;
import javax.swing.table.DefaultTableCellRenderer;
import javax.swing.table.DefaultTableModel;
import org.openide.util.NbBundle.Messages;
import org.sleuthkit.autopsy.casemodule.Case;
import org.sleuthkit.datamodel.DataSource;
@ -75,6 +74,7 @@ class DataSourceSummaryCountsPanel extends javax.swing.JPanel {
rightAlignedRenderer.setHorizontalAlignment(JLabel.RIGHT);
initComponents();
fileCountsByCategoryTable.getTableHeader().setReorderingAllowed(false);
artifactCountsTable.getTableHeader().setReorderingAllowed(false);
setDataSource(null);
}
@ -110,11 +110,11 @@ class DataSourceSummaryCountsPanel extends javax.swing.JPanel {
* @param artifactDataModel The artifact type data model.
*/
private void updateCountsTableData(Object[][] fileCategoryDataModel, Object[][] artifactDataModel) {
fileCountsByCategoryTable.setModel(new DefaultTableModel(fileCategoryDataModel, FILE_BY_CATEGORY_COLUMN_HEADERS));
fileCountsByCategoryTable.setModel(new NonEditableTableModel(fileCategoryDataModel, FILE_BY_CATEGORY_COLUMN_HEADERS));
fileCountsByCategoryTable.getColumnModel().getColumn(1).setCellRenderer(rightAlignedRenderer);
fileCountsByCategoryTable.getColumnModel().getColumn(0).setPreferredWidth(130);
artifactCountsTable.setModel(new DefaultTableModel(artifactDataModel, ARTIFACT_COUNTS_COLUMN_HEADERS));
artifactCountsTable.setModel(new NonEditableTableModel(artifactDataModel, ARTIFACT_COUNTS_COLUMN_HEADERS));
artifactCountsTable.getColumnModel().getColumn(0).setPreferredWidth(230);
artifactCountsTable.getColumnModel().getColumn(1).setCellRenderer(rightAlignedRenderer);
@ -230,7 +230,6 @@ class DataSourceSummaryCountsPanel extends javax.swing.JPanel {
org.openide.awt.Mnemonics.setLocalizedText(resultsByTypeLabel, org.openide.util.NbBundle.getMessage(DataSourceSummaryCountsPanel.class, "DataSourceSummaryCountsPanel.resultsByTypeLabel.text")); // NOI18N
artifactCountsTable.setAutoCreateRowSorter(true);
artifactCountsScrollPane.setViewportView(artifactCountsTable);
fileTypePiePanel.setPreferredSize(new java.awt.Dimension(400, 300));

View File

@ -0,0 +1,36 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2020 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.casemodule.datasourcesummary;
import javax.swing.table.DefaultTableModel;
/**
* A Table model where cells are not editable.
*/
class NonEditableTableModel extends DefaultTableModel {
NonEditableTableModel(Object[][] data, Object[] columnNames) {
super(data, columnNames);
}
@Override
public boolean isCellEditable(int row, int column) {
return false;
}
}

View File

@ -305,7 +305,7 @@ public final class CentralRepoAccount {
normalizedAccountIdentifier = accountIdentifier.toLowerCase().trim();
}
} catch (CorrelationAttributeNormalizationException ex) {
throw new InvalidAccountIDException("Failed to normalize the account idenitier.", ex);
throw new InvalidAccountIDException("Failed to normalize the account idenitier " + accountIdentifier, ex);
}
return normalizedAccountIdentifier;
}

View File

@ -185,11 +185,11 @@ public class CorrelationAttributeUtil {
}
}
} catch (CorrelationAttributeNormalizationException ex) {
logger.log(Level.SEVERE, String.format("Error normalizing correlation attribute (%s)", artifact), ex); // NON-NLS
logger.log(Level.WARNING, String.format("Error normalizing correlation attribute (%s)", artifact), ex); // NON-NLS
return correlationAttrs;
}
catch (InvalidAccountIDException ex) {
logger.log(Level.SEVERE, String.format("Invalid account identifier (%s)", artifact), ex); // NON-NLS
logger.log(Level.WARNING, String.format("Invalid account identifier (artifactID: %d)", artifact.getId())); // NON-NLS
return correlationAttrs;
}
catch (CentralRepoException ex) {

View File

@ -65,8 +65,10 @@ abstract class CVTFilterRefresher implements RefreshThrottler.Refresher {
try (SleuthkitCase.CaseDbQuery dbQuery = skCase.executeQuery("SELECT MAX(date_time) as end, MIN(date_time) as start from account_relationships")) {
// ResultSet is closed by CasDBQuery
ResultSet rs = dbQuery.getResultSet();
rs.next();
startTime = rs.getInt("start"); // NON-NLS
endTime = rs.getInt("end"); // NON-NLS
}
// Get the devices with CVT artifacts
List<Integer> deviceObjIds = new ArrayList<>();

View File

@ -73,5 +73,5 @@ SummaryViewer.referencesLabel.text=Communication References:
SummaryViewer.referencesDataLabel.text=<reference count>
SummaryViewer.contactsLabel.text=Book Entries:
SummaryViewer.accountCountry.text=<account country>
SummaryViewer.fileRefPane.border.title=File Referernce(s) in Current Case
SummaryViewer.fileRefPane.border.title=File References in Current Case
SummaryViewer.selectAccountFileRefLabel.text=<Select a single account to see File References>

View File

@ -22,6 +22,8 @@ import com.google.common.eventbus.Subscribe;
import java.awt.BorderLayout;
import java.awt.Color;
import java.awt.Graphics;
import java.beans.PropertyChangeEvent;
import java.beans.PropertyChangeListener;
import java.util.List;
import java.util.stream.Collectors;
import javax.swing.JSplitPane;
@ -47,14 +49,12 @@ public final class DiscoveryTopComponent extends TopComponent {
private static final long serialVersionUID = 1L;
private static final String PREFERRED_ID = "Discovery"; // NON-NLS
private static final int ANIMATION_INCREMENT = 30;
private volatile static int resultsAreaSize = 250;
private final GroupListPanel groupListPanel;
private final DetailsPanel detailsPanel;
private final ResultsPanel resultsPanel;
private int dividerLocation = -1;
private static final int ANIMATION_INCREMENT = 30;
private static final int RESULTS_AREA_SMALL_SIZE = 250;
private SwingAnimator animator = null;
/**
@ -78,6 +78,19 @@ public final class DiscoveryTopComponent extends TopComponent {
}
});
rightSplitPane.addPropertyChangeListener(JSplitPane.DIVIDER_LOCATION_PROPERTY, new PropertyChangeListener() {
@Override
public void propertyChange(PropertyChangeEvent evt) {
if (evt.getPropertyName().equalsIgnoreCase(JSplitPane.DIVIDER_LOCATION_PROPERTY)) {
//Only change the saved location when it was a manual change by the user and not the animation or the window opening initially
if ((animator == null || !animator.isRunning()) && evt.getNewValue() instanceof Integer
&& ((int) evt.getNewValue() + 5) < (rightSplitPane.getHeight() - rightSplitPane.getDividerSize())) {
resultsAreaSize = (int) evt.getNewValue();
}
}
}
});
}
/**
@ -130,6 +143,7 @@ public final class DiscoveryTopComponent extends TopComponent {
@Override
protected void componentClosed() {
DiscoveryDialog.getDiscoveryDialogInstance().cancelSearch();
DiscoveryEventUtils.getDiscoveryEventBus().post(new DiscoveryEventUtils.ClearInstanceSelectionEvent());
DiscoveryEventUtils.getDiscoveryEventBus().unregister(this);
DiscoveryEventUtils.getDiscoveryEventBus().unregister(groupListPanel);
DiscoveryEventUtils.getDiscoveryEventBus().unregister(resultsPanel);
@ -245,6 +259,7 @@ public final class DiscoveryTopComponent extends TopComponent {
void handleDetailsVisibleEvent(DiscoveryEventUtils.DetailsVisibleEvent detailsVisibleEvent) {
if (animator != null && animator.isRunning()) {
animator.stop();
animator = null;
}
dividerLocation = rightSplitPane.getDividerLocation();
if (detailsVisibleEvent.isShowDetailsArea()) {
@ -316,8 +331,9 @@ public final class DiscoveryTopComponent extends TopComponent {
@Override
public boolean hasTerminated() {
if (dividerLocation != JSplitPane.UNDEFINED_CONDITION && dividerLocation < RESULTS_AREA_SMALL_SIZE) {
dividerLocation = RESULTS_AREA_SMALL_SIZE;
if (dividerLocation != JSplitPane.UNDEFINED_CONDITION && dividerLocation < resultsAreaSize) {
dividerLocation = resultsAreaSize;
animator = null;
return true;
}
return false;
@ -340,6 +356,7 @@ public final class DiscoveryTopComponent extends TopComponent {
public boolean hasTerminated() {
if (dividerLocation > rightSplitPane.getHeight() || dividerLocation == JSplitPane.UNDEFINED_CONDITION) {
dividerLocation = rightSplitPane.getHeight();
animator = null;
return true;
}
return false;
@ -362,8 +379,9 @@ public final class DiscoveryTopComponent extends TopComponent {
@Override
public void paintComponent(Graphics g) {
if ((dividerLocation == JSplitPane.UNDEFINED_CONDITION) || (dividerLocation <= rightSplitPane.getHeight() && dividerLocation >= RESULTS_AREA_SMALL_SIZE)) {
rightSplitPane.setDividerLocation(dividerLocation);
if (animator != null && animator.isRunning() && (dividerLocation == JSplitPane.UNDEFINED_CONDITION
|| (dividerLocation <= getHeight() && dividerLocation >= resultsAreaSize))) {
setDividerLocation(dividerLocation);
}
super.paintComponent(g);
}

View File

@ -3,7 +3,7 @@
<Form version="1.5" maxVersion="1.9" type="org.netbeans.modules.form.forminfo.JPanelFormInfo">
<Properties>
<Property name="minimumSize" type="java.awt.Dimension" editor="org.netbeans.beaninfo.editors.DimensionEditor">
<Dimension value="[700, 200]"/>
<Dimension value="[300, 60]"/>
</Property>
<Property name="preferredSize" type="java.awt.Dimension" editor="org.netbeans.beaninfo.editors.DimensionEditor">
<Dimension value="[700, 700]"/>
@ -315,7 +315,7 @@
<Container class="javax.swing.JPanel" name="resultsViewerPanel">
<Properties>
<Property name="minimumSize" type="java.awt.Dimension" editor="org.netbeans.beaninfo.editors.DimensionEditor">
<Dimension value="[0, 160]"/>
<Dimension value="[0, 60]"/>
</Property>
<Property name="preferredSize" type="java.awt.Dimension" editor="org.netbeans.beaninfo.editors.DimensionEditor">
<Dimension value="[700, 700]"/>

View File

@ -376,7 +376,7 @@ final class ResultsPanel extends javax.swing.JPanel {
javax.swing.Box.Filler filler4 = new javax.swing.Box.Filler(new java.awt.Dimension(0, 0), new java.awt.Dimension(0, 0), new java.awt.Dimension(32767, 0));
resultsViewerPanel = new javax.swing.JPanel();
setMinimumSize(new java.awt.Dimension(700, 200));
setMinimumSize(new java.awt.Dimension(300, 60));
setPreferredSize(new java.awt.Dimension(700, 700));
setLayout(new java.awt.BorderLayout());
@ -533,7 +533,7 @@ final class ResultsPanel extends javax.swing.JPanel {
add(pagingPanel, java.awt.BorderLayout.PAGE_START);
resultsViewerPanel.setMinimumSize(new java.awt.Dimension(0, 160));
resultsViewerPanel.setMinimumSize(new java.awt.Dimension(0, 60));
resultsViewerPanel.setPreferredSize(new java.awt.Dimension(700, 700));
resultsViewerPanel.setLayout(new java.awt.BorderLayout());
add(resultsViewerPanel, java.awt.BorderLayout.CENTER);

View File

@ -501,9 +501,9 @@ class GeoFilterPanel extends javax.swing.JPanel {
DataSource dataSource, BlackboardArtifact.ARTIFACT_TYPE artifactType) throws TskCoreException {
long count = 0;
String queryStr
= "SELECT count(DISTINCT artifact_id) AS count FROM"
= "SELECT count(DISTINCT artIds) AS count FROM"
+ " ("
+ " SELECT * FROM blackboard_artifacts as arts"
+ " SELECT arts.artifact_id as artIds, * FROM blackboard_artifacts as arts"
+ " INNER JOIN blackboard_attributes as attrs"
+ " ON attrs.artifact_id = arts.artifact_id"
+ " WHERE arts.artifact_type_id = " + artifactType.getTypeID()
@ -516,7 +516,7 @@ class GeoFilterPanel extends javax.swing.JPanel {
+ " or attrs.attribute_type_id = " + BlackboardAttribute.ATTRIBUTE_TYPE.TSK_GEO_TRACKPOINTS.getTypeID()
+ " or attrs.attribute_type_id = " + BlackboardAttribute.ATTRIBUTE_TYPE.TSK_GEO_WAYPOINTS.getTypeID()
+ " )"
+ " )";
+ " ) as innerTable";
try (SleuthkitCase.CaseDbQuery queryResult = sleuthkitCase.executeQuery(queryStr);
ResultSet resultSet = queryResult.getResultSet()) {
if (resultSet.next()) {

View File

@ -201,21 +201,32 @@ public final class CaseUcoReportModule implements GeneralReportModule {
Set<Long> dataSourceIds = dataSources.stream()
.map((datasource) -> datasource.getId())
.collect(Collectors.toSet());
logger.log(Level.INFO, "Writing all artifacts to the CASE-UCO report. "
+ "Keyword hits will be skipped as they can't be represented"
+ " in CASE format.");
// Write all standard artifacts that are contained within the
// selected data sources.
for (ARTIFACT_TYPE artType : currentCase.getSleuthkitCase().getBlackboardArtifactTypesInUse()) {
if(artType.equals(BlackboardArtifact.ARTIFACT_TYPE.TSK_KEYWORD_HIT)) {
// Keyword hits cannot be represented in CASE.
continue;
}
for (BlackboardArtifact artifact : currentCase.getSleuthkitCase().getBlackboardArtifacts(artType)) {
if (dataSourceIds.contains(artifact.getDataSource().getId())) {
try {
for (JsonElement element : exporter.exportBlackboardArtifact(artifact)) {
gson.toJson(element, reportWriter);
}
} catch (ContentNotExportableException | BlackboardJsonAttrUtil.InvalidJsonException ex) {
logger.log(Level.WARNING, String.format("Unable to export blackboard artifact (id: %d) to CASE/UCO. "
} catch (ContentNotExportableException ex) {
logger.log(Level.INFO, String.format("Unable to export blackboard artifact (id: %d, type: %d) to CASE/UCO. "
+ "The artifact type is either not supported or the artifact instance does not have any "
+ "exportable attributes.", artifact.getId()));
+ "exportable attributes.", artifact.getId(), artType.getTypeID()));
} catch (BlackboardJsonAttrUtil.InvalidJsonException ex) {
logger.log(Level.WARNING, String.format("Artifact instance (id: %d, type: %d) contained a "
+ "malformed json attribute.", artifact.getId(), artType.getTypeID()), ex);
}
}
}

View File

@ -23,6 +23,7 @@ environment:
PYTHON: "C:\\Python36-x64"
install:
- ps: choco install nuget.commandline
- ps: choco install ant --ignore-dependencies
- git clone https://github.com/sleuthkit/sleuthkit
- ps: $env:Path="C:\Program Files\Java\jdk1.8.0\bin;$($env:Path);C:\ProgramData\chocolatey\lib\ant"
@ -36,6 +37,7 @@ services:
build_script:
- cd %TSK_HOME%
- nuget restore win32\libtsk -PackagesDirectory win32\packages
- python setupDevRepos.py
- python win32\updateAndBuildAll.py -m
- ps: pushd bindings/java

View File

@ -22,8 +22,7 @@ The next step is to add an input data source to the case. The <strong>Add Data S
- For local disk, select one of the detected disks. Autopsy will add the current view of the disk to the case (i.e. a snapshot of the meta-data). However, the individual file content (not the meta-data) is read from the live disk, so it will reflect changes made to the disk. You can optionally create a copy of all data read from the local disk to a VHD file, which can be useful for triage situations. Note that you may need to run Autopsy as an Administrator to detect all disks.
- For logical files (a single file or folder of files), use the "Add" button to add one or more files or folders on your system to the case. Folders will be recursively added to the case.
After supplying the needed data, Autopsy will quickly review the data sources and add minimal metadata to the case databases so that it can schedule the files for analysis. While it is doing that, it will prompt you to configure the Ingest Modules.
Next it will prompt you to configure the Ingest Modules.
\subsection s1c Ingest Modules
@ -35,18 +34,21 @@ The standard ingest modules included with Autopsy are:
- <strong>\subpage recent_activity_page</strong> extracts user activity as saved by web browsers and the OS. Also runs Regripper on the registry hive.
- <strong>\subpage hash_db_page</strong> uses hash sets to ignore known files from the NIST NSRL and flag known bad files. Use the "Advanced" button to add and configure the hash sets to use during this process. You will get updates on known bad file hits as the ingest occurs. You can later add hash sets via the Tools -&gt; Options menu in the main UI. You can download an index of the NIST NSRL from http://sourceforge.net/projects/autopsy/files/NSRL/
- <strong>\subpage file_type_identification_page</strong> determines file types based on signatures and reports them based on MIME type. It stores the results in the Blackboard and many modules depend on this. It uses the Tika open source library. You can define your own custom file types in Tools, Options, File Types.
- <strong>\subpage extension_mismatch_detector_page</strong> uses the results from the File Type Identification and flags files that have an extension not traditionally associated with the file's detected type. Ignores 'known' (NSRL) files. You can customize the MIME types and file extensions per MIME type in Tools, Options, File Extension Mismatch.
- <strong>\subpage embedded_file_extractor_page</strong> opens ZIP, RAR, other archive formats, Doc, Docx, PPT, PPTX, XLS, and XLSX and sends the derived files from those files back through the ingest pipeline for analysis.
- <strong>\subpage EXIF_parser_page</strong> extracts EXIF information from JPEG files and posts the results into the tree in the main UI.
- <strong>\subpage keyword_search_page</strong> uses keyword lists to identify files with specific words in them. You can select the keyword lists to search for automatically and you can create new lists using the "Advanced" button. Note that with keyword search, you can always conduct searches after ingest has finished. The keyword lists that you select during ingest will be searched for at periodic intervals and you will get the results in real-time. You do not need to wait for all files to be indexed before performing a keyword search, however you will only get results from files that have already been indexed when you perform your search.
- <strong>\subpage email_parser_page</strong> identifies Thunderbird MBOX files and PST format files based on file signatures, extracts the e-mails from them, and adds the results to the Blackboard.
- <strong>\subpage extension_mismatch_detector_page</strong> uses the results from the File Type Identification and flags files that have an extension not traditionally associated with the file's detected type. Ignores 'known' (NSRL) files. You can customize the MIME types and file extensions per MIME type in Tools, Options, File Extension Mismatch.
- <strong>\subpage data_source_integrity_page</strong> computes a checksum on E01 files and compares with the E01 file's internal checksum to ensure they match.
- <strong>\subpage android_analyzer_page</strong> allows you to parse common items from Android devices. Places artifacts into the BlackBoard.
- <strong>\subpage interesting_files_identifier_page</strong> searches for files and directories based on user-specified rules in Tools, Options, Interesting Files. It works as a "File Alerting Module". It generates messages in the inbox when specified files are found.
- <strong>\subpage photorec_carver_page</strong> carves files from unallocated space and sends them through the file processing chain.
- <strong>\subpage cr_ingest_module</strong> adds file hashes and other extracted properties to a central repository for future correlation and to flag previously notable files.
- <strong>\subpage encryption_page</strong> looks for encrypted files.
- <strong>\subpage interesting_files_identifier_page</strong> searches for files and directories based on user-specified rules in Tools, Options, Interesting Files. It works as a "File Alerting Module". It generates messages in the inbox when specified files are found.
- <strong>\subpage cr_ingest_module</strong> adds file hashes and other extracted properties to a central repository for future correlation and to flag previously notable files.
- <strong>\subpage photorec_carver_page</strong> carves files from unallocated space and sends them through the file processing chain.
- <strong>\subpage vm_extractor_page</strong> extracts data from virtual machine files.
- <strong>\subpage data_source_integrity_page</strong> computes a checksum on E01 files and compares with the E01 file's internal checksum to ensure they match.
- <strong>\subpage drone_page</strong> extracts data from drone files.
- <strong>\subpage plaso_page</strong> uses Plaso to create \ref timeline_page "timeline" events.
- <strong>\subpage android_analyzer_page</strong> allows you to parse common items from Android devices. Places artifacts into the BlackBoard.
- <strong>\subpage gpx_page</strong> extracts geolocation data from .gpx files.
When you select a module, you will have the option to change its settings. For example, you can configure which keyword search lists to use during ingest and which hash sets to use. Refer to the individual module help for details on configuring each module.

View File

@ -1,220 +0,0 @@
"""This script determines the updated, added, and deleted properties from the '.properties-MERGED' files
and generates a csv file containing the items changed. This script requires the python libraries:
gitpython and jproperties. As a consequence, it also requires git >= 1.7.0 and python >= 3.4.
"""
from git import Repo
from typing import List, Dict, Tuple
import re
import csv
from jproperties import Properties
import sys
class ItemChange:
def __init__(self, rel_path: str, key: str, prev_val: str, cur_val: str):
"""Describes the change that occurred for a particular key of a properties file.
Args:
rel_path (str): The relative path of the properties file.
key (str): The key in the properties file.
prev_val (str): The previous value for the key.
cur_val (str): The current value for the key.
"""
self.rel_path = rel_path
self.key = key
self.prev_val = prev_val
self.cur_val = cur_val
if ItemChange.has_str_content(cur_val) and not ItemChange.has_str_content(prev_val):
self.type = 'ADDITION'
elif not ItemChange.has_str_content(cur_val) and ItemChange.has_str_content(prev_val):
self.type = 'DELETION'
else:
self.type = 'CHANGE'
@staticmethod
def has_str_content(content: str):
"""Determines whether or not the content is empty or None.
Args:
content (str): The text.
Returns:
bool: Whether or not it has content.
"""
return content is not None and len(content.strip()) > 0
@staticmethod
def get_headers() -> List[str]:
"""Returns the csv headers to insert when serializing a list of ItemChange objects to csv.
Returns:
List[str]: The column headers
"""
return ['Relative Path', 'Key', 'Change Type', 'Previous Value', 'Current Value']
def get_row(self) -> List[str]:
"""Returns the list of values to be entered as a row in csv serialization.
Returns:
List[str]: The list of values to be entered as a row in csv serialization.
"""
return [
self.rel_path,
self.key,
self.type,
self.prev_val,
self.cur_val]
def get_entry_dict(diff_str: str) -> Dict[str, str]:
"""Retrieves a dictionary mapping the properties represented in the string.
Args:
diff_str (str): The string of the properties file.
Returns:
Dict[str,str]: The mapping of keys to values in that properties file.
"""
props = Properties()
props.load(diff_str, "utf-8")
return props.properties
def get_item_change(rel_path: str, key: str, prev_val: str, cur_val: str) -> ItemChange:
"""Returns an ItemChange object if the previous value is not equal to the current value.
Args:
rel_path (str): The relative path for the properties file.
key (str): The key within the properties file for this potential change.
prev_val (str): The previous value.
cur_val (str): The current value.
Returns:
ItemChange: The ItemChange object or None if values are the same.
"""
if (prev_val == cur_val):
return None
else:
return ItemChange(rel_path, key, prev_val, cur_val)
def get_changed(rel_path: str, a_str: str, b_str: str) -> List[ItemChange]:
"""Given the relative path of the properties file that
Args:
rel_path (str): The relative path for the properties file.
a_str (str): The string representing the original state of the file.
b_str (str): The string representing the current state of the file.
Returns:
List[ItemChange]: The changes determined.
"""
print('Retrieving changes for {}...'.format(rel_path))
a_dict = get_entry_dict(a_str)
b_dict = get_entry_dict(b_str)
all_keys = set().union(a_dict.keys(), b_dict.keys())
mapped = map(lambda key: get_item_change(
rel_path, key, a_dict.get(key), b_dict.get(key)), all_keys)
return filter(lambda entry: entry is not None, mapped)
def get_text(blob) -> str:
return blob.data_stream.read().decode('utf-8')
def get_changed_from_diff(rel_path: str, diff) -> List[ItemChange]:
"""Determines changes from a git python diff.
Args:
rel_path (str): The relative path for the properties file.
diff: The git python diff.
Returns:
List[ItemChange]: The changes in properties.
"""
# an item was added
if diff.change_type == 'A':
changes = get_changed(rel_path, '', get_text(diff.b_blob))
# an item was deleted
elif diff.change_type == 'D':
changes = get_changed(rel_path, get_text(diff.a_blob), '')
# an item was modified
elif diff.change_type == 'M':
changes = get_changed(rel_path, get_text(
diff.a_blob), get_text(diff.b_blob))
else:
changes = []
return changes
def get_rel_path(diff) -> str:
"""Determines the relative path based on the git python.
Args:
diff: The git python diff.
Returns:
str: The determined relative path.
"""
if diff.b_path is not None:
return diff.b_path
elif diff.a_path is not None:
return diff.a_path
else:
return '<Uknown Path>'
def write_diff_to_csv(repo_path: str, output_path: str, commit_1_id: str, commit_2_id: str):
"""Determines the changes made in '.properties-MERGED' files from one commit to another commit.
Args:
repo_path (str): The local path to the git repo.
output_path (str): The output path for the csv file.
commit_1_id (str): The initial commit for the diff.
commit_2_id (str): The latest commit for the diff.
"""
repo = Repo(repo_path)
commit_1 = repo.commit(commit_1_id)
commit_2 = repo.commit(commit_2_id)
diffs = commit_1.diff(commit_2)
with open(output_path, 'w', newline='') as csvfile:
writer = csv.writer(csvfile)
writer.writerow(ItemChange.get_headers())
for diff in diffs:
rel_path = get_rel_path(diff)
if not rel_path.endswith('.properties-MERGED'):
continue
changes = get_changed_from_diff(rel_path, diff)
for item_change in changes:
writer.writerow(item_change.get_row())
def print_help():
"""Prints a quick help message.
"""
print("diffscript.py [path to repo] [csv output path] [commit for previous release] [commit for current release (optional; defaults to 'HEAD')]")
def main():
if len(sys.argv) <= 3:
print_help()
sys.exit(1)
repo_path = sys.argv[1]
output_path = sys.argv[2]
commit_1_id = sys.argv[3]
commit_2_id = sys.argv[4] if len(sys.argv) > 4 else 'HEAD'
write_diff_to_csv(repo_path, output_path, commit_1_id, commit_2_id)
sys.exit(0)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,2 @@
__pycache__
.idea

View File

@ -0,0 +1,20 @@
## Description
This folder provides tools to handle updates of bundle files for language localization. There are three main scripts:
- `allbundlesscript.py` - generates a csv file containing the relative path of the bundle file, the key, and the value for each property.
- `diffscript.py` - determines the property values that have changed between two commits and generates a csv file containing the relative path, the key, the previous value, the new value, and the change type (addition, deletion, change).
- `updatepropsscript.py` - given a csv file containing the relative path of the bundle, the key, and the new value, updates the property values for a given language within the project.
All of these scripts print more detailed usage information when called with `-h`.
## Basic Localization Update Workflow
1. Call `python3 diffscript.py <output path> -l <language>` to generate a csv file containing differences in properties file values from the language's previous commit to the `HEAD` commit. The language identifier should be the abbreviated identifier used for the bundle (i.e. 'ja' for Japanese). The output path should be specified as a relative path with the dot slash notation (i.e. `./outputpath.csv`) or an absolute path.
2. Update the csv file with translations.
3. Call `python3 updatepropsscript.py <input path> -l <language>` to update the properties files based on the translated csv file. The csv file should be formatted such that the columns are bundle relative path, property file key, translated value, and the commit id of the latest commit that the changes reflect. The commit id only needs to appear in the header row. The input path should be specified as a relative path with the dot slash notation (i.e. `./inputpath.csv`) or an absolute path.
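For orientation, the same workflow can also be driven from Python rather than the command line. This is only a sketch based on the functions added in this commit (`write_diff_to_csv` in diffscript.py, the helpers in langpropsutil.py and gitutil.py); the language, output path, and file names are illustrative.

```python
# Sketch of the update workflow, assuming it runs from the localization scripts folder
# and that lastupdated.properties already has an entry for 'ja'.
from envutil import get_proj_dir
from gitutil import get_git_root, get_commit_id
from langpropsutil import get_commit_for_language, set_commit_for_language
from diffscript import write_diff_to_csv  # function defined in diffscript.py later in this commit

repo = get_git_root(get_proj_dir())
prev_commit = get_commit_for_language('ja')  # last commit already translated for Japanese
# 1. dump the changed properties since that commit to a csv file for translators
write_diff_to_csv(repo, './ja_changes.csv', prev_commit, 'HEAD', show_commits=True)
# 2. translate ./ja_changes.csv, then apply it with updatepropsscript.py
# 3. record which commit the translations are now current against
set_commit_for_language('ja', get_commit_id(repo, 'HEAD'))
```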
## Localization Generation for the First Time
First-time updates follow a similar procedure, except that instead of calling `diffscript.py`, you call `python3 allbundlesscript.py <output path>` to generate a csv file with the relative path of each bundle file, the property file keys, and the property file values. The output path should be specified as a relative path with the dot slash notation (i.e. `./outputpath.csv`) or an absolute path.
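A corresponding sketch for the first-time export, again only illustrative and assuming the `write_items_to_csv` function shown in allbundlesscript.py later in this commit:

```python
# Sketch of a first-time export of every key/value pair in the merged bundle files.
from envutil import get_proj_dir
from gitutil import get_git_root
from allbundlesscript import write_items_to_csv  # function defined in allbundlesscript.py below

repo = get_git_root(get_proj_dir())
# writes rows of (relative path, key, value) for every '.properties-MERGED' file at HEAD
write_items_to_csv(repo, './allbundles.csv', show_commit=True)
```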
## Unit Tests
Unit tests can be run from this directory using `python3 -m unittest`.

View File

@ -0,0 +1,73 @@
"""This script finds all '.properties-MERGED' files and writes relative path, key, and value to a CSV file.
This script requires the python libraries: gitpython and jproperties. As a consequence, it also requires
git >= 1.7.0 and python >= 3.4. This script relies on fetching 'HEAD' from the current branch, so make sure the
repo is on the correct branch (e.g. develop).
"""
import sys
from envutil import get_proj_dir
from fileutil import get_filename_addition, OMITTED_ADDITION
from gitutil import get_property_file_entries, get_commit_id, get_git_root
from csvutil import records_to_csv
from typing import Union
import re
import argparse
def write_items_to_csv(repo_path: str, output_path: str, show_commit: bool, value_regex: Union[str, None] = None):
"""Determines the contents of '.properties-MERGED' files and writes to a csv file.
Args:
repo_path (str): The local path to the git repo.
output_path (str): The output path for the csv file.
show_commit (bool): Whether or not to include the commit id in the header
value_regex (Union[str, None]): If non-none, only key value pairs where the value is a regex match with this
value will be included.
"""
row_header = ['Relative path', 'Key', 'Value']
if show_commit:
row_header.append(get_commit_id(repo_path, 'HEAD'))
rows = []
omitted = []
for entry in get_property_file_entries(repo_path):
new_entry = [entry.rel_path, entry.key, entry.value]
if value_regex is None or re.match(value_regex, entry.value):
rows.append(new_entry)
else:
omitted.append(new_entry)
records_to_csv(output_path, [row_header] + rows)
if len(omitted) > 0:
records_to_csv(get_filename_addition(output_path, OMITTED_ADDITION), [row_header] + omitted)
def main():
# noinspection PyTypeChecker
parser = argparse.ArgumentParser(description='Gathers all key-value pairs within .properties-MERGED files into '
'one csv file.',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(dest='output_path', type=str, help='The path to the output csv file. The output path should be'
' specified as a relative path with the dot slash notation '
'(i.e. \'./outputpath.csv\') or an absolute path.')
parser.add_argument('-r', '--repo', dest='repo_path', type=str, required=False,
help='The path to the repo. If not specified, path of script is used.')
parser.add_argument('-nc', '--no_commit', dest='no_commit', action='store_true', default=False,
required=False, help="Suppresses adding commits to the generated csv header.")
args = parser.parse_args()
repo_path = args.repo_path if args.repo_path is not None else get_git_root(get_proj_dir())
output_path = args.output_path
show_commit = not args.no_commit
write_items_to_csv(repo_path, output_path, show_commit)
sys.exit(0)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,51 @@
"""Provides tools for parsing and writing to a csv file.
"""
from typing import List, Iterable, Tuple
import csv
import os
def records_to_csv(output_path: str, rows: Iterable[List[str]]):
"""Writes rows to a csv file at the specified path.
Args:
output_path (str): The path where the csv file will be written.
rows (List[List[str]]): The rows to be written. Each row of a
list of strings will be written according
to their index (i.e. column 3 will be index 2).
"""
parent_dir, file = os.path.split(output_path)
if not os.path.exists(parent_dir):
os.makedirs(parent_dir)
with open(output_path, 'w', encoding="utf-8-sig", newline='') as csvfile:
writer = csv.writer(csvfile)
for row in rows:
writer.writerow(row)
def csv_to_records(input_path: str, header_row: bool) -> Tuple[List[List[str]], List[str]]:
"""Reads rows from the csv file at the specified path, returning the data rows and the header row (if any).
Args:
input_path (str): The path of the csv file to read.
header_row (bool): Whether or not there is a header row to be skipped.
"""
with open(input_path, encoding='utf-8-sig') as csv_file:
csv_reader = csv.reader(csv_file, delimiter=',')
header = None
results = []
try:
for row in csv_reader:
if header_row:
header = row
header_row = False
else:
results.append(row)
except Exception as e:
raise Exception("There was an error parsing csv {path}".format(path=input_path), e)
return results, header

View File

@ -0,0 +1,97 @@
"""This script determines the updated, added, and deleted properties from the '.properties-MERGED' files
and generates a csv file containing the items changed. This script requires the python libraries:
gitpython and jproperties. As a consequence, it also requires git >= 1.7.0 and python >= 3.4.
"""
import re
import sys
from envutil import get_proj_dir
from fileutil import get_filename_addition, OMITTED_ADDITION
from gitutil import get_property_files_diff, get_commit_id, get_git_root
from itemchange import ItemChange, ChangeType
from csvutil import records_to_csv
import argparse
from typing import Union
from langpropsutil import get_commit_for_language, LANG_FILENAME
def write_diff_to_csv(repo_path: str, output_path: str, commit_1_id: str, commit_2_id: str, show_commits: bool,
value_regex: Union[str, None] = None):
"""Determines the changes made in '.properties-MERGED' files from one commit to another commit.
Args:
repo_path (str): The local path to the git repo.
output_path (str): The output path for the csv file.
commit_1_id (str): The initial commit for the diff.
commit_2_id (str): The latest commit for the diff.
show_commits (bool): Show commits in the header row.
value_regex (Union[str, None]): If non-none, only key value pairs where the value is a regex match with this
value will be included.
"""
row_header = ItemChange.get_headers()
if show_commits:
row_header += [get_commit_id(repo_path, commit_1_id), get_commit_id(repo_path, commit_2_id)]
rows = []
omitted = []
for entry in get_property_files_diff(repo_path, commit_1_id, commit_2_id):
new_entry = entry.get_row()
if value_regex is not None and (entry.type == ChangeType.DELETION or not re.match(value_regex, entry.cur_val)):
omitted.append(new_entry)
else:
rows.append(new_entry)
records_to_csv(output_path, [row_header] + rows)
if len(omitted) > 0:
records_to_csv(get_filename_addition(output_path, OMITTED_ADDITION), [row_header] + omitted)
def main():
# noinspection PyTypeChecker
parser = argparse.ArgumentParser(description="Determines the updated, added, and deleted properties from the "
"'.properties-MERGED' files and generates a csv file containing "
"the items changed.",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(dest='output_path', type=str, help='The path to the output csv file. The output path should '
'be specified as a relative path with the dot slash notation'
' (i.e. \'./outputpath.csv\') or an absolute path.')
parser.add_argument('-r', '--repo', dest='repo_path', type=str, required=False,
help='The path to the repo. If not specified, path of script is used.')
parser.add_argument('-fc', '--first-commit', dest='commit_1_id', type=str, required=False,
help='The commit for previous release. This flag or the language flag need to be specified'
' in order to determine a start point for the difference.')
parser.add_argument('-lc', '--latest-commit', dest='commit_2_id', type=str, default='HEAD', required=False,
help='The commit for current release.')
parser.add_argument('-nc', '--no-commits', dest='no_commits', action='store_true', default=False,
required=False, help="Suppresses adding commits to the generated csv header.")
parser.add_argument('-l', '--language', dest='language', type=str, default=None, required=False,
help='Specify the language in order to determine the first commit to use (i.e. \'ja\' for '
'Japanese. This flag overrides the first-commit flag.')
args = parser.parse_args()
repo_path = args.repo_path if args.repo_path is not None else get_git_root(get_proj_dir())
output_path = args.output_path
commit_1_id = args.commit_1_id
lang = args.language
if lang is not None:
commit_1_id = get_commit_for_language(lang)
if commit_1_id is None:
print('Either the first commit or language flag need to be specified. If specified, the language file, ' +
LANG_FILENAME + ', may not have the latest commit for the language.', file=sys.stderr)
parser.print_help(sys.stderr)
sys.exit(1)
commit_2_id = args.commit_2_id
show_commits = not args.no_commits
write_diff_to_csv(repo_path, output_path, commit_1_id, commit_2_id, show_commits)
sys.exit(0)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,17 @@
"""Functions relating to the project environment.
"""
import pathlib
from typing import Union
def get_proj_dir(path: Union[pathlib.PurePath, str] = __file__) -> str:
"""
Gets parent directory of this file (and subsequently, the project).
Args:
path: Can be overridden to provide a different file. This will return the parent of that file in that instance.
Returns:
The project folder or the parent folder of the file provided.
"""
return str(pathlib.Path(path).parent.absolute())

View File

@ -0,0 +1,63 @@
import os
from typing import Union, Tuple
from pathlib import Path
def get_path_pieces(orig_path: str) -> Tuple[str, Union[str, None], Union[str, None]]:
"""Retrieves path pieces. This is a naive approach as it determines if a file is present based on the
presence of an extension.
Args:
orig_path: The original path to deconstruct.
Returns: A tuple of directory, filename and extension. If no extension is present, filename and extension are None.
"""
potential_parent_dir, orig_file = os.path.split(str(Path(orig_path)))
filename, file_extension = os.path.splitext(orig_file)
if file_extension.startswith('.'):
file_extension = file_extension[1:]
if file_extension is None or len(file_extension) < 1:
return str(Path(orig_path)), None, None
else:
return potential_parent_dir, filename, file_extension
def get_new_path(orig_path: str, new_filename: str) -> str:
"""Obtains a new path. This tries to determine if the provided path is a directory or filename (has an
extension containing '.') then constructs the new path with the old parent directory and the new filename.
Args:
orig_path (str): The original path.
new_filename (str): The new filename to use.
Returns:
str: The new path.
"""
parent_dir, filename, ext = get_path_pieces(orig_path)
return str(Path(parent_dir) / Path(new_filename))
# For use with creating csv filenames for entries that have been omitted.
OMITTED_ADDITION = '-omitted'
def get_filename_addition(orig_path: str, filename_addition: str) -> str:
"""Gets filename with addition. So if item is '/path/name.ext' and the filename_addition is '-add', the new result
would be '/path/name-add.ext'.
Args:
orig_path (str): The original path.
filename_addition (str): The new addition.
Returns: The altered path.
"""
parent_dir, filename, extension = get_path_pieces(orig_path)
if filename is None:
return str(Path(orig_path + filename_addition))
else:
ext = '' if extension is None else extension
return str(Path(parent_dir) / Path('{0}{1}.{2}'.format(filename, filename_addition, ext)))

View File

@ -0,0 +1,168 @@
"""Functions relating to using git and GitPython with an existing repo.
"""
from git import Repo, Diff, Blob
from typing import List, Union, Iterator, Tuple, Any
from itemchange import ItemChange, get_changed
from pathlib import Path
from propentry import PropEntry
from propsutil import DEFAULT_PROPS_EXTENSION, get_entry_dict
def get_text(blob: Blob) -> str:
return blob.data_stream.read().decode('utf-8')
def get_git_root(child_path: str) -> str:
"""
Taken from https://stackoverflow.com/questions/22081209/find-the-root-of-the-git-repository-where-the-file-lives,
this obtains the root path of the git repo in which this file exists.
Args:
child_path: The path of a child within the repo.
Returns: The repo root path.
"""
git_repo = Repo(child_path, search_parent_directories=True)
git_root = git_repo.git.rev_parse("--show-toplevel")
return git_root
def get_changed_from_diff(rel_path: str, diff: Diff) -> List[ItemChange]:
"""Determines changes from a git python diff.
Args:
rel_path (str): The relative path for the properties file.
diff (Diff): The git python diff.
Returns:
List[ItemChange]: The changes in properties.
"""
# an item was added
if diff.change_type == 'A':
changes = get_changed(rel_path, '', get_text(diff.b_blob))
# an item was deleted
elif diff.change_type == 'D':
changes = get_changed(rel_path, get_text(diff.a_blob), '')
# an item was modified
elif diff.change_type == 'M':
changes = get_changed(rel_path, get_text(
diff.a_blob), get_text(diff.b_blob))
else:
changes = []
return changes
def get_rel_path(diff: Diff) -> Union[str, None]:
"""Determines the relative path based on the git python.
Args:
diff: The git python diff.
Returns:
str: The determined relative path.
"""
if diff.b_path is not None:
return diff.b_path
elif diff.a_path is not None:
return diff.a_path
else:
return None
def get_diff(repo_path: str, commit_1_id: str, commit_2_id: str) -> Any:
"""Determines the diff between two commits.
Args:
repo_path (str): The local path to the git repo.
commit_1_id (str): The initial commit for the diff.
commit_2_id (str): The latest commit for the diff.
Returns:
The determined diff.
"""
repo = Repo(repo_path, search_parent_directories=True)
commit_1 = repo.commit(commit_1_id)
commit_2 = repo.commit(commit_2_id)
return commit_1.diff(commit_2)
def get_commit_id(repo_path: str, commit_id: str) -> str:
"""Determines the hash for the given commit, resolving symbolic names such as 'HEAD' to the actual commit hash.
Args:
repo_path: The path to the repo.
commit_id: The id for the commit.
Returns:
The hash for the commit in the repo.
"""
repo = Repo(repo_path, search_parent_directories=True)
commit = repo.commit(commit_id.strip())
return str(commit.hexsha)
def get_property_files_diff(repo_path: str, commit_1_id: str, commit_2_id: str,
property_file_extension: str = DEFAULT_PROPS_EXTENSION) -> Iterator[ItemChange]:
"""Determines the item changes within property files as a diff between two commits.
Args:
repo_path (str): The repo path.
commit_1_id (str): The first git commit.
commit_2_id (str): The second git commit.
property_file_extension (str): The extension for properties files to gather.
Returns:
All found item changes in values of keys between the property files.
"""
diffs = get_diff(repo_path, commit_1_id.strip(), commit_2_id.strip())
for diff in diffs:
rel_path = get_rel_path(diff)
if rel_path is None or not rel_path.endswith('.' + property_file_extension):
continue
yield from get_changed_from_diff(rel_path, diff)
def list_paths(root_tree, path: Path = Path('.')) -> Iterator[Tuple[str, Blob]]:
"""
Given the root path to serve as a prefix, walks the tree of a git commit returning all files and blobs.
Repurposed from: https://www.enricozini.org/blog/2019/debian/gitpython-list-all-files-in-a-git-commit/
Args:
root_tree: The tree of the commit to walk.
path: The path to use as a prefix.
Returns: A tuple iterator where each tuple consists of the path as a string and a blob of the file.
"""
for blob in root_tree.blobs:
ret_item = (str(path / blob.name), blob)
yield ret_item
for tree in root_tree.trees:
yield from list_paths(tree, path / tree.name)
def get_property_file_entries(repo_path: str, at_commit: str = 'HEAD',
property_file_extension: str = DEFAULT_PROPS_EXTENSION) -> Iterator[PropEntry]:
"""
Retrieves all property files entries returning as an iterator of PropEntry objects.
Args:
repo_path: The path to the git repo.
at_commit: The commit to use.
property_file_extension: The extension to use for scanning for property files.
Returns: An iterator of PropEntry objects.
"""
repo = Repo(repo_path, search_parent_directories=True)
commit = repo.commit(at_commit.strip())
for item in list_paths(commit.tree):
path, blob = item
if path.endswith(property_file_extension):
for key, val in get_entry_dict(get_text(blob)).items():
yield PropEntry(path, key, val)

View File

@ -0,0 +1,103 @@
from typing import Iterator, List, Union
from propsutil import get_entry_dict
from enum import Enum
class ChangeType(Enum):
"""Describes the nature of a change in the properties file."""
ADDITION = 'ADDITION'
DELETION = 'DELETION'
CHANGE = 'CHANGE'
def __str__(self):
return str(self.value)
class ItemChange:
rel_path: str
key: str
prev_val: Union[str, None]
cur_val: Union[str, None]
type: ChangeType
def __init__(self, rel_path: str, key: str, prev_val: str, cur_val: str):
"""Describes the change that occurred for a particular key of a properties file.
Args:
rel_path (str): The relative path of the properties file.
key (str): The key in the properties file.
prev_val (str): The previous value for the key.
cur_val (str): The current value for the key.
"""
self.rel_path = rel_path
self.key = key
self.prev_val = prev_val
self.cur_val = cur_val
if cur_val is not None and prev_val is None:
self.type = ChangeType.ADDITION
elif cur_val is None and prev_val is not None:
self.type = ChangeType.DELETION
else:
self.type = ChangeType.CHANGE
@staticmethod
def get_headers() -> List[str]:
"""Returns the csv headers to insert when serializing a list of ItemChange objects to csv.
Returns:
List[str]: The column headers
"""
return ['Relative Path', 'Key', 'Change Type', 'Previous Value', 'Current Value']
def get_row(self) -> List[str]:
"""Returns the list of values to be entered as a row in csv serialization.
Returns:
List[str]: The list of values to be entered as a row in csv serialization.
"""
return [
self.rel_path,
self.key,
self.type,
self.prev_val,
self.cur_val]
def get_item_change(rel_path: str, key: str, prev_val: str, cur_val: str) -> Union[ItemChange, None]:
"""Returns an ItemChange object if the previous value is not equal to the current value.
Args:
rel_path (str): The relative path for the properties file.
key (str): The key within the properties file for this potential change.
prev_val (str): The previous value.
cur_val (str): The current value.
Returns:
ItemChange: The ItemChange object or None if values are the same.
"""
if prev_val == cur_val:
return None
else:
return ItemChange(rel_path, key, prev_val, cur_val)
def get_changed(rel_path: str, a_str: str, b_str: str) -> Iterator[ItemChange]:
"""Given the relative path of the properties file that has been provided,
determines the property items that have changed between the two property
file strings.
Args:
rel_path (str): The relative path for the properties file.
a_str (str): The string representing the original state of the file.
b_str (str): The string representing the current state of the file.
Returns:
List[ItemChange]: The changes determined.
"""
print('Retrieving changes for {0}...'.format(rel_path))
a_dict = get_entry_dict(a_str)
b_dict = get_entry_dict(b_str)
all_keys = set().union(a_dict.keys(), b_dict.keys())
mapped = map(lambda key: get_item_change(
rel_path, key, a_dict.get(key), b_dict.get(key)), all_keys)
return filter(lambda entry: entry is not None, mapped)

View File

@ -0,0 +1,34 @@
"""Functions handling retrieving and storing when a language was last updated.
"""
from typing import Union
from envutil import get_proj_dir
from propsutil import get_entry_dict_from_path, update_entry_dict
from os import path
LANG_FILENAME = 'lastupdated.properties'
def _get_last_update_key(language: str) -> str:
return "bundles.{lang}.lastupdated".format(lang=language)
def _get_props_path():
return path.join(get_proj_dir(), LANG_FILENAME)
def get_commit_for_language(language: str) -> Union[str, None]:
lang_dict = get_entry_dict_from_path(_get_props_path())
if lang_dict is None:
return None
key = _get_last_update_key(language)
if key not in lang_dict:
return None
return lang_dict[key]
def set_commit_for_language(language: str, latest_commit: str):
key = _get_last_update_key(language)
update_entry_dict({key: latest_commit}, _get_props_path())

View File

@ -0,0 +1,2 @@
# in format of bundles.<language>.lastupdated=<commit id>
bundles.ja.lastupdated=d9a37c48f4bd0dff014eead73a0eb730c875ed9f
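The helpers in langpropsutil.py above read and write this file. A minimal illustrative sketch of how an entry like the one shown here would be recorded and read back (the commit hash is just the value already present in the file):

```python
# Illustrative use of the lastupdated.properties helpers shown above.
from langpropsutil import set_commit_for_language, get_commit_for_language

# stores/updates "bundles.ja.lastupdated=<hash>" in lastupdated.properties
set_commit_for_language('ja', 'd9a37c48f4bd0dff014eead73a0eb730c875ed9f')
print(get_commit_for_language('ja'))  # -> d9a37c48f4bd0dff014eead73a0eb730c875ed9f
```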

View File

@ -0,0 +1,19 @@
class PropEntry:
rel_path: str
key: str
value: str
should_delete: bool
def __init__(self, rel_path: str, key: str, value: str, should_delete: bool = False):
"""Defines a property file entry to be updated in a property file.
Args:
rel_path (str): The relative path for the property file.
key (str): The key for the entry.
value (str): The value for the entry.
should_delete (bool, optional): Whether or not the key should simply be deleted. Defaults to False.
"""
self.rel_path = rel_path
self.key = key
self.value = value
self.should_delete = should_delete

View File

@ -0,0 +1,97 @@
"""Provides tools for reading from and writing to java properties files.
"""
from typing import Dict, Union, IO
from jproperties import Properties
import os
# The default extension for property files in autopsy repo
DEFAULT_PROPS_EXTENSION = 'properties-MERGED'
def get_lang_bundle_name(language: str) -> str:
"""
Returns the bundle name for the specific language identifier provided.
Args:
language: The language identifier (i.e. 'ja' for Japanese)
Returns:
The bundle name
"""
return 'Bundle_{lang}.properties'.format(lang=language)
def get_entry_dict(file_contents: Union[str, IO]) -> Dict[str, str]:
"""Retrieves a dictionary mapping the properties represented in the string.
Args:
file_contents: The string of the properties file or the file handle.
Returns:
Dict[str,str]: The mapping of keys to values in that properties file.
"""
props = Properties()
try:
props.load(file_contents)
except Exception as e:
raise Exception("There was an error loading properties file {file}".format(file=file_contents), e)
return props.properties
def get_entry_dict_from_path(props_path: str) -> Union[Dict[str, str], None]:
"""
Retrieves a dictionary mapping the properties represented in the file at the given path, or None if no properties file can be found
at that path.
Args:
props_path: The path to the properties file.
Returns: The entry dictionary for that properties file.
"""
if os.path.isfile(props_path):
with open(props_path, "rb") as f:
return get_entry_dict(f)
else:
return None
def set_entry_dict(contents: Dict[str, str], file_path: str):
"""Sets the property file to the key-value pairs of the contents dictionary.
Args:
contents (Dict[str, str]): The dictionary whose contents will be the key value pairs of the properties file.
file_path (str): The path to the properties file to create.
"""
props = Properties()
for key, val in contents.items():
props[key] = val
parent_dir, file = os.path.split(file_path)
if not os.path.exists(parent_dir):
os.makedirs(parent_dir)
with open(file_path, "wb") as f:
props.store(f)
def update_entry_dict(contents: Dict[str, str], file_path: str):
"""Updates the properties file at the given location with the key-value properties of contents.
Creates a new properties file at given path if none exists.
Args:
contents (Dict[str, str]): The dictionary whose contents will be the key value pairs of the properties file.
file_path (str): The path to the properties file to create.
"""
contents_to_edit = contents.copy()
cur_dict = get_entry_dict_from_path(file_path)
if cur_dict is None:
cur_dict = {}
for cur_key, cur_val in cur_dict.items():
# only update contents if contents does not already have key
if cur_key not in contents_to_edit:
contents_to_edit[cur_key] = cur_val
set_entry_dict(contents_to_edit, file_path)
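To make the merge behavior of `update_entry_dict` concrete (keys present in the update win, keys only present on disk are preserved), here is a small illustrative sketch; the file path and values are made up:

```python
# Illustrative sketch of update_entry_dict merge semantics (path and values are made up).
from propsutil import set_entry_dict, update_entry_dict, get_entry_dict_from_path

path = './Bundle_ja.properties'
set_entry_dict({'OpenAction.title': 'Open', 'SaveAction.title': 'Save'}, path)
update_entry_dict({'OpenAction.title': 'Open (ja)'}, path)
print(get_entry_dict_from_path(path))
# -> {'OpenAction.title': 'Open (ja)', 'SaveAction.title': 'Save'}
```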

View File

@ -0,0 +1 @@
output

View File

@ -0,0 +1,41 @@
import codecs
import os
import unittest
from typing import TypeVar, List
from csvutil import records_to_csv, csv_to_records
from test.unittestutil import get_output_path
class CsvUtilTest(unittest.TestCase):
T = TypeVar('T')
def assert_equal_arr(self, a: List[T], b: List[T]):
self.assertEqual(len(a), len(b), 'arrays are not equal length')
for i in range(0, len(a)):
if isinstance(a[i], list) and isinstance(b[i], list):
self.assert_equal_arr(a[i], b[i])
else:
self.assertEqual(a[i], b[i], "Items: {0} and {1} at index {2} are not equal.".format(a[i], b[i], i))
def test_read_write(self):
data = [['header1', 'header2', 'header3', 'additional header'],
['data1', 'data2', 'data3'],
['', 'data2-1', 'data2-2']]
os.makedirs(get_output_path(), exist_ok=True)
test_path = get_output_path('test.csv')
records_to_csv(test_path, data)
byte_inf = min(32, os.path.getsize(test_path))
with open(test_path, 'rb') as bom_test_file:
raw = bom_test_file.read(byte_inf)
if not raw.startswith(codecs.BOM_UTF8):
self.fail("written csv does not have appropriate BOM")
read_records_no_header, no_header = csv_to_records(test_path, header_row=False)
self.assert_equal_arr(read_records_no_header, data)
read_rows, header = csv_to_records(test_path, header_row=True)
self.assert_equal_arr(header, data[0])
self.assert_equal_arr(read_rows, [data[1], data[2]])

View File

@ -0,0 +1,52 @@
import os
import unittest
from typing import Tuple
from pathlib import Path
from fileutil import get_path_pieces, get_new_path, get_filename_addition
def joined_paths(pieces: Tuple[str, str, str]) -> str:
return os.path.join(pieces[0], pieces[1] + '.' + pieces[2])
PATH_PIECES1 = ('/test/folder', 'filename', 'ext')
PATH_PIECES2 = ('/test.test2/folder.test2', 'filename.test', 'ext')
PATH_PIECES3 = ('/test.test2/folder.test2/folder', None, None)
PATH1 = joined_paths(PATH_PIECES1)
PATH2 = joined_paths(PATH_PIECES2)
PATH3 = PATH_PIECES3[0]
ALL_ITEMS = [
(PATH_PIECES1, PATH1),
(PATH_PIECES2, PATH2),
(PATH_PIECES3, PATH3)
]
class FileUtilTest(unittest.TestCase):
def test_get_path_pieces(self):
for (expected_path, expected_filename, expected_ext), path in ALL_ITEMS:
path, filename, ext = get_path_pieces(path)
self.assertEqual(path, str(Path(expected_path)))
self.assertEqual(filename, expected_filename)
self.assertEqual(ext, expected_ext)
def test_get_new_path(self):
for (expected_path, expected_filename, expected_ext), path in ALL_ITEMS:
new_name = "newname.file"
new_path = get_new_path(path, new_name)
self.assertEqual(new_path, str(Path(expected_path) / Path(new_name)))
def test_get_filename_addition(self):
for (expected_path, expected_filename, expected_ext), path in ALL_ITEMS:
addition = "addition"
new_path = get_filename_addition(path, addition)
if expected_filename is None or expected_ext is None:
expected_file_path = Path(expected_path + addition)
else:
expected_file_path = Path(expected_path) / Path("{file_name}{addition}.{extension}".format(
file_name=expected_filename, addition=addition, extension=expected_ext))
self.assertEqual(
new_path, str(expected_file_path))

View File

@ -0,0 +1,96 @@
import unittest
from typing import Dict
from itemchange import get_changed, ChangeType
def dict_to_prop_str(this_dict: Dict[str, str]) -> str:
toret = ''
for key, val in this_dict.items():
toret += "{key}={value}\n".format(key=key, value=val)
return toret
class ItemChangeTest(unittest.TestCase):
def test_get_changed(self):
deleted_key = 'deleted.property.key'
deleted_val = 'will be deleted'
change_key = 'change.property.key'
change_val_a = 'original value'
change_val_b = 'new value'
change_key2 = 'change2.property.key'
change_val2_a = 'original value 2'
change_val2_b = ''
change_key3 = 'change3.property.key'
change_val3_a = ''
change_val3_b = 'cur value 3'
addition_key = 'addition.property.key'
addition_new_val = 'the added value'
same_key = 'samevalue.property.key'
same_value = 'the same value'
same_key2 = 'samevalue2.property.key'
same_value2 = ''
a_dict = {
deleted_key: deleted_val,
change_key: change_val_a,
change_key2: change_val2_a,
change_key3: change_val3_a,
same_key: same_value,
same_key2: same_value2
}
b_dict = {
change_key: change_val_b,
change_key2: change_val2_b,
change_key3: change_val3_b,
addition_key: addition_new_val,
same_key: same_value,
same_key2: same_value2
}
a_str = dict_to_prop_str(a_dict)
b_str = dict_to_prop_str(b_dict)
rel_path = 'my/rel/path.properties'
key_to_change = {}
for item_change in get_changed(rel_path, a_str, b_str):
self.assertEqual(item_change.rel_path, rel_path)
key_to_change[item_change.key] = item_change
deleted_item = key_to_change[deleted_key]
self.assertEqual(deleted_item.type, ChangeType.DELETION)
self.assertEqual(deleted_item.prev_val, deleted_val)
self.assertEqual(deleted_item.cur_val, None)
addition_item = key_to_change[addition_key]
self.assertEqual(addition_item.type, ChangeType.ADDITION)
self.assertEqual(addition_item.prev_val, None)
self.assertEqual(addition_item.cur_val, addition_new_val)
change_item = key_to_change[change_key]
self.assertEqual(change_item.type, ChangeType.CHANGE)
self.assertEqual(change_item.prev_val, change_val_a)
self.assertEqual(change_item.cur_val, change_val_b)
change_item2 = key_to_change[change_key2]
self.assertEqual(change_item2.type, ChangeType.CHANGE)
self.assertEqual(change_item2.prev_val, change_val2_a)
self.assertEqual(change_item2.cur_val, change_val2_b)
change_item3 = key_to_change[change_key3]
self.assertEqual(change_item3.type, ChangeType.CHANGE)
self.assertEqual(change_item3.prev_val, change_val3_a)
self.assertEqual(change_item3.cur_val, change_val3_b)
self.assertTrue(same_key not in key_to_change)
self.assertTrue(same_key2 not in key_to_change)
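# A minimal usage sketch of get_changed (hypothetical keys and values), mirroring the
# classifications asserted above:
def _example_changes():
    prev = "kept=same\nremoved=old\nedited=before\n"
    cur = "kept=same\nedited=after\nadded=new\n"
    for change in get_changed('my/rel/path.properties', prev, cur):
        # 'removed' -> ChangeType.DELETION (prev_val='old',    cur_val=None)
        # 'added'   -> ChangeType.ADDITION (prev_val=None,     cur_val='new')
        # 'edited'  -> ChangeType.CHANGE   (prev_val='before', cur_val='after')
        # 'kept' is unchanged and is not yielded at all.
        print(change.rel_path, change.key, change.type, change.prev_val, change.cur_val)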

View File

@ -0,0 +1,36 @@
import os
import unittest
from propsutil import set_entry_dict, get_entry_dict_from_path, update_entry_dict
from test.unittestutil import get_output_path
class PropsUtilTest(unittest.TestCase):
def test_update_entry_dict(self):
orig_key = 'orig_key'
orig_val = 'orig_val 片仮名 '
to_be_altered_key = 'tobealteredkey'
first_val = 'not yet altered sábado'
second_val = 'altered Stöcke'
orig_props = {
orig_key: orig_val,
to_be_altered_key: first_val
}
update_props = {
to_be_altered_key: second_val
}
os.makedirs(get_output_path(), exist_ok=True)
test_path = get_output_path('test.props')
set_entry_dict(orig_props, test_path)
orig_read_props = get_entry_dict_from_path(test_path)
self.assertEqual(orig_read_props[orig_key], orig_val)
self.assertEqual(orig_read_props[to_be_altered_key], first_val)
update_entry_dict(update_props, test_path)
updated_read_props = get_entry_dict_from_path(test_path)
self.assertEqual(updated_read_props[orig_key], orig_val)
self.assertEqual(updated_read_props[to_be_altered_key], second_val)
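# A minimal usage sketch (hypothetical path and keys), mirroring the round trip above:
# set_entry_dict writes a fresh properties file, get_entry_dict_from_path reads it back
# as a dict, and update_entry_dict merges new values into the existing file.
def _example_props_roundtrip():
    os.makedirs(get_output_path(), exist_ok=True)
    path = get_output_path('example.props')
    set_entry_dict({'app.title': 'Example', 'app.version': '1.0'}, path)
    update_entry_dict({'app.version': '2.0'}, path)
    updated = get_entry_dict_from_path(path)
    assert updated['app.title'] == 'Example'  # keys not mentioned in the update survive
    assert updated['app.version'] == '2.0'    # updated keys take the new value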

View File

@ -0,0 +1,14 @@
import os
from typing import Union
from envutil import get_proj_dir
TEST_ARTIFACT_FOLDER = 'artifacts'
TEST_OUTPUT_FOLDER = 'output'
def get_output_path(filename: Union[str, None] = None) -> str:
if filename is None:
return os.path.join(get_proj_dir(__file__), TEST_ARTIFACT_FOLDER, TEST_OUTPUT_FOLDER)
else:
return os.path.join(get_proj_dir(__file__), TEST_ARTIFACT_FOLDER, TEST_OUTPUT_FOLDER, filename)

View File

@ -0,0 +1,259 @@
"""This script finds all '.properties-MERGED' files and writes relative path, key, and value to a CSV file.
This script requires the python libraries: jproperties. It also requires Python 3.x.
"""
from typing import List, Dict, Tuple, Callable, Iterator
import sys
import os
from envutil import get_proj_dir
from fileutil import get_new_path
from gitutil import get_git_root
from langpropsutil import set_commit_for_language
from propsutil import set_entry_dict, get_entry_dict_from_path, get_lang_bundle_name
from csvutil import csv_to_records
from propentry import PropEntry
import argparse
def write_prop_entries(entries: Iterator[PropEntry], repo_path: str):
"""Writes property entry items to their expected relative path within the repo path.
Previously existing files will be overwritten, and prop entries marked with should_delete will
not be included.
Args:
entries (Iterator[PropEntry]): The prop entry items to write to disk.
repo_path (str): The path to the git repo.
"""
items_by_file = get_by_file(entries)
for rel_path, (entries, ignored) in items_by_file.items():
abs_path = os.path.join(repo_path, rel_path)
set_entry_dict(entries, abs_path)
def update_prop_entries(entries: Iterator[PropEntry], repo_path: str):
"""Updates property entry items to their expected relative path within the repo path. The union of
entries provided and any previously existing entries will be created. Keys marked for deletion will be
removed from the generated property files.
Args:
entries (List[PropEntry]): the prop entry items to write to disk.
repo_path (str): The path to the git repo.
"""
items_by_file = get_by_file(entries)
for rel_path, (entries, to_delete) in items_by_file.items():
abs_path = os.path.join(repo_path, rel_path)
prop_items = get_entry_dict_from_path(abs_path)
if prop_items is None:
prop_items = {}
for key_to_delete in to_delete:
if key_to_delete in prop_items:
del prop_items[key_to_delete]
for key, val in entries.items():
prop_items[key] = val
set_entry_dict(prop_items, abs_path)
def get_by_file(entries: Iterator[PropEntry]) -> Dict[str, Tuple[Dict[str, str], List[str]]]:
"""Sorts a prop entry list by file. The return type is a dictionary mapping
the file path to a tuple containing the key-value pairs to be updated and a
list of keys to be deleted.
Args:
entries (List[PropEntry]): The entries to be sorted.
Returns:
Dict[str, Tuple[Dict[str,str], List[str]]]: A dictionary mapping
the file path to a tuple containing the key-value pairs to be updated and a
list of keys to be deleted.
"""
to_ret = {}
for prop_entry in entries:
rel_path = prop_entry.rel_path
key = prop_entry.key
value = prop_entry.value
if rel_path not in to_ret:
to_ret[rel_path] = ({}, [])
if prop_entry.should_delete:
to_ret[rel_path][1].append(prop_entry.key)
else:
to_ret[rel_path][0][key] = value
return to_ret
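# Illustrative example (hypothetical entries): given
#   PropEntry('a/Bundle.properties', 'k1', 'v1', False)
#   PropEntry('a/Bundle.properties', 'k2', None, True)
#   PropEntry('b/Bundle.properties', 'k3', 'v3', False)
# get_by_file returns
#   {'a/Bundle.properties': ({'k1': 'v1'}, ['k2']),
#    'b/Bundle.properties': ({'k3': 'v3'}, [])}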
def idx_bounded(num: int, max_exclusive: int) -> bool:
return 0 <= num < max_exclusive
def get_prop_entry(row: List[str],
path_idx: int = 0,
key_idx: int = 1,
value_idx: int = 2,
should_delete_converter: Callable[[List[str]], bool] = None,
path_converter: Callable[[str], str] = None) -> PropEntry:
"""Parses a PropEntry object from a row of values in a csv.
Args:
row (List[str]): The csv file row to parse.
path_idx (int, optional): The column index for the relative path of the properties file. Defaults to 0.
key_idx (int, optional): The column index for the properties key. Defaults to 1.
value_idx (int, optional): The column index for the properties value. Defaults to 2.
should_delete_converter (Callable[[List[str]], bool], optional): If not None, this determines if the key should
be deleted from the row values. Defaults to None.
path_converter (Callable[[str], str], optional): If not None, this determines the relative path to use in the
created PropEntry given the original relative path. Defaults to None.
Returns:
PropEntry: The generated prop entry object.
"""
path = row[path_idx] if idx_bounded(path_idx, len(row)) else None
if path_converter is not None:
path = path_converter(path)
key = row[key_idx] if idx_bounded(key_idx, len(row)) else None
value = row[value_idx] if idx_bounded(value_idx, len(row)) else None
should_delete = False if should_delete_converter is None else should_delete_converter(row)
return PropEntry(path, key, value, should_delete)
def get_prop_entries(rows: List[List[str]],
path_idx: int = 0,
key_idx: int = 1,
value_idx: int = 2,
should_delete_converter: Callable[[List[str]], bool] = None,
path_converter: Callable[[str], str] = None) -> Iterator[PropEntry]:
"""Parses PropEntry objects from rows of values in a csv.
Args:
rows (List[List[str]]): The csv file rows to parse.
path_idx (int, optional): The column index for the relative path of the properties file. Defaults to 0.
key_idx (int, optional): The column index for the properties key. Defaults to 1.
value_idx (int, optional): The column index for the properties value. Defaults to 2.
should_delete_converter (Callable[[List[str]], bool], optional): If not None, this determines if the key should
be deleted from the row values. Defaults to None.
path_converter (Callable[[str], str], optional): If not None, this determines the relative path to use in the
created PropEntry given the original relative path. Defaults to None.
Returns:
Iterator[PropEntry]: The generated prop entry objects.
"""
return map(lambda row: get_prop_entry(
row, path_idx, key_idx, value_idx, should_delete_converter, path_converter),
rows)
def get_should_deleted(row_items: List[str], requested_idx: int) -> bool:
"""If there is a value at row_items[requested_idx] and that value starts with 'DELET', then this will return true.
Args:
row_items (List[str]): The row items.
requested_idx (int): The index specifying if the property should be deleted.
Returns:
bool: True if the row specifies it should be deleted.
"""
if idx_bounded(requested_idx, len(row_items)) and row_items[requested_idx].strip().upper().startswith('DELET'):
return True
else:
return False
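# e.g. get_should_deleted(['a/Bundle.properties', 'key', 'value', 'DELETION'], 3) -> True,
# while a blank or unrelated value at index 3 (or an out-of-range index) -> False.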
def main():
# noinspection PyTypeChecker
parser = argparse.ArgumentParser(description='Updates properties files in the autopsy git repo.',
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(dest='csv_file', type=str, help='The path to the csv file. The default format for the csv '
'file has columns of relative path, properties file key, '
'properties file value, whether or not the key should be '
'deleted, and commit id for how recent these updates are. '
'If the key should be deleted, the value in the should-delete column should be '
'\'DELETION\'. A header row is expected by default and the '
'commit id, if specified, should only be in the first row. The'
' input path should be specified as a relative path with the '
'dot slash notation (i.e. `./inputpath.csv`) or an absolute '
'path.')
parser.add_argument('-r', '--repo', dest='repo_path', type=str, required=False,
help='The path to the repo. If not specified, parent repo of path of script is used.')
parser.add_argument('-p', '--path-idx', dest='path_idx', action='store', type=int, default=0, required=False,
help='The column index in the csv file providing the relative path to the properties file.')
parser.add_argument('-k', '--key-idx', dest='key_idx', action='store', type=int, default=1, required=False,
help='The column index in the csv file providing the key within the properties file.')
parser.add_argument('-v', '--value-idx', dest='value_idx', action='store', type=int, default=2, required=False,
help='The column index in the csv file providing the value within the properties file.')
parser.add_argument('-d', '--should-delete-idx', dest='should_delete_idx', action='store', type=int, default=3,
required=False, help='The column index in the csv file providing whether or not the key '
'should be deleted. A value beginning with \'DELETION\' marks the key for deletion.')
parser.add_argument('-c', '--commit-idx', dest='latest_commit_idx', action='store', type=int, default=4,
required=False, help='The column index in the csv file providing the commit for which this '
'update applies. The commit should be located in the header row. ')
parser.add_argument('-f', '--file-rename', dest='file_rename', action='store', type=str, default=None,
required=False, help='If specified, the properties file will be renamed to the argument'
' preserving the specified relative path.')
parser.add_argument('-z', '--has-no-header', dest='has_no_header', action='store_true', default=False,
required=False, help='Specify that the csv file does not have a header row.')
parser.add_argument('-o', '--should-overwrite', dest='should_overwrite', action='store_true', default=False,
required=False, help="Whether or not to overwrite the previously existing properties files"
" ignoring previously existing values.")
parser.add_argument('-l', '--language', dest='language', type=str, default='HEAD', required=False,
help='Specify the language in order to update the last updated properties file and rename '
'files within directories. This flag overrides the file-rename flag.')
args = parser.parse_args()
repo_path = args.repo_path if args.repo_path is not None else get_git_root(get_proj_dir())
input_path = args.csv_file
path_idx = args.path_idx
key_idx = args.key_idx
value_idx = args.value_idx
has_header = not args.has_no_header
overwrite = args.should_overwrite
# means of determining if a key should be deleted from a file
if args.should_delete_idx is None:
should_delete_converter = None
else:
def should_delete_converter(row_items: List[str]):
return get_should_deleted(row_items, args.should_delete_idx)
# provides the means of renaming the bundle file
if args.language is not None:
def path_converter(orig_path: str):
return get_new_path(orig_path, get_lang_bundle_name(args.language))
elif args.file_rename is not None:
def path_converter(orig_path: str):
return get_new_path(orig_path, args.file_rename)
else:
path_converter = None
# retrieve records from csv
all_items, header = list(csv_to_records(input_path, has_header))
prop_entries = get_prop_entries(all_items, path_idx, key_idx, value_idx, should_delete_converter, path_converter)
# write to files
if overwrite:
write_prop_entries(prop_entries, repo_path)
else:
update_prop_entries(prop_entries, repo_path)
# update the language last update if applicable
if args.language is not None and header is not None and len(header) > args.latest_commit_idx >= 0:
set_commit_for_language(args.language, header[args.latest_commit_idx])
sys.exit(0)
if __name__ == "__main__":
main()
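# A possible end-to-end usage sketch (file names, paths, and keys are hypothetical):
# with an input csv laid out using the default column indices described above, e.g.
#
#   Relative path,Key,Value,Should delete,abc123commit
#   Core/src/org/example/Bundle.properties,greeting.text,Hello,,
#   Core/src/org/example/Bundle.properties,obsolete.text,,DELETION,
#
# the script could be invoked as
#
#   python <this script> ./sample_input.csv -l ja
#
# which merges the additions and updates, deletes the key marked 'DELETION', writes the
# results to the language bundle name returned by get_lang_bundle_name('ja'), and
# records the header row's commit id as the last update for the 'ja' language.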