Merge pull request #6642 from gdicristofaro/7215-externalTsvLib

7215 external tsv lib
This commit is contained in:
Richard Cordovano 2021-01-21 14:53:19 -05:00 committed by GitHub
commit 81c09eae68
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 173 additions and 209 deletions

View File

@ -28,6 +28,8 @@
<dependency conf="core->default" org="org.jsoup" name="jsoup" rev="1.10.3"/>
<dependency conf="core->default" org="com.fasterxml.jackson.core" name="jackson-databind" rev="2.9.7"/>
<dependency org="com.fasterxml.jackson.dataformat" name="jackson-dataformat-csv" rev="2.9.7"/>
<dependency conf="core->default" org="com.drewnoakes" name="metadata-extractor" rev="2.11.0"/>
<dependency conf="core->default" org="com.google.cloud" name="google-cloud-translate" rev="1.70.0"/>

View File

@ -68,6 +68,7 @@ file.reference.jackcess-encrypt-2.1.4.jar=release\\modules\\ext\\jackcess-encryp
file.reference.jackson-annotations-2.9.0.jar=release\\modules\\ext\\jackson-annotations-2.9.0.jar
file.reference.jackson-core-2.9.7.jar=release\\modules\\ext\\jackson-core-2.9.7.jar
file.reference.jackson-databind-2.9.7.jar=release\\modules\\ext\\jackson-databind-2.9.7.jar
file.reference.jackson-dataformat-csv-2.9.7.jar=release\\modules\\ext\\jackson-dataformat-csv-2.9.7.jar
file.reference.jai_core-1.1.3.jar=release\\modules\\ext\\jai_core-1.1.3.jar
file.reference.jai_imageio-1.1.jar=release\\modules\\ext\\jai_imageio-1.1.jar
file.reference.javax.annotation-api-1.3.2.jar=release\\modules\\ext\\javax.annotation-api-1.3.2.jar

View File

@ -393,6 +393,10 @@
<runtime-relative-path>ext/jackson-databind-2.9.7.jar</runtime-relative-path>
<binary-origin>release\modules\ext\jackson-databind-2.9.7.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/jackson-dataformat-csv-2.9.7.jar</runtime-relative-path>
<binary-origin>release\modules\ext\jackson-dataformat-csv-2.9.7.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/okhttp-2.7.5.jar</runtime-relative-path>
<binary-origin>release\modules\ext\okhttp-2.7.5.jar</binary-origin>

View File

@ -18,15 +18,19 @@
*/
package org.sleuthkit.autopsy.modules.leappanalyzers;
import com.fasterxml.jackson.databind.MappingIterator;
import com.fasterxml.jackson.dataformat.csv.CsvMapper;
import com.fasterxml.jackson.dataformat.csv.CsvParser;
import com.fasterxml.jackson.dataformat.csv.CsvSchema;
import com.google.common.collect.ImmutableMap;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.List;
@ -34,7 +38,6 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import static java.util.Locale.US;
@ -47,8 +50,10 @@ import java.util.stream.Stream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.collections4.MapUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.casemodule.Case;
import static org.sleuthkit.autopsy.casemodule.Case.getCurrentCase;
@ -81,30 +86,30 @@ public final class LeappFileProcessor {
*/
private static class TsvColumn {
private final String attributeName;
private final BlackboardAttribute.Type attributeType;
private final String columnName;
private final boolean required;
/**
* Main constructor.
*
* @param attributeName The BlackboardAttribute name or null if not
* used.
* @param attributeType The BlackboardAttribute type or null if not
* used.
* @param columnName The name of the column in the tsv file.
* @param required Whether or not this attribute is required to be
* present.
*/
TsvColumn(String attributeName, String columnName, boolean required) {
this.attributeName = attributeName;
TsvColumn(BlackboardAttribute.Type attributeType, String columnName, boolean required) {
this.attributeType = attributeType;
this.columnName = columnName;
this.required = required;
}
/**
* @return The BlackboardAttribute name or null if not used.
* @return The BlackboardAttribute type or null if not used.
*/
String getAttributeName() {
return attributeName;
BlackboardAttribute.Type getAttributeType() {
return attributeType;
}
/**
@ -127,7 +132,7 @@ public final class LeappFileProcessor {
private final String moduleName;
private final Map<String, String> tsvFiles;
private final Map<String, String> tsvFileArtifacts;
private final Map<String, BlackboardArtifact.Type> tsvFileArtifacts;
private final Map<String, String> tsvFileArtifactComments;
private final Map<String, List<TsvColumn>> tsvFileAttributes;
@ -230,17 +235,15 @@ public final class LeappFileProcessor {
String fileName = FilenameUtils.getName(LeappFileName);
File LeappFile = new File(LeappFileName);
if (tsvFileAttributes.containsKey(fileName)) {
List<TsvColumn> attrList = tsvFileAttributes.get(fileName);
BlackboardArtifact.Type artifactType = null;
try {
BlackboardArtifact.Type artifactType = Case.getCurrentCase().getSleuthkitCase().getArtifactType(tsvFileArtifacts.get(fileName));
List<TsvColumn> attrList = tsvFileAttributes.get(fileName);
artifactType = tsvFileArtifacts.get(fileName);
processFile(LeappFile, attrList, fileName, artifactType, bbartifacts, LeappImageFile);
} catch (TskCoreException ex) {
throw new IngestModuleException(String.format("Error getting Blackboard Artifact Type for %s", tsvFileArtifacts.get(fileName)), ex);
throw new IngestModuleException(String.format("Error getting Blackboard Artifact Type for %s", artifactType == null ? "<null>" : artifactType.toString()), ex);
}
}
}
if (!bbartifacts.isEmpty()) {
@ -266,16 +269,7 @@ public final class LeappFileProcessor {
File LeappFile = new File(LeappFileName);
if (tsvFileAttributes.containsKey(fileName)) {
List<TsvColumn> attrList = tsvFileAttributes.get(fileName);
BlackboardArtifact.Type artifactType = null;
try {
artifactType = Case.getCurrentCase().getSleuthkitCase().getArtifactType(tsvFileArtifacts.get(fileName));
} catch (TskCoreException ex) {
logger.log(Level.SEVERE, String.format("Error getting Blackboard Artifact Type for %s", tsvFileArtifacts.get(fileName)), ex);
}
if (artifactType == null) {
continue;
}
BlackboardArtifact.Type artifactType = tsvFileArtifacts.get(fileName);
try {
processFile(LeappFile, attrList, fileName, artifactType, bbartifacts, dataSource);
@ -304,14 +298,25 @@ public final class LeappFileProcessor {
return;
}
try (BufferedReader reader = new BufferedReader(new FileReader(LeappFile))) {
String header = reader.readLine();
// Check first line, if it is null then no heading so nothing to match to, close and go to next file.
if (header != null) {
Map<Integer, String> columnNumberToProcess = findColumnsToProcess(fileName, header, attrList);
String line = reader.readLine();
while (line != null) {
Collection<BlackboardAttribute> bbattributes = processReadLine(line, columnNumberToProcess, fileName);
// based on https://stackoverflow.com/questions/56921465/jackson-csv-schema-for-array
try (MappingIterator<List<String>> iterator = new CsvMapper()
.enable(CsvParser.Feature.WRAP_AS_ARRAY)
.readerFor(List.class)
.with(CsvSchema.emptySchema().withColumnSeparator('\t'))
.readValues(LeappFile)) {
if (iterator.hasNext()) {
List<String> headerItems = iterator.next();
Map<String, Integer> columnIndexes = IntStream.range(0, headerItems.size())
.mapToObj(idx -> idx)
.collect(Collectors.toMap(
idx -> headerItems.get(idx) == null ? null : headerItems.get(idx).trim().toLowerCase(),
idx -> idx,
(val1, val2) -> val1));
int lineNum = 1;
while (iterator.hasNext()) {
Collection<BlackboardAttribute> bbattributes = processReadLine(iterator.next(), columnIndexes, attrList, fileName, lineNum++);
if (!bbattributes.isEmpty()) {
BlackboardArtifact bbartifact = createArtifactWithAttributes(artifactType.getTypeID(), dataSource, bbattributes);
@ -319,178 +324,154 @@ public final class LeappFileProcessor {
bbartifacts.add(bbartifact);
}
}
line = reader.readLine();
}
}
}
}
/**
* Process the line read and create the necessary attributes for it
* Process the line read and create the necessary attributes for it.
*
* @param line a tsv line to process that was read
* @param columnNumberToProcess Which columns to process in the tsv line
* @param fileName name of file being processed
*
* @return
* @param lineValues List of column values.
* @param columnIndexes Mapping of column headers (trimmed; to lower case)
* to column index.
* @param attrList The list of attributes as specified for the schema of
* this file.
* @param fileName The name of the file being processed.
* @param lineNum The line number in the file.
* @return The collection of blackboard attributes for the artifact created
* from this line.
* @throws IngestModuleException
*/
private Collection<BlackboardAttribute> processReadLine(String line, Map<Integer, String> columnNumberToProcess, String fileName) throws IngestModuleException {
if (MapUtils.isEmpty(columnNumberToProcess)) {
return Collections.emptyList();
} else if (line == null) {
logger.log(Level.WARNING, "Line is null. Returning empty list for attributes.");
private Collection<BlackboardAttribute> processReadLine(List<String> lineValues, Map<String, Integer> columnIndexes,
List<TsvColumn> attrList, String fileName, int lineNum) throws IngestModuleException {
if (MapUtils.isEmpty(columnIndexes) || CollectionUtils.isEmpty(lineValues)
|| (lineValues.size() == 1 && StringUtils.isEmpty(lineValues.get(0)))) {
return Collections.emptyList();
}
String[] columnValues;
List<BlackboardAttribute> attrsToRet = new ArrayList<>();
for (TsvColumn colAttr : attrList) {
if (colAttr.getAttributeType() == null) {
continue;
}
// Check to see if the 2 values are equal, they may not be equal if there is no corresponding data in the line.
// or if the size of the line to split is not equal to the column numbers we are looking to process. This
// can happen when the last value of the tsv line has no data in it.
// If this happens then adding an empty value(s) for each columnValue where data does not exist
Integer maxColumnNumber = Collections.max(columnNumberToProcess.keySet());
if ((maxColumnNumber > line.split("\\t").length) || (columnNumberToProcess.size() > line.split("\\t").length)) {
columnValues = Arrays.copyOf(line.split("\\t"), maxColumnNumber + 1);
} else {
columnValues = line.split("\\t");
}
Integer columnIdx = columnIndexes.get(colAttr.getColumnName());
String value = (columnIdx == null || columnIdx >= lineValues.size() || columnIdx < 0) ? null : lineValues.get(columnIdx);
if (value == null) {
logger.log(Level.WARNING, String.format("No value found for column %s at line %d in file %s.", colAttr.getColumnName(), lineNum, fileName));
continue;
}
Collection<BlackboardAttribute> bbattributes = new ArrayList<BlackboardAttribute>();
for (Map.Entry<Integer, String> columnToProcess : columnNumberToProcess.entrySet()) {
Integer columnNumber = columnToProcess.getKey();
String attributeName = columnToProcess.getValue();
if (columnValues[columnNumber] != null) {
try {
BlackboardAttribute.Type attributeType = Case.getCurrentCase().getSleuthkitCase().getAttributeType(attributeName.toUpperCase());
if (attributeType == null) {
continue;
}
String attrType = attributeType.getValueType().getLabel().toUpperCase();
checkAttributeType(bbattributes, attrType, columnValues, columnNumber, attributeType, fileName);
} catch (TskCoreException ex) {
throw new IngestModuleException(String.format("Error getting Attribute type for Attribute Name %s", attributeName), ex); //NON-NLS
}
BlackboardAttribute attr = (value == null) ? null : getAttribute(colAttr.getAttributeType(), value, fileName);
if (attr != null) {
attrsToRet.add(attr);
}
}
if (tsvFileArtifactComments.containsKey(fileName)) {
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_COMMENT, moduleName, tsvFileArtifactComments.get(fileName)));
}
return bbattributes;
}
private void checkAttributeType(Collection<BlackboardAttribute> bbattributes, String attrType, String[] columnValues, int columnNumber, BlackboardAttribute.Type attributeType,
String fileName) {
if (columnValues == null || columnNumber < 0 || columnNumber > columnValues.length || columnValues[columnNumber] == null) {
logger.log(Level.WARNING, String.format("Unable to determine column value at index %d in columnValues: %s",
columnNumber,
columnValues == null ? "<null>" : "[" + String.join(", ", columnValues) + "]"));
return;
}
String columnValue = columnValues[columnNumber];
if (attrType.matches("STRING")) {
bbattributes.add(new BlackboardAttribute(attributeType, moduleName, columnValue));
} else if (attrType.matches("INTEGER")) {
try {
// parse as double to handle values of format like '21.0' and then convert to int
bbattributes.add(new BlackboardAttribute(attributeType, moduleName, Double.valueOf(columnValue).intValue()));
} catch (NumberFormatException ex) {
logger.log(Level.WARNING, String.format("Unable to format %s as an integer.", columnValue), ex);
}
} else if (attrType.matches("LONG")) {
try {
// parse as double to handle values of format like '21.0' and then convert to long
bbattributes.add(new BlackboardAttribute(attributeType, moduleName, Double.valueOf(columnValue).longValue()));
} catch (NumberFormatException ex) {
logger.log(Level.WARNING, String.format("Unable to format %s as an long.", columnValue), ex);
}
} else if (attrType.matches("DOUBLE")) {
try {
bbattributes.add(new BlackboardAttribute(attributeType, moduleName, Double.valueOf(columnValue)));
} catch (NumberFormatException ex) {
logger.log(Level.WARNING, String.format("Unable to format %s as an double.", columnValue), ex);
}
} else if (attrType.matches("BYTE")) {
try {
bbattributes.add(new BlackboardAttribute(attributeType, moduleName, Byte.valueOf(columnValue)));
} catch (NumberFormatException ex) {
logger.log(Level.WARNING, String.format("Unable to format %s as an byte.", columnValue), ex);
}
} else if (attrType.matches("DATETIME")) {
// format of data should be the same in all the data and the format is 2020-03-28 01:00:17
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-d HH:mm:ss", US);
Long dateLong = Long.valueOf(0);
try {
Date newDate = dateFormat.parse(columnValue);
dateLong = newDate.getTime() / 1000;
bbattributes.add(new BlackboardAttribute(attributeType, moduleName, dateLong));
} catch (ParseException ex) {
// catching error and displaying date that could not be parsed
// we set the timestamp to 0 and continue on processing
logger.log(Level.WARNING, String.format("Failed to parse date/time %s for attribute type %s in file %s.", columnValue, attributeType.getDisplayName(), fileName)); //NON-NLS
}
} else if (attrType.matches("JSON")) {
bbattributes.add(new BlackboardAttribute(attributeType, moduleName, columnValue));
} else {
// Log this and continue on with processing
logger.log(Level.WARNING, String.format("Attribute Type %s not defined.", attrType)); //NON-NLS
attrsToRet.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_COMMENT, moduleName, tsvFileArtifactComments.get(fileName)));
}
return attrsToRet;
}
/**
* Process the first line of the tsv file which has the headings. Match the
* headings to the columns in the XML mapping file so we know which columns
* to process.
*
* @param fileName The name of the file in which these column headers exist.
* @param line a tsv heading line of the columns in the file
* @param attrList the list of headings we want to process
*
* @return the numbered column(s) and attribute(s) we want to use for the
* column(s)
* The format of time stamps in tsv.
*/
private Map<Integer, String> findColumnsToProcess(String fileName, String line, List<TsvColumn> attrList) {
String[] columnNames = line.split("\\t");
HashMap<Integer, String> columnsToProcess = new HashMap<>();
// NOTE(review): SimpleDateFormat is not synchronized, so this shared static instance is only safe if parsing never runs on more than one thread at a time -- confirm against how this processor is invoked.
// No time zone is set on the formatter, so parsing uses the JVM default time zone.
private static final DateFormat TIMESTAMP_FORMAT = new SimpleDateFormat("yyyy-MM-d HH:mm:ss", US);
Integer columnPosition = 0;
for (String columnName : columnNames) {
// for some reason the first column of the line has unprintable characters so removing them
String cleanColumnName = columnName.trim().replaceAll("[^\\n\\r\\t\\p{Print}]", "");
for (TsvColumn tsvColumn : attrList) {
if (cleanColumnName.equalsIgnoreCase(tsvColumn.getColumnName())) {
columnsToProcess.put(columnPosition, tsvColumn.getAttributeName());
break;
}
}
columnPosition++;
/**
* Gets an appropriate attribute based on the attribute type and string
* value.
*
* @param attrType The attribute type.
* @param value The string value to be converted to the appropriate data
* type for the attribute type.
* @param fileName The file name that the value comes from.
* @return The generated blackboard attribute.
*/
private BlackboardAttribute getAttribute(BlackboardAttribute.Type attrType, String value, String fileName) {
if (attrType == null || value == null) {
logger.log(Level.WARNING, String.format("Unable to parse attribute type %s for value '%s' in fileName %s",
attrType == null ? "<null>" : attrType.toString(),
value == null ? "<null>" : value,
fileName == null ? "<null>" : fileName));
return null;
}
if (columnsToProcess.size() != attrList.size()) {
String missingColumns = IntStream.range(0, attrList.size())
.filter((idx) -> !columnsToProcess.containsKey(attrList.get(idx).getAttributeName()))
.mapToObj((idx) -> String.format("'%s'", attrList.get(idx).getColumnName() == null ? "<null>" : attrList.get(idx).getColumnName()))
.collect(Collectors.joining(", "));
switch (attrType.getValueType()) {
case JSON:
case STRING:
return parseAttrValue(value, attrType, fileName, false, false,
(v) -> new BlackboardAttribute(attrType, moduleName, v));
case INTEGER:
return parseAttrValue(value.trim(), attrType, fileName, true, false,
(v) -> new BlackboardAttribute(attrType, moduleName, Double.valueOf(v).intValue()));
case LONG:
return parseAttrValue(value.trim(), attrType, fileName, true, false,
(v) -> new BlackboardAttribute(attrType, moduleName, Double.valueOf(v).longValue()));
case DOUBLE:
return parseAttrValue(value.trim(), attrType, fileName, true, false,
(v) -> new BlackboardAttribute(attrType, moduleName, (double) Double.valueOf(v)));
case BYTE:
return parseAttrValue(value.trim(), attrType, fileName, true, false,
(v) -> new BlackboardAttribute(attrType, moduleName, new byte[]{Byte.valueOf(v)}));
case DATETIME:
return parseAttrValue(value.trim(), attrType, fileName, true, true,
(v) -> new BlackboardAttribute(attrType, moduleName, TIMESTAMP_FORMAT.parse(v).getTime() / 1000));
default:
// Log this and continue on with processing
logger.log(Level.WARNING, String.format("Attribute Type %s for file %s not defined.", attrType, fileName)); //NON-NLS
return null;
}
}
logger.log(Level.WARNING, String.format("Columns size expected not found in file %s based on xml from %s. Column Keys Missing = [%s]; Header Line = '%s'.",
this.xmlFile == null ? "<null>" : this.xmlFile,
fileName,
missingColumns,
line));
/**
 * Converts a string to a blackboard attribute, where the conversion is
 * allowed to fail with a parsing error. Unlike java.util.function.Function,
 * the single method may throw the checked ParseException, so converters can
 * be supplied as lambdas without wrapping that exception.
 */
@FunctionalInterface
private interface ParseExceptionFunction {

    /**
     * Converts a string value to a blackboard attribute.
     *
     * @param orig The original string value.
     * @return The generated blackboard attribute.
     * @throws ParseException        If the string cannot be parsed into the
     *                               target representation.
     * @throws NumberFormatException If the string cannot be parsed as a
     *                               number.
     */
    BlackboardAttribute apply(String orig) throws ParseException, NumberFormatException;
}
/**
* Runs parsing function on string value to convert to right data type and
* generates a blackboard attribute for that converted data type.
*
* @param value The string value.
* @param attrType The blackboard attribute type.
* @param fileName The name of the file from which the value comes.
* @param blankIsNull If string is blank return null attribute.
* @param zeroIsNull If string is some version of 0, return null attribute.
* @param valueConverter The means of converting the string value to an
* appropriate blackboard attribute.
* @return The generated blackboard attribute or null if not determined.
*/
private BlackboardAttribute parseAttrValue(String value, BlackboardAttribute.Type attrType, String fileName, boolean blankIsNull, boolean zeroIsNull, ParseExceptionFunction valueConverter) {
if (blankIsNull && StringUtils.isBlank(value)) {
return null;
}
return columnsToProcess;
if (zeroIsNull && value.matches("^\\s*[0\\.]*\\s*$")) {
return null;
}
try {
return valueConverter.apply(value);
} catch (NumberFormatException | ParseException ex) {
logger.log(Level.WARNING, String.format("Unable to format '%s' as value type %s while converting to attributes from %s.", value, attrType.getValueType().getLabel(), fileName), ex);
return null;
}
}
@NbBundle.Messages({
@ -558,10 +539,10 @@ public final class LeappFileProcessor {
if (foundArtifactType == null) {
logger.log(Level.SEVERE, String.format("No known artifact mapping found for [artifact: %s, %s]",
artifactName, getXmlFileIdentifier(parentName)));
} else {
tsvFileArtifacts.put(parentName, foundArtifactType);
}
tsvFileArtifacts.put(parentName, artifactName);
if (!comment.toLowerCase().matches("null")) {
tsvFileArtifactComments.put(parentName, comment);
}
@ -618,8 +599,8 @@ public final class LeappFileProcessor {
}
TsvColumn thisCol = new TsvColumn(
attributeName.toLowerCase(),
columnName.toLowerCase(),
foundAttrType,
columnName.trim().toLowerCase(),
"yes".compareToIgnoreCase(required) == 0);
if (tsvFileAttributes.containsKey(parentName)) {
@ -636,29 +617,6 @@ public final class LeappFileProcessor {
}
}
/**
 * Creates a new blackboard artifact of the given type on a file and attaches
 * the supplied attributes to it.
 *
 * @param type         The artifact type id handed to the file's newArtifact
 *                     call.
 * @param abstractFile The AbstractFile on which the artifact is created.
 * @param bbattributes The blackboard attributes added to the artifact once
 *                     it has been created.
 *
 * @return The newly-created artifact, or null if creating the artifact or
 *         adding its attributes failed.
 */
private BlackboardArtifact createArtifactWithAttributes(int type, AbstractFile abstractFile, Collection<BlackboardAttribute> bbattributes) {
    BlackboardArtifact newArtifact;
    try {
        newArtifact = abstractFile.newArtifact(type);
        newArtifact.addAttributes(bbattributes);
    } catch (TskException ex) {
        // The failure is logged rather than propagated; the caller receives null.
        logger.log(Level.WARNING, Bundle.LeappFileProcessor_error_creating_new_artifacts(), ex); //NON-NLS
        return null;
    }
    return newArtifact;
}
/**
* Generic method for creating a blackboard artifact with attributes
*
@ -710,9 +668,8 @@ public final class LeappFileProcessor {
xmlFile, true);
}
private static final Set<String> ALLOWED_EXTENSIONS = new HashSet<>(Arrays.asList("zip", "tar", "tgz"));
private static final Set<String> ALLOWED_EXTENSIONS = new HashSet<>(Arrays.asList("zip", "tar", "tgz"));
/**
* Find the files that will be processed by the iLeapp program
*
@ -738,7 +695,7 @@ public final class LeappFileProcessor {
for (AbstractFile leappFile : leappFiles) {
if (((leappFile.getLocalAbsPath() != null)
&& !leappFile.isVirtual())
&& leappFile.getNameExtension() != null
&& leappFile.getNameExtension() != null
&& ALLOWED_EXTENSIONS.contains(leappFile.getNameExtension().toLowerCase())) {
leappFilesToProcess.add(leappFile);
}