Merge pull request #6694 from markmckinnon/7242-aLEAPP/iLEAPP-parsing/sanitization-of-tsv-data-to-match-TSK-expectations2

7242-aLEAPP/iLEAPP-parsing/sanitization-of-tsv-data-to-match-TSK-expectations
This commit is contained in:
Richard Cordovano 2021-02-04 13:55:29 -05:00 committed by GitHub
commit dad94cabe2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 22 additions and 3 deletions

View File

@ -27,6 +27,7 @@ import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.validator.routines.DomainValidator;
import org.apache.commons.validator.routines.EmailValidator;
import org.sleuthkit.autopsy.coreutils.NetworkUtils;
/**
* Provides functions for normalizing data by attribute type before insertion or
@ -144,11 +145,11 @@ final public class CorrelationAttributeNormalizer {
private static String normalizeDomain(String data) throws CorrelationAttributeNormalizationException {
DomainValidator validator = DomainValidator.getInstance(true);
if (validator.isValid(data)) {
return data.toLowerCase();
return NetworkUtils.extractDomain(data.toLowerCase());
} else {
final String validIpAddressRegex = "^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$";
if (data.matches(validIpAddressRegex)) {
return data;
return NetworkUtils.extractDomain(data);
} else {
throw new CorrelationAttributeNormalizationException(String.format("Data was expected to be a valid domain: %s", data));
}

View File

@ -59,6 +59,7 @@ import org.sleuthkit.autopsy.casemodule.Case;
import static org.sleuthkit.autopsy.casemodule.Case.getCurrentCase;
import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
import org.sleuthkit.autopsy.casemodule.services.FileManager;
import org.sleuthkit.autopsy.coreutils.NetworkUtils;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.coreutils.PlatformUtil;
import org.sleuthkit.autopsy.ingest.IngestModule.IngestModuleException;
@ -379,7 +380,9 @@ public final class LeappFileProcessor {
return Collections.emptyList();
}
BlackboardAttribute attr = (value == null) ? null : getAttribute(colAttr.getAttributeType(), value, fileName);
String formattedValue = formatValueBasedOnAttrType(colAttr, value);
BlackboardAttribute attr = (value == null) ? null : getAttribute(colAttr.getAttributeType(), formattedValue, fileName);
if (attr == null) {
logger.log(Level.WARNING, String.format("Blackboard attribute could not be parsed column %s at line %d in file %s. Omitting row.", colAttr.getColumnName(), lineNum, fileName));
return Collections.emptyList();
@ -394,6 +397,21 @@ public final class LeappFileProcessor {
return attrsToRet;
}
/**
 * Formats a raw TSV cell value according to the attribute type of its column.
 *
 * Values belonging to a column whose attribute type name is "TSK_DOMAIN" are
 * passed through NetworkUtils.extractDomain(); values of every other
 * attribute type are returned unchanged.
 *
 * @param colAttr Column attribute information.
 * @param value   The string to be formatted; may be null.
 *
 * @return The formatted string for a TSK_DOMAIN column, otherwise the
 *         original string. A null value is returned as null.
 */
private String formatValueBasedOnAttrType(TsvColumn colAttr, String value) {
    // A missing cell value must not be handed to the domain extractor; return
    // it untouched so the caller's own null handling decides what happens.
    if (value == null) {
        return null;
    }

    if (colAttr.getAttributeType().getTypeName().equals("TSK_DOMAIN")) {
        return NetworkUtils.extractDomain(value);
    }

    return value;
}
/**
* The format of time stamps in tsv.
*/