Merge pull request #6572 from APriestman/7095_dateParsing

7095 Date parsing
This commit is contained in:
Richard Cordovano 2020-12-29 14:48:44 -05:00 committed by GitHub
commit b6ad811c3a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -24,15 +24,20 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Date;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Scanner;
import java.util.logging.Level;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.openide.modules.InstalledFileLocator;
import org.openide.util.NbBundle.Messages;
import org.sleuthkit.autopsy.casemodule.Case;
@ -99,7 +104,9 @@ final class ExtractEdge extends Extract {
private static final String ESE_TOOL_FOLDER = "ESEDatabaseView"; //NON-NLS
private static final String EDGE_RESULT_FOLDER_NAME = "results"; //NON-NLS
// Default US-style timestamp format: month/day/year followed by a 12-hour time with an AM/PM marker.
private static final SimpleDateFormat DATE_FORMATTER = new SimpleDateFormat("MM/dd/yyyy hh:mm:ss a"); //NON-NLS
// ESEDatabaseView converts long timestamps into a string based on the current locale,
// so the default format may not always work. This field holds the format that
// parseTimestamp() most recently parsed a timestamp with (null until a parse succeeds),
// so that it can be tried first on the next timestamp.
private SimpleDateFormat previouslyValidDateFormat = null;
@Messages({
"ExtractEdge_process_errMsg_unableFindESEViewer=Unable to find ESEDatabaseViewer",
@ -609,13 +616,7 @@ final class ExtractEdge extends Extract {
index = headers.indexOf(EDGE_HEAD_ACCESSTIME);
String accessTime = rowSplit[index].trim();
Long ftime = null;
try {
Long epochtime = DATE_FORMATTER.parse(accessTime).getTime();
ftime = epochtime / 1000;
} catch (ParseException ex) {
LOG.log(Level.WARNING, "The Accessed Time format in history file seems invalid " + accessTime, ex); //NON-NLS
}
Long ftime = parseTimestamp(accessTime);
BlackboardArtifact bbart = origFile.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_HISTORY);
@ -640,13 +641,7 @@ final class ExtractEdge extends Extract {
String[] lineSplit = line.split(","); // NON-NLS
String accessTime = lineSplit[headers.indexOf(EDGE_HEAD_LASTMOD)].trim();
Long ftime = null;
try {
Long epochtime = DATE_FORMATTER.parse(accessTime).getTime();
ftime = epochtime / 1000;
} catch (ParseException ex) {
LOG.log(Level.WARNING, "The Accessed Time format in history file seems invalid " + accessTime, ex); //NON-NLS
}
Long ftime = parseTimestamp(accessTime);
String domain = lineSplit[headers.indexOf(EDGE_HEAD_RDOMAIN)].trim();
String name = hexToChar(lineSplit[headers.indexOf(EDGE_HEAD_NAME)].trim());
@ -708,6 +703,115 @@ final class ExtractEdge extends Extract {
this.getName(), NetworkUtils.extractDomain(url)));
return bbart;
}
/**
 * Matches the first two numeric fields of a timestamp (one or two digits
 * each) and captures the single non-digit separator between them.
 */
private static final Pattern TIMESTAMP_LEADING_FIELDS = Pattern.compile("^([0-9]{1,2})([^0-9])([0-9]{1,2})"); //NON-NLS

/**
 * Attempt to parse the timestamp.
 *
 * ESEDatabaseView makes timestamps based on the locale of the machine so
 * they will not always be in the expected format. Additionally, the format
 * used in the database output does not appear to match the default format
 * using DateFormat.SHORT. Therefore, if the default US format doesn't work,
 * we will attempt to determine the correct pattern to use and save any
 * working pattern for the next attempt.
 *
 * The formats are tried in this order: the format that worked for the
 * previous timestamp (if any), the default US format, and finally a format
 * guessed from the contents of the string and the default locale.
 *
 * @param timeStr The date/time string to parse. May be null or empty, in
 *                which case a warning is logged and null is returned.
 *
 * @return The time in seconds since the epoch as a Long, or null if it
 *         could not be parsed.
 */
private Long parseTimestamp(String timeStr) {
    // Nothing to parse; SimpleDateFormat.parse() would throw a NullPointerException on null.
    if (timeStr == null || timeStr.isEmpty()) {
        LOG.log(Level.WARNING, "Timestamp could not be parsed ({0})", timeStr); //NON-NLS
        return null;
    }

    // If we had a pattern that worked on the last date, use it again.
    if (previouslyValidDateFormat != null) {
        Long epochSeconds = tryParseEpochSeconds(previouslyValidDateFormat, timeStr);
        if (epochSeconds != null) {
            return epochSeconds;
        }
    }

    // Try the default US pattern
    SimpleDateFormat usDateFormat = new SimpleDateFormat("MM/dd/yyyy hh:mm:ss a"); //NON-NLS
    usDateFormat.setLenient(false); // Fail if month or day are out of range
    Long epochSeconds = tryParseEpochSeconds(usDateFormat, timeStr);
    if (epochSeconds != null) {
        previouslyValidDateFormat = usDateFormat;
        return epochSeconds;
    }

    // Make our best guess at the pattern from the data and the locale
    SimpleDateFormat guessedFormat = new SimpleDateFormat(guessTimestampPattern(timeStr));
    guessedFormat.setLenient(false); // Fail if month or day are out of range
    epochSeconds = tryParseEpochSeconds(guessedFormat, timeStr);
    if (epochSeconds != null) {
        previouslyValidDateFormat = guessedFormat;
        return epochSeconds;
    }

    LOG.log(Level.WARNING, "Timestamp could not be parsed ({0})", timeStr); //NON-NLS
    return null;
}

/**
 * Parses a timestamp with the given format.
 *
 * @param format  The format to parse with.
 * @param timeStr The date/time string to parse.
 *
 * @return The time in seconds since the epoch, or null if the string does
 *         not match the format.
 */
private static Long tryParseEpochSeconds(SimpleDateFormat format, String timeStr) {
    try {
        return format.parse(timeStr).getTime() / 1000;
    } catch (ParseException ignored) {
        // Not an error here: the caller moves on to its next candidate format.
        return null;
    }
}

/**
 * Builds a best-guess SimpleDateFormat pattern for a timestamp string.
 *
 * The guess covers the field order (month or day first), the date
 * separator ('/', '-' or '.') and whether the time uses a 12-hour clock
 * with an AM/PM marker or a 24-hour clock.
 *
 * @param timeStr The date/time string to inspect. Must not be null.
 *
 * @return The guessed pattern, e.g. "dd.MM.yyyy HH:mm:ss".
 */
private static String guessTimestampPattern(String timeStr) {
    // The locale's short date pattern generally doesn't match the data in the file
    // but can give information on whether the month or day is first.
    boolean monthFirst = true;
    DateFormat localeDateFormat = DateFormat.getDateInstance(DateFormat.SHORT, Locale.getDefault());
    // getDateInstance() is not guaranteed to return a SimpleDateFormat for every locale.
    if (localeDateFormat instanceof SimpleDateFormat
            && ((SimpleDateFormat) localeDateFormat).toPattern().startsWith("d")) {
        monthFirst = false;
    }

    // See if the date appears to use forward slashes. If not, assume '.' is being used
    // unless the data shows a '-' between the first two fields.
    char separator = timeStr.contains("/") ? '/' : '.';

    // Try to determine if the month or day is first by looking at the data.
    // If both variations appear valid, keep the locale result.
    Matcher matcher = TIMESTAMP_LEADING_FIELDS.matcher(timeStr);
    if (matcher.find()) {
        int firstVal = Integer.parseInt(matcher.group(1));
        int secondVal = Integer.parseInt(matcher.group(3));
        if (firstVal > 12) {
            monthFirst = false;
        } else if (secondVal > 12) {
            monthFirst = true;
        }
        if (separator != '/' && "-".equals(matcher.group(2))) {
            separator = '-';
        }
    }

    // See if the time has AM/PM attached
    boolean hasAmPm = timeStr.endsWith("M") || timeStr.endsWith("m");

    StringBuilder dateFormatPattern = new StringBuilder();
    dateFormatPattern.append(monthFirst ? "MM" : "dd").append(separator);
    dateFormatPattern.append(monthFirst ? "dd" : "MM").append(separator);
    dateFormatPattern.append("yyyy ");
    dateFormatPattern.append(hasAmPm ? "hh:mm:ss a" : "HH:mm:ss");
    return dateFormatPattern.toString();
}
/**
* Converts a space separated string of hex values to ascii characters.