Merge pull request #5985 from markmckinnon/6381-Mbox-files-greater-then-2gb-will-not-be-processed-in-Email-Parser

6381 mbox files greater than 2gb will not be processed in email parser
This commit is contained in:
Richard Cordovano 2020-06-22 16:56:48 -04:00 committed by GitHub
commit 080c32fa2b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 137 additions and 11 deletions

View File

@ -290,6 +290,62 @@ public final class ContentUtils {
return totalRead; return totalRead;
} }
/**
 * Reads a byte range from any content object and writes (extracts) it to a
 * file, using a cancellation check instead of a Future object method.
 *
 * @param content        Any content object.
 * @param outputFile     Will be created if it doesn't exist, and overwritten
 *                       if it does.
 * @param cancelCheck    A function used to check if the file write process
 *                       should be terminated. It is consulted after every
 *                       buffer that is written.
 * @param startingOffset The offset in the content of the first byte to
 *                       write (inclusive).
 * @param endingOffset   The offset in the content at which to stop writing
 *                       (exclusive). If it is not greater than
 *                       startingOffset, nothing is written.
 *
 * @return The number of bytes extracted. This is less than
 *         (endingOffset - startingOffset) if the content ends early or the
 *         write is cancelled.
 *
 * @throws IOException If the starting offset could not be reached or the
 *                     file could not be written.
 */
public static long writeToFile(Content content, java.io.File outputFile,
        Supplier<Boolean> cancelCheck, long startingOffset, long endingOffset) throws IOException {
    long totalRead = 0;
    // Both streams are closed exactly once, on every exit path.
    try (InputStream in = new ReadContentInputStream(content);
            FileOutputStream out = new FileOutputStream(outputFile, false)) {
        long offsetSkipped = in.skip(startingOffset);
        if (offsetSkipped != startingOffset) {
            // String.format uses %d placeholders; the MessageFormat style {0}
            // placeholders would be emitted literally.
            throw new IOException(String.format("Skipping file to starting offset %d was not successful only skipped to offset %d.", startingOffset, offsetSkipped));
        }

        byte[] buffer = new byte[TO_FILE_BUFFER_SIZE];
        long remaining = endingOffset - startingOffset;
        while (remaining > 0) {
            // Never request more than is left in the range, so the output
            // stops exactly at endingOffset.
            int toRead = (int) Math.min((long) buffer.length, remaining);
            int len = in.read(buffer, 0, toRead);
            if (len == -1) {
                // Content ended before endingOffset was reached.
                break;
            }
            out.write(buffer, 0, len);
            totalRead += len;
            // Account for the bytes actually read; a read may return fewer
            // bytes than were requested.
            remaining -= len;
            if (cancelCheck.get()) {
                break;
            }
        }
    }
    return totalRead;
}
/** /**
* Helper to ignore the '.' and '..' directories * Helper to ignore the '.' and '..' directories
* *

View File

@ -6,6 +6,15 @@
<code-name-base>org.sleuthkit.autopsy.thunderbirdparser</code-name-base> <code-name-base>org.sleuthkit.autopsy.thunderbirdparser</code-name-base>
<suite-component/> <suite-component/>
<module-dependencies> <module-dependencies>
<dependency>
<code-name-base>org.netbeans.api.progress</code-name-base>
<build-prerequisite/>
<compile-dependency/>
<run-dependency>
<release-version>1</release-version>
<specification-version>1.47.1</specification-version>
</run-dependency>
</dependency>
<dependency> <dependency>
<code-name-base>org.openide.util</code-name-base> <code-name-base>org.openide.util</code-name-base>
<build-prerequisite/> <build-prerequisite/>

View File

@ -54,6 +54,7 @@ import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.BlackboardAttribute; import org.sleuthkit.datamodel.BlackboardAttribute;
import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE; import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
import org.sleuthkit.datamodel.DerivedFile; import org.sleuthkit.datamodel.DerivedFile;
import org.sleuthkit.datamodel.ReadContentInputStream;
import org.sleuthkit.datamodel.Relationship; import org.sleuthkit.datamodel.Relationship;
import org.sleuthkit.datamodel.TskCoreException; import org.sleuthkit.datamodel.TskCoreException;
import org.sleuthkit.datamodel.TskData; import org.sleuthkit.datamodel.TskData;
@ -76,6 +77,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
private Blackboard blackboard; private Blackboard blackboard;
private CommunicationArtifactsHelper communicationArtifactsHelper; private CommunicationArtifactsHelper communicationArtifactsHelper;
private static final int MBOX_SIZE_TO_SPLIT = 1048576000;
private Case currentCase; private Case currentCase;
/** /**
@ -309,12 +311,75 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
return ProcessResult.OK; return ProcessResult.OK;
} }
try { if (abstractFile.getSize() < MBOX_SIZE_TO_SPLIT) {
ContentUtils.writeToFile(abstractFile, file, context::fileIngestIsCancelled);
} catch (IOException ex) { try {
logger.log(Level.WARNING, "Failed writing mbox file to disk.", ex); //NON-NLS ContentUtils.writeToFile(abstractFile, file, context::fileIngestIsCancelled);
return ProcessResult.OK; } catch (IOException ex) {
logger.log(Level.WARNING, "Failed writing mbox file to disk.", ex); //NON-NLS
return ProcessResult.OK;
}
processMboxFile(file, abstractFile, emailFolder);
if (file.delete() == false) {
logger.log(Level.INFO, "Failed to delete temp file: {0}", file.getName()); //NON-NLS
}
} else {
List<Long> mboxSplitOffsets = new ArrayList<>();
try{
mboxSplitOffsets = findMboxSplitOffset(abstractFile, file);
} catch (IOException ex) {
logger.log(Level.WARNING, String.format("Failed finding split offsets for mbox file {0}.", fileName), ex); //NON-NLS
return ProcessResult.OK;
}
long startingOffset = 0;
for (Long mboxSplitOffset : mboxSplitOffsets) {
File splitFile = new File(fileName + "-" + mboxSplitOffset);
try {
ContentUtils.writeToFile(abstractFile, splitFile, context::fileIngestIsCancelled, startingOffset, mboxSplitOffset);
} catch (IOException ex) {
logger.log(Level.WARNING, "Failed writing split mbox file to disk.", ex); //NON-NLS
return ProcessResult.OK;
}
processMboxFile(splitFile, abstractFile, emailFolder);
startingOffset = mboxSplitOffset;
if (splitFile.delete() == false) {
logger.log(Level.INFO, "Failed to delete temp file: {0}", splitFile); //NON-NLS
}
}
}
return ProcessResult.OK;
}
private List<Long> findMboxSplitOffset(AbstractFile abstractFile, File file) throws IOException {
List<Long> mboxSplitOffset = new ArrayList<>();
byte[] buffer = new byte[7];
ReadContentInputStream in = new ReadContentInputStream(abstractFile);
in.skip(MBOX_SIZE_TO_SPLIT);
int len = in.read(buffer);
while (len != -1) {
len = in.read(buffer);
if (buffer[0] == 13 && buffer[1] == 10 && buffer[2] == 70 && buffer[3] == 114 &&
buffer[4] == 111 && buffer[5] == 109 && buffer[6] == 32) {
mboxSplitOffset.add(in.getCurPosition() - 5 );
in.skip(MBOX_SIZE_TO_SPLIT);
}
} }
return mboxSplitOffset;
}
private void processMboxFile(File file, AbstractFile abstractFile, String emailFolder) {
MboxParser emailIterator = MboxParser.getEmailIterator( emailFolder, file, abstractFile.getId()); MboxParser emailIterator = MboxParser.getEmailIterator( emailFolder, file, abstractFile.getId());
List<EmailMessage> emails = new ArrayList<>(); List<EmailMessage> emails = new ArrayList<>();
@ -325,7 +390,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
emails.add(emailMessage); emails.add(emailMessage);
} }
} }
String errors = emailIterator.getErrors(); String errors = emailIterator.getErrors();
if (!errors.isEmpty()) { if (!errors.isEmpty()) {
postErrorMessage( postErrorMessage(
@ -335,11 +400,6 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
} }
processEmails(emails, MboxParser.getEmailIterator( emailFolder, file, abstractFile.getId()), abstractFile); processEmails(emails, MboxParser.getEmailIterator( emailFolder, file, abstractFile.getId()), abstractFile);
if (file.delete() == false) {
logger.log(Level.INFO, "Failed to delete temp file: {0}", file.getName()); //NON-NLS
}
return ProcessResult.OK;
} }
/** /**
@ -755,4 +815,5 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
public void shutDown() { public void shutDown() {
// nothing to shut down // nothing to shut down
} }
} }