Use the original length of the file, not the length of the encoded file on disk.

This commit is contained in:
apriestman 2020-11-05 14:26:43 -05:00
parent 916d6bc098
commit e95f87796e
3 changed files with 37 additions and 11 deletions

View File

@ -491,7 +491,7 @@ class DocumentEmbeddedContentExtractor {
try { try {
Path outputDirectory = Paths.get(getOutputFolderPath(parentFileName)); Path outputDirectory = Paths.get(getOutputFolderPath(parentFileName));
//Get map of attachment name -> location disk. //Get map of attachment name -> location disk.
Map<String, Path> extractedAttachments = pdfExtractor.extract( Map<String, PDFAttachmentExtractor.NewResourceData> extractedAttachments = pdfExtractor.extract(
new ReadContentInputStream(abstractFile), abstractFile.getId(), new ReadContentInputStream(abstractFile), abstractFile.getId(),
outputDirectory); outputDirectory);
@ -499,10 +499,11 @@ class DocumentEmbeddedContentExtractor {
List<ExtractedFile> extractedFiles = new ArrayList<>(); List<ExtractedFile> extractedFiles = new ArrayList<>();
extractedAttachments.entrySet().forEach((pathEntry) -> { extractedAttachments.entrySet().forEach((pathEntry) -> {
String fileName = pathEntry.getKey(); String fileName = pathEntry.getKey();
Path writeLocation = pathEntry.getValue(); Path writeLocation = pathEntry.getValue().getPath();
int fileSize = pathEntry.getValue().getLength();
extractedFiles.add(new ExtractedFile(fileName, extractedFiles.add(new ExtractedFile(fileName,
getFileRelativePath(writeLocation.getFileName().toString()), getFileRelativePath(writeLocation.getFileName().toString()),
writeLocation.toFile().length())); fileSize));
}); });
return extractedFiles; return extractedFiles;

View File

@ -73,7 +73,7 @@ final class PDFAttachmentExtractor {
* @throws SAXException * @throws SAXException
* @throws TikaException * @throws TikaException
*/ */
public Map<String, Path> extract(InputStream input, long parentID, Path outputDir) throws IOException, SAXException, TikaException { public Map<String, NewResourceData> extract(InputStream input, long parentID, Path outputDir) throws IOException, SAXException, TikaException {
ExtractionPreconditions.checkArgument(Files.exists(outputDir), ExtractionPreconditions.checkArgument(Files.exists(outputDir),
String.format("Output directory: %s, does not exist.", outputDir.toString())); //NON-NLS String.format("Output directory: %s, does not exist.", outputDir.toString())); //NON-NLS
@ -139,8 +139,8 @@ final class PDFAttachmentExtractor {
try (EncodedFileOutputStream outputStream = new EncodedFileOutputStream( try (EncodedFileOutputStream outputStream = new EncodedFileOutputStream(
new FileOutputStream(outputFile.toFile()), TskData.EncodingType.XOR1)){ new FileOutputStream(outputFile.toFile()), TskData.EncodingType.XOR1)){
IOUtils.copy(in, outputStream); int bytesCopied = IOUtils.copy(in, outputStream);
watcher.notify(name, outputFile); watcher.notify(name, outputFile, bytesCopied);
} catch (IOException ex) { } catch (IOException ex) {
logger.log(Level.WARNING, String.format("Could not extract attachment %s into directory %s", //NON-NLS logger.log(Level.WARNING, String.format("Could not extract attachment %s into directory %s", //NON-NLS
uniqueExtractedName, outputFile), ex); uniqueExtractedName, outputFile), ex);
@ -148,6 +148,29 @@ final class PDFAttachmentExtractor {
} }
} }
/**
 * Immutable holder pairing an extracted file's path with its length.
 * The length is carried alongside the path because it cannot be read back
 * from the file on disk: the XOR header has been added to that file.
 *
 * NOTE(review): the length is stored as an int, so values above
 * Integer.MAX_VALUE cannot be represented - confirm callers never need that.
 */
static class NewResourceData {

    private final Path extractedPath;
    private final int contentLength;

    /**
     * Stores the supplied values as given; no validation is performed.
     *
     * @param path   Path of the extracted file.
     * @param length Length to report for the extracted file.
     */
    NewResourceData(Path path, int length) {
        extractedPath = path;
        contentLength = length;
    }

    /**
     * @return The length supplied at construction.
     */
    int getLength() {
        return contentLength;
    }

    /**
     * @return The path supplied at construction.
     */
    Path getPath() {
        return extractedPath;
    }
}
/** /**
* Convenient wrapper for keeping track of new resource paths and the display * Convenient wrapper for keeping track of new resource paths and the display
* name for each of these resources. * name for each of these resources.
@ -157,17 +180,17 @@ final class PDFAttachmentExtractor {
*/ */
static class NewResourceWatcher { static class NewResourceWatcher {
private final Map<String, Path> newResourcePaths; private final Map<String, NewResourceData> newResourcePaths;
public NewResourceWatcher() { public NewResourceWatcher() {
newResourcePaths = new HashMap<>(); newResourcePaths = new HashMap<>();
} }
public void notify(String name, Path newResource) { public void notify(String name, Path localPath, int length) {
newResourcePaths.put(name, newResource); newResourcePaths.put(name, new NewResourceData(localPath, length));
} }
public Map<String, Path> getSnapshot() { public Map<String, NewResourceData> getSnapshot() {
return newResourcePaths; return newResourcePaths;
} }
} }

View File

@ -318,8 +318,10 @@ class MimeJ4MessageParser {
Body body = e.getBody(); Body body = e.getBody();
if (body instanceof SingleBody) { if (body instanceof SingleBody) {
long fileLength;
try (EncodedFileOutputStream fos = new EncodedFileOutputStream(new FileOutputStream(outPath), TskData.EncodingType.XOR1)) { try (EncodedFileOutputStream fos = new EncodedFileOutputStream(new FileOutputStream(outPath), TskData.EncodingType.XOR1)) {
((SingleBody) body).writeTo(fos); ((SingleBody) body).writeTo(fos);
fileLength = fos.getBytesWritten();
} catch (IOException ex) { } catch (IOException ex) {
logger.log(Level.WARNING, "Failed to create file output stream for: " + outPath, ex); //NON-NLS logger.log(Level.WARNING, "Failed to create file output stream for: " + outPath, ex); //NON-NLS
return; return;
@ -328,7 +330,7 @@ class MimeJ4MessageParser {
EmailMessage.Attachment attach = new EmailMessage.Attachment(); EmailMessage.Attachment attach = new EmailMessage.Attachment();
attach.setName(filename); attach.setName(filename);
attach.setLocalPath(relModuleOutputPath + uniqueFilename); attach.setLocalPath(relModuleOutputPath + uniqueFilename);
attach.setSize(new File(outPath).length()); attach.setSize(fileLength);
attach.setEncodingType(TskData.EncodingType.XOR1); attach.setEncodingType(TskData.EncodingType.XOR1);
email.addAttachment(attach); email.addAttachment(attach);
} }