Use the original length of the file, not the length of the encoded file on disk.

This commit is contained in:
apriestman 2020-11-05 14:26:43 -05:00
parent 916d6bc098
commit e95f87796e
3 changed files with 37 additions and 11 deletions

View File

@ -491,7 +491,7 @@ class DocumentEmbeddedContentExtractor {
try {
Path outputDirectory = Paths.get(getOutputFolderPath(parentFileName));
//Get map of attachment name -> location on disk.
Map<String, Path> extractedAttachments = pdfExtractor.extract(
Map<String, PDFAttachmentExtractor.NewResourceData> extractedAttachments = pdfExtractor.extract(
new ReadContentInputStream(abstractFile), abstractFile.getId(),
outputDirectory);
@ -499,10 +499,11 @@ class DocumentEmbeddedContentExtractor {
List<ExtractedFile> extractedFiles = new ArrayList<>();
extractedAttachments.entrySet().forEach((pathEntry) -> {
String fileName = pathEntry.getKey();
Path writeLocation = pathEntry.getValue();
Path writeLocation = pathEntry.getValue().getPath();
int fileSize = pathEntry.getValue().getLength();
extractedFiles.add(new ExtractedFile(fileName,
getFileRelativePath(writeLocation.getFileName().toString()),
writeLocation.toFile().length()));
fileSize));
});
return extractedFiles;

View File

@ -73,7 +73,7 @@ final class PDFAttachmentExtractor {
* @throws SAXException
* @throws TikaException
*/
public Map<String, Path> extract(InputStream input, long parentID, Path outputDir) throws IOException, SAXException, TikaException {
public Map<String, NewResourceData> extract(InputStream input, long parentID, Path outputDir) throws IOException, SAXException, TikaException {
ExtractionPreconditions.checkArgument(Files.exists(outputDir),
String.format("Output directory: %s, does not exist.", outputDir.toString())); //NON-NLS
@ -139,8 +139,8 @@ final class PDFAttachmentExtractor {
try (EncodedFileOutputStream outputStream = new EncodedFileOutputStream(
new FileOutputStream(outputFile.toFile()), TskData.EncodingType.XOR1)){
IOUtils.copy(in, outputStream);
watcher.notify(name, outputFile);
int bytesCopied = IOUtils.copy(in, outputStream);
watcher.notify(name, outputFile, bytesCopied);
} catch (IOException ex) {
logger.log(Level.WARNING, String.format("Could not extract attachment %s into directory %s", //NON-NLS
uniqueExtractedName, outputFile), ex);
@ -148,6 +148,29 @@ final class PDFAttachmentExtractor {
}
}
/**
 * Immutable holder pairing an extracted file's location with its length.
 * The length is carried separately because the size of the file on disk
 * cannot be used: the XOR header has been added to the written file.
 */
static class NewResourceData {

    /** Location of the extracted file. */
    private final Path filePath;

    /** Length supplied at construction, as opposed to the on-disk size. */
    private final int originalLength;

    /**
     * Creates a holder for one extracted file.
     *
     * @param path   Location of the extracted file.
     * @param length Length to record for the file.
     */
    NewResourceData(Path path, int length) {
        filePath = path;
        originalLength = length;
    }

    /**
     * @return The location given at construction.
     */
    Path getPath() {
        return this.filePath;
    }

    /**
     * @return The length given at construction.
     */
    int getLength() {
        return this.originalLength;
    }
}
/**
* Convenient wrapper for keeping track of new resource paths and the display
* name for each of these resources.
@ -157,17 +180,17 @@ final class PDFAttachmentExtractor {
*/
static class NewResourceWatcher {
private final Map<String, Path> newResourcePaths;
private final Map<String, NewResourceData> newResourcePaths;
public NewResourceWatcher() {
newResourcePaths = new HashMap<>();
}
public void notify(String name, Path newResource) {
newResourcePaths.put(name, newResource);
public void notify(String name, Path localPath, int length) {
newResourcePaths.put(name, new NewResourceData(localPath, length));
}
public Map<String, Path> getSnapshot() {
public Map<String, NewResourceData> getSnapshot() {
return newResourcePaths;
}
}

View File

@ -318,8 +318,10 @@ class MimeJ4MessageParser {
Body body = e.getBody();
if (body instanceof SingleBody) {
long fileLength;
try (EncodedFileOutputStream fos = new EncodedFileOutputStream(new FileOutputStream(outPath), TskData.EncodingType.XOR1)) {
((SingleBody) body).writeTo(fos);
fileLength = fos.getBytesWritten();
} catch (IOException ex) {
logger.log(Level.WARNING, "Failed to create file output stream for: " + outPath, ex); //NON-NLS
return;
@ -328,7 +330,7 @@ class MimeJ4MessageParser {
EmailMessage.Attachment attach = new EmailMessage.Attachment();
attach.setName(filename);
attach.setLocalPath(relModuleOutputPath + uniqueFilename);
attach.setSize(new File(outPath).length());
attach.setSize(fileLength);
attach.setEncodingType(TskData.EncodingType.XOR1);
email.addAttachment(attach);
}