From 3e9f647d8e38541fd2ccf5a5c5f9e09c93f1af58 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Fri, 17 Jan 2025 21:41:21 -0500 Subject: [PATCH] update for metadata changes in deps --- .../KeywordSearchIngestModule.java | 141 ++++++++++++++---- 1 file changed, 111 insertions(+), 30 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java index b3a0a1b3b3..629eaf65b8 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java @@ -19,7 +19,6 @@ package org.sleuthkit.autopsy.keywordsearch; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.io.CharSource; import java.io.IOException; @@ -33,10 +32,26 @@ import java.util.HashMap; import java.util.List; import static java.util.Locale.US; import java.util.Map; +import java.util.Map.Entry; import java.util.Optional; import java.util.concurrent.atomic.AtomicInteger; import java.util.logging.Level; import java.util.stream.Collectors; +import java.util.stream.IntStream; +import java.util.stream.Stream; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.commons.lang3.tuple.Triple; +import org.apache.tika.metadata.DublinCore; +import org.apache.tika.metadata.FileSystem; +import org.apache.tika.metadata.IPTC; +import org.apache.tika.metadata.Office; +import org.apache.tika.metadata.OfficeOpenXMLCore; +import org.apache.tika.metadata.OfficeOpenXMLExtended; +import org.apache.tika.metadata.PDF; +import org.apache.tika.metadata.Photoshop; +import org.apache.tika.metadata.TikaCoreProperties; +import org.apache.tika.metadata.XMP; +import org.apache.tika.metadata.XMPDM; import org.apache.tika.mime.MimeTypes; import org.openide.util.Lookup; import org.openide.util.NbBundle; @@ -130,24 +145,69 @@ public final class KeywordSearchIngestModule implements FileIngestModule { "application/x-z", //NON-NLS "application/x-compress"); //NON-NLS - private static final List METADATA_DATE_TYPES - = ImmutableList.of( - "Last-Save-Date", //NON-NLS - "Last-Printed", //NON-NLS - "Creation-Date"); //NON-NLS - - private static final Map METADATA_TYPES_MAP = ImmutableMap.builder() - .put("Last-Save-Date", BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME_MODIFIED) - .put("Last-Author", BlackboardAttribute.ATTRIBUTE_TYPE.TSK_USER_ID) - .put("Creation-Date", BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME_CREATED) - .put("Company", BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ORGANIZATION) - .put("Author", BlackboardAttribute.ATTRIBUTE_TYPE.TSK_OWNER) - .put("Application-Name", BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PROG_NAME) - .put("Last-Printed", BlackboardAttribute.ATTRIBUTE_TYPE.TSK_LAST_PRINTED_DATETIME) - .put("Producer", BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PROG_NAME) - .put("Title", BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DESCRIPTION) - .put("pdf:PDFVersion", BlackboardAttribute.ATTRIBUTE_TYPE.TSK_VERSION) - .build(); + private static final Map> METADATA_TYPES_MAP = Stream.of( + Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME_MODIFIED, List.of( + "Last-Save-Date", + TikaCoreProperties.MODIFIED.getName(), + FileSystem.MODIFIED.getName(), + DublinCore.MODIFIED.getName(), + PDF.DOC_INFO_MODIFICATION_DATE.getName(), + PDF.PDFVT_MODIFIED.getName(), + XMP.MODIFY_DATE.getName(), + XMPDM.AUDIO_MOD_DATE.getName(), + XMPDM.METADATA_MOD_DATE.getName(), + XMPDM.VIDEO_MOD_DATE.getName())), + Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_USER_ID, List.of( + "Last-Author", + Office.LAST_AUTHOR.getName(), + TikaCoreProperties.MODIFIER.getName())), + Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME_CREATED, List.of( + "Creation-Date", + TikaCoreProperties.CREATED.getName(), + FileSystem.CREATED.getName(), + DublinCore.CREATED.getName(), + IPTC.DATE_CREATED.getName(), + Office.CREATION_DATE.getName(), + PDF.DOC_INFO_CREATED.getName(), + Photoshop.DATE_CREATED.getName(), + XMP.CREATE_DATE.getName())), + Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ORGANIZATION, List.of( + "Company", + DublinCore.PUBLISHER.getName(), + IPTC.ORGANISATION_NAME.getName(), + OfficeOpenXMLExtended.COMPANY.getName())), + Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_OWNER, List.of( + "Author", + TikaCoreProperties.CREATOR.getName(), + DublinCore.CREATOR.getName(), + Office.INITIAL_AUTHOR.getName(), + Office.AUTHOR.getName(), + Photoshop.AUTHORS_POSITION.getName(), + PDF.DOC_INFO_CREATOR.getName())), + Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PROG_NAME, List.of( + "Application-Name", + "Producer", + OfficeOpenXMLExtended.APPLICATION.getName(), + org.apache.tika.metadata.RTFMetadata.EMB_APP_VERSION.getName())), + Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_LAST_PRINTED_DATETIME, List.of( + "Last-Printed", + OfficeOpenXMLCore.LAST_PRINTED.getName())), + Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DESCRIPTION, List.of( + "Title", + DublinCore.TITLE.getName(), + IPTC.TITLE.getName(), + PDF.DOC_INFO_TITLE.getName())), + Pair.of(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_VERSION, List.of( + PDF.PDF_VERSION.getName(), + OfficeOpenXMLCore.VERSION.getName()))) + .flatMap(pr -> { + BlackboardAttribute.ATTRIBUTE_TYPE attrType = pr.getKey(); + List keys = pr.getValue(); + return IntStream.range(0, keys.size()) + .mapToObj(idx -> Triple.of(keys.get(idx), attrType, idx)); + }) + .collect(Collectors.toMap(Triple::getLeft, trip -> Pair.of(trip.getMiddle(), trip.getRight()), (v1, v2) -> v1.getRight() < v2.getRight() ? v1 : v2)); + private static final String IMAGE_MIME_TYPE_PREFIX = "image/"; @@ -624,14 +684,28 @@ public final class KeywordSearchIngestModule implements FileIngestModule { Collection attributes = new ArrayList<>(); Collection bbartifacts = new ArrayList<>(); + + /** + * Get best matched metadata for each attribute type found in metadata map. + */ + Map> intermediateMapping = new HashMap<>(); for (Map.Entry entry : metadata.entrySet()) { - if (METADATA_TYPES_MAP.containsKey(entry.getKey())) { - BlackboardAttribute bba = checkAttribute(entry.getKey(), entry.getValue()); - if (bba != null) { - attributes.add(bba); - } + Pair attrPair = METADATA_TYPES_MAP.get(entry.getKey()); + if (attrPair != null) { + intermediateMapping.compute(attrPair.getKey(), (k, v) -> { + if (v == null || v.getKey() > attrPair.getValue()) { + return Pair.of(attrPair.getValue(), entry.getValue()); + } else { + return v; + } + }); } } + + for (Entry> interEntry: intermediateMapping.entrySet()) { + attributes.add(checkAttribute(interEntry.getKey(), interEntry.getValue().getValue())); + } + if (!attributes.isEmpty()) { try { BlackboardArtifact bbart = aFile.newDataArtifact(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_METADATA), attributes); @@ -653,24 +727,31 @@ public final class KeywordSearchIngestModule implements FileIngestModule { } } - private BlackboardAttribute checkAttribute(String key, String value) { + /** + * Create a metadata blackboard attribute based on specified content. + * @param attrType The attribute type. + * @param key The key for the attribute. + * @param value The value of the attribute. + * @return + */ + private BlackboardAttribute checkAttribute(BlackboardAttribute.ATTRIBUTE_TYPE attrType, String value) { String moduleName = KeywordSearchIngestModule.class.getName(); - if (!value.isEmpty() && value.charAt(0) != ' ') { - if (METADATA_DATE_TYPES.contains(key)) { + if (attrType != null && !value.isEmpty() && value.charAt(0) != ' ') { + if (attrType.getValueType() == BlackboardAttribute.TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.DATETIME) { SimpleDateFormat metadataDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", US); Long metadataDateTime = Long.valueOf(0); try { String metadataDate = value.replaceAll("T", " ").replaceAll("Z", ""); Date usedDate = metadataDateFormat.parse(metadataDate); metadataDateTime = usedDate.getTime() / 1000; - return new BlackboardAttribute(METADATA_TYPES_MAP.get(key), moduleName, metadataDateTime); + return new BlackboardAttribute(attrType, moduleName, metadataDateTime); } catch (ParseException ex) { // catching error and displaying date that could not be parsed then will continue on. - logger.log(Level.WARNING, String.format("Failed to parse date/time %s for metadata attribute %s.", value, key), ex); //NON-NLS + logger.log(Level.WARNING, String.format("Failed to parse date/time %s for metadata attribute %s.", value, attrType == null ? "" : attrType.name()), ex); //NON-NLS return null; } } else { - return new BlackboardAttribute(METADATA_TYPES_MAP.get(key), moduleName, value); + return new BlackboardAttribute(attrType, moduleName, value); } }