diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/MSOfficeEmbeddedContentExtractor.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/MSOfficeEmbeddedContentExtractor.java
index 4a3a20da07..7fca6a896c 100644
--- a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/MSOfficeEmbeddedContentExtractor.java
+++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/MSOfficeEmbeddedContentExtractor.java
@@ -51,6 +51,7 @@ import org.apache.tika.mime.MimeTypeException;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.microsoft.OfficeParserConfig;
 import org.apache.tika.sax.BodyContentHandler;
 import org.openide.util.NbBundle;
 import org.sleuthkit.autopsy.casemodule.Case;
@@ -231,11 +232,13 @@ class MSOfficeEmbeddedContentExtractor {
             // write limit (which defaults to 100,000 characters.
             ContentHandler contentHandler = new BodyContentHandler(-1);
 
-            // TODO: this will be needed once we upgrade to Tika 1.16 or later.
-            // OfficeParserConfig officeParserConfig = new OfficeParserConfig();
-            // officeParserConfig.setUseSAXPptxExtractor(true);
-            // officeParserConfig.setUseSAXDocxExtractor(true);
-            // parseContext.set(OfficeParserConfig.class, officeParserConfig);
+            // Use the more memory efficient Tika SAX parsers for DOCX and
+            // PPTX files (it already uses SAX for XLSX).
+            OfficeParserConfig officeParserConfig = new OfficeParserConfig();
+            officeParserConfig.setUseSAXPptxExtractor(true);
+            officeParserConfig.setUseSAXDocxExtractor(true);
+            parseContext.set(OfficeParserConfig.class, officeParserConfig);
+
             EmbeddedDocumentExtractor extractor = new EmbeddedContentExtractor(parseContext);
             parseContext.set(EmbeddedDocumentExtractor.class, extractor);
             ReadContentInputStream stream = new ReadContentInputStream(abstractFile);
diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java
index a48b40e756..a8bc995eae 100644
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java
@@ -1,7 +1,7 @@
 /*
  * Autopsy Forensic Browser
  *
- * Copyright 2011-2017 Basis Technology Corp.
+ * Copyright 2011-2018 Basis Technology Corp.
  * Contact: carrier <at> sleuthkit <dot> org
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -44,6 +44,7 @@ import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil;
 import org.sleuthkit.autopsy.datamodel.AbstractAbstractFileNode;
 import static org.sleuthkit.autopsy.datamodel.AbstractAbstractFileNode.AbstractFilePropertyType.LOCATION;
 import org.sleuthkit.autopsy.datamodel.AbstractFsContentNode;
+import org.sleuthkit.autopsy.datamodel.EmptyNode;
 import org.sleuthkit.autopsy.datamodel.KeyValue;
 import org.sleuthkit.autopsy.datamodel.KeyValueNode;
 import org.sleuthkit.autopsy.keywordsearch.KeywordSearchResultFactory.KeyValueQueryContent;
@@ -63,21 +64,21 @@ import org.sleuthkit.datamodel.TskCoreException;
  * Responsible for assembling nodes and columns in the right way and performing
  * lazy queries as needed.
  */
-class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
+class KeywordSearchResultFactory extends ChildFactory<KeyValue> {
 
-    private static final Logger logger = Logger.getLogger(KeywordSearchResultFactory.class.getName());
+    private static final Logger LOGGER = Logger.getLogger(KeywordSearchResultFactory.class.getName());
 
     //common properties (superset of all Node properties) to be displayed as columns
-    static final List<String> COMMON_PROPERTIES =
-            Stream.concat(
+    static final List<String> COMMON_PROPERTIES
+            = Stream.concat(
                     Stream.of(
                             TSK_KEYWORD,
                             TSK_KEYWORD_REGEXP,
                             TSK_KEYWORD_PREVIEW)
-                    .map(BlackboardAttribute.ATTRIBUTE_TYPE::getDisplayName),
+                            .map(BlackboardAttribute.ATTRIBUTE_TYPE::getDisplayName),
                     Arrays.stream(AbstractAbstractFileNode.AbstractFilePropertyType.values())
-                    .map(Object::toString))
-            .collect(Collectors.toList());
+                            .map(Object::toString))
+                    .collect(Collectors.toList());
 
     private final Collection<QueryRequest> queryRequests;
@@ -93,7 +94,7 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
      * @param toPopulate property set map for a Node
      */
     @Override
-    protected boolean createKeys(List<KeyValueQueryContent> toPopulate) {
+    protected boolean createKeys(List<KeyValue> toPopulate) {
 
         for (QueryRequest queryRequest : queryRequests) {
             /**
@@ -130,7 +131,7 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
      * @return
     */
    @NbBundle.Messages({"KeywordSearchResultFactory.query.exception.msg=Could not perform the query "})
-    private boolean createFlatKeys(KeywordSearchQuery queryRequest, List<KeyValueQueryContent> toPopulate) {
+    private boolean createFlatKeys(KeywordSearchQuery queryRequest, List<KeyValue> toPopulate) {
 
         /**
          * Execute the requested query.
@@ -139,15 +140,15 @@
         try {
             queryResults = queryRequest.performQuery();
         } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
-            logger.log(Level.SEVERE, "Could not perform the query " + queryRequest.getQueryString(), ex); //NON-NLS
+            LOGGER.log(Level.SEVERE, "Could not perform the query " + queryRequest.getQueryString(), ex); //NON-NLS
             MessageNotifyUtil.Notify.error(Bundle.KeywordSearchResultFactory_query_exception_msg() + queryRequest.getQueryString(), ex.getCause().getMessage());
             return false;
         }
-        SleuthkitCase tskCase = null;
+        SleuthkitCase tskCase;
         try {
             tskCase = Case.getCurrentCase().getSleuthkitCase();
         } catch (IllegalStateException ex) {
-            logger.log(Level.SEVERE, "There was no case open.", ex); //NON-NLS
+            LOGGER.log(Level.SEVERE, "There was no case open.", ex); //NON-NLS
             return false;
         }
@@ -159,16 +160,16 @@
              * Get file properties.
              */
             Map<String, Object> properties = new LinkedHashMap<>();
-            Content content = null;
-            String contentName = "";
+            Content content;
+            String contentName;
             try {
                 content = tskCase.getContentById(hit.getContentID());
                 if (content == null) {
-                    logger.log(Level.SEVERE, "There was a error getting content by id."); //NON-NLS
+                    LOGGER.log(Level.SEVERE, "There was an error getting content by id."); //NON-NLS
                     return false;
                 }
             } catch (TskCoreException ex) {
-                logger.log(Level.SEVERE, "There was a error getting content by id.", ex); //NON-NLS
+                LOGGER.log(Level.SEVERE, "There was an error getting content by id.", ex); //NON-NLS
                 return false;
             }
@@ -191,7 +192,7 @@
                 try {
                     hitName = tskCase.getBlackboardArtifact(hit.getArtifactID().get()).getDisplayName() + " Artifact"; //NON-NLS
                 } catch (TskCoreException ex) {
-                    logger.log(Level.SEVERE, "Error getting blckboard artifact by id", ex);
+                    LOGGER.log(Level.SEVERE, "Error getting blackboard artifact by id", ex);
                     return false;
                 }
             } else {
@@ -202,9 +203,13 @@
         }
 
-        // Add all the nodes to toPopulate at once. Minimizes node creation
-        // EDT threads, which can slow and/or hang the UI on large queries.
-        toPopulate.addAll(tempList);
+        if (hitNumber == 0) {
+            toPopulate.add(new KeyValue("This KeyValue Is Empty", 0));
+        } else {
+            // Add all the nodes to toPopulate at once. Minimizes node creation
+            // in EDT threads, which can slow and/or hang the UI on large queries.
+            toPopulate.addAll(tempList);
+        }
 
         //write to bb
         //cannot reuse snippet in BlackboardResultWriter
@@ -239,15 +244,25 @@
         return hits.values();
     }
 
+    @NbBundle.Messages({"KeywordSearchResultFactory.createNodeForKey.noResultsFound.text=No results found."})
     @Override
-    protected Node createNodeForKey(KeyValueQueryContent key) {
-        final Content content = key.getContent();
-        QueryResults hits = key.getHits();
+    protected Node createNodeForKey(KeyValue key) {
+        Node resultNode;
 
-        Node kvNode = new KeyValueNode(key, Children.LEAF, Lookups.singleton(content));
+        if (key instanceof KeyValueQueryContent) {
+            final Content content = ((KeyValueQueryContent) key).getContent();
+            QueryResults hits = ((KeyValueQueryContent) key).getHits();
 
-        //wrap in KeywordSearchFilterNode for the markup content, might need to override FilterNode for more customization
-        return new KeywordSearchFilterNode(hits, kvNode);
+            Node kvNode = new KeyValueNode(key, Children.LEAF, Lookups.singleton(content));
+
+            //wrap in KeywordSearchFilterNode for the markup content, might need to override FilterNode for more customization
+            resultNode = new KeywordSearchFilterNode(hits, kvNode);
+        } else {
+            resultNode = new EmptyNode("This Node Is Empty");
+            resultNode.setDisplayName(NbBundle.getMessage(this.getClass(), "KeywordSearchResultFactory.createNodeForKey.noResultsFound.text"));
+        }
+
+        return resultNode;
     }
@@ -308,7 +323,7 @@
      */
    static class BlackboardResultWriter extends SwingWorker<Object, Void> {
 
-        private static final List<BlackboardResultWriter> writers = new ArrayList<>();
+        private static final List<BlackboardResultWriter> WRITERS = new ArrayList<>();
         private ProgressHandle progress;
         private final KeywordSearchQuery query;
         private final QueryResults hits;
@@ -343,24 +358,24 @@
             try {
                 get();
             } catch (InterruptedException | CancellationException ex) {
-                logger.log(Level.WARNING, "User cancelled writing of ad hoc search query results for '{0}' to the blackboard", query.getQueryString()); //NON-NLS
blackboard", query.getQueryString()); //NON-NLS + LOGGER.log(Level.WARNING, "User cancelled writing of ad hoc search query results for '{0}' to the blackboard", query.getQueryString()); //NON-NLS } catch (ExecutionException ex) { - logger.log(Level.SEVERE, "Error writing of ad hoc search query results for " + query.getQueryString() + " to the blackboard", ex); //NON-NLS + LOGGER.log(Level.SEVERE, "Error writing of ad hoc search query results for " + query.getQueryString() + " to the blackboard", ex); //NON-NLS } } private static synchronized void registerWriter(BlackboardResultWriter writer) { - writers.add(writer); + WRITERS.add(writer); } private static synchronized void deregisterWriter(BlackboardResultWriter writer) { - writers.remove(writer); + WRITERS.remove(writer); } static synchronized void stopAllWriters() { - for (BlackboardResultWriter w : writers) { + for (BlackboardResultWriter w : WRITERS) { w.cancel(true); - writers.remove(w); + WRITERS.remove(w); } } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TikaTextExtractor.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TikaTextExtractor.java index fcbace158d..4b9c3b17cb 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TikaTextExtractor.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TikaTextExtractor.java @@ -33,7 +33,11 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.tika.Tika; import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.AutoDetectParser; import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.Parser; +import org.apache.tika.parser.ParsingReader; +import org.apache.tika.parser.microsoft.OfficeParserConfig; import org.openide.util.NbBundle; import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.datamodel.AbstractFile; @@ -48,6 +52,8 @@ class TikaTextExtractor extends FileTextExtractor { static final private Logger logger = Logger.getLogger(TikaTextExtractor.class.getName()); private final ExecutorService tikaParseExecutor = Executors.newSingleThreadExecutor(); + private final AutoDetectParser parser = new AutoDetectParser(); + private static final List TIKA_SUPPORTED_TYPES = new Tika().getParser().getSupportedTypes(new ParseContext()) .stream() @@ -64,8 +70,18 @@ class TikaTextExtractor extends FileTextExtractor { ReadContentInputStream stream = new ReadContentInputStream(sourceFile); Metadata metadata = new Metadata(); + ParseContext parseContext = new ParseContext(); + parseContext.set(Parser.class, parser); + + // Use the more memory efficient Tika SAX parsers for DOCX and + // PPTX files (it already uses SAX for XLSX). + OfficeParserConfig officeParserConfig = new OfficeParserConfig(); + officeParserConfig.setUseSAXPptxExtractor(true); + officeParserConfig.setUseSAXDocxExtractor(true); + parseContext.set(OfficeParserConfig.class, officeParserConfig); + //Parse the file in a task, a convenient way to have a timeout... - final Future future = tikaParseExecutor.submit(() -> new Tika().parse(stream, metadata)); + final Future future = tikaParseExecutor.submit(() -> new ParsingReader(parser, stream, metadata, parseContext)); try { final Reader tikaReader = future.get(getTimeout(sourceFile.getSize()), TimeUnit.SECONDS);