Merge remote-tracking branch 'upstream/custom-release-2.11.0' into develop

Richard Cordovano 2018-01-18 10:56:06 -05:00
commit a49043d0ab
3 changed files with 74 additions and 40 deletions

MSOfficeEmbeddedContentExtractor.java

@@ -51,6 +51,7 @@ import org.apache.tika.mime.MimeTypeException;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.microsoft.OfficeParserConfig;
 import org.apache.tika.sax.BodyContentHandler;
 import org.openide.util.NbBundle;
 import org.sleuthkit.autopsy.casemodule.Case;
@@ -231,11 +232,13 @@ class MSOfficeEmbeddedContentExtractor {
 // write limit (which defaults to 100,000 characters.
 ContentHandler contentHandler = new BodyContentHandler(-1);
-// TODO: this will be needed once we upgrade to Tika 1.16 or later.
-// OfficeParserConfig officeParserConfig = new OfficeParserConfig();
-// officeParserConfig.setUseSAXPptxExtractor(true);
-// officeParserConfig.setUseSAXDocxExtractor(true);
-// parseContext.set(OfficeParserConfig.class, officeParserConfig);
+// Use the more memory efficient Tika SAX parsers for DOCX and
+// PPTX files (it already uses SAX for XLSX).
+OfficeParserConfig officeParserConfig = new OfficeParserConfig();
+officeParserConfig.setUseSAXPptxExtractor(true);
+officeParserConfig.setUseSAXDocxExtractor(true);
+parseContext.set(OfficeParserConfig.class, officeParserConfig);
 EmbeddedDocumentExtractor extractor = new EmbeddedContentExtractor(parseContext);
 parseContext.set(EmbeddedDocumentExtractor.class, extractor);
 ReadContentInputStream stream = new ReadContentInputStream(abstractFile);
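The hunk above switches the embedded-content extractor from Tika's default DOM-based OOXML handling to its streaming SAX extractors, which keeps memory use bounded on large DOCX and PPTX files (XLSX already goes through a SAX path). A minimal, self-contained sketch of the same configuration against the Tika 1.16+ API follows; the class name and the command-line input file are illustrative and not taken from the Autopsy source:

import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.microsoft.OfficeParserConfig;
import org.apache.tika.sax.BodyContentHandler;

public class SaxOoxmlExtractionSketch {
    public static void main(String[] args) throws Exception {
        AutoDetectParser parser = new AutoDetectParser();
        ParseContext parseContext = new ParseContext();

        // Ask Tika to use its streaming SAX extractors for DOCX and PPTX;
        // XLSX is already handled by a SAX-based extractor.
        OfficeParserConfig officeParserConfig = new OfficeParserConfig();
        officeParserConfig.setUseSAXDocxExtractor(true);
        officeParserConfig.setUseSAXPptxExtractor(true);
        parseContext.set(OfficeParserConfig.class, officeParserConfig);

        // -1 disables BodyContentHandler's default 100,000 character write limit.
        BodyContentHandler contentHandler = new BodyContentHandler(-1);
        Metadata metadata = new Metadata();
        try (InputStream stream = new FileInputStream(args[0])) { // hypothetical input document
            parser.parse(stream, contentHandler, metadata, parseContext);
        }
        System.out.println(contentHandler.toString());
    }
}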

KeywordSearchResultFactory.java

@@ -1,7 +1,7 @@
 /*
  * Autopsy Forensic Browser
  *
- * Copyright 2011-2017 Basis Technology Corp.
+ * Copyright 2011-2018 Basis Technology Corp.
  * Contact: carrier <at> sleuthkit <dot> org
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -44,6 +44,7 @@ import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil;
 import org.sleuthkit.autopsy.datamodel.AbstractAbstractFileNode;
 import static org.sleuthkit.autopsy.datamodel.AbstractAbstractFileNode.AbstractFilePropertyType.LOCATION;
 import org.sleuthkit.autopsy.datamodel.AbstractFsContentNode;
+import org.sleuthkit.autopsy.datamodel.EmptyNode;
 import org.sleuthkit.autopsy.datamodel.KeyValue;
 import org.sleuthkit.autopsy.datamodel.KeyValueNode;
 import org.sleuthkit.autopsy.keywordsearch.KeywordSearchResultFactory.KeyValueQueryContent;
@@ -63,13 +64,13 @@ import org.sleuthkit.datamodel.TskCoreException;
  * Responsible for assembling nodes and columns in the right way and performing
  * lazy queries as needed.
  */
-class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
+class KeywordSearchResultFactory extends ChildFactory<KeyValue> {
-private static final Logger logger = Logger.getLogger(KeywordSearchResultFactory.class.getName());
+private static final Logger LOGGER = Logger.getLogger(KeywordSearchResultFactory.class.getName());
 //common properties (superset of all Node properties) to be displayed as columns
-static final List<String> COMMON_PROPERTIES =
-Stream.concat(
+static final List<String> COMMON_PROPERTIES
+= Stream.concat(
 Stream.of(
 TSK_KEYWORD,
 TSK_KEYWORD_REGEXP,
@@ -93,7 +94,7 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
 * @param toPopulate property set map for a Node
 */
 @Override
-protected boolean createKeys(List<KeyValueQueryContent> toPopulate) {
+protected boolean createKeys(List<KeyValue> toPopulate) {
 for (QueryRequest queryRequest : queryRequests) {
 /**
@@ -130,7 +131,7 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
 * @return
 */
 @NbBundle.Messages({"KeywordSearchResultFactory.query.exception.msg=Could not perform the query "})
-private boolean createFlatKeys(KeywordSearchQuery queryRequest, List<KeyValueQueryContent> toPopulate) {
+private boolean createFlatKeys(KeywordSearchQuery queryRequest, List<KeyValue> toPopulate) {
 /**
 * Execute the requested query.
@@ -139,15 +140,15 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
 try {
 queryResults = queryRequest.performQuery();
 } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
-logger.log(Level.SEVERE, "Could not perform the query " + queryRequest.getQueryString(), ex); //NON-NLS
+LOGGER.log(Level.SEVERE, "Could not perform the query " + queryRequest.getQueryString(), ex); //NON-NLS
 MessageNotifyUtil.Notify.error(Bundle.KeywordSearchResultFactory_query_exception_msg() + queryRequest.getQueryString(), ex.getCause().getMessage());
 return false;
 }
-SleuthkitCase tskCase = null;
+SleuthkitCase tskCase;
 try {
 tskCase = Case.getCurrentCase().getSleuthkitCase();
 } catch (IllegalStateException ex) {
-logger.log(Level.SEVERE, "There was no case open.", ex); //NON-NLS
+LOGGER.log(Level.SEVERE, "There was no case open.", ex); //NON-NLS
 return false;
 }
@@ -159,16 +160,16 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
 * Get file properties.
 */
 Map<String, Object> properties = new LinkedHashMap<>();
-Content content = null;
-String contentName = "";
+Content content;
+String contentName;
 try {
 content = tskCase.getContentById(hit.getContentID());
 if (content == null) {
-logger.log(Level.SEVERE, "There was a error getting content by id."); //NON-NLS
+LOGGER.log(Level.SEVERE, "There was a error getting content by id."); //NON-NLS
 return false;
 }
 } catch (TskCoreException ex) {
-logger.log(Level.SEVERE, "There was a error getting content by id.", ex); //NON-NLS
+LOGGER.log(Level.SEVERE, "There was a error getting content by id.", ex); //NON-NLS
 return false;
 }
@@ -191,7 +192,7 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
 try {
 hitName = tskCase.getBlackboardArtifact(hit.getArtifactID().get()).getDisplayName() + " Artifact"; //NON-NLS
 } catch (TskCoreException ex) {
-logger.log(Level.SEVERE, "Error getting blckboard artifact by id", ex);
+LOGGER.log(Level.SEVERE, "Error getting blckboard artifact by id", ex);
 return false;
 }
 } else {
@@ -202,9 +203,13 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
 }
+if (hitNumber == 0) {
+toPopulate.add(new KeyValue("This KeyValue Is Empty", 0));
+} else {
 // Add all the nodes to toPopulate at once. Minimizes node creation
 // EDT threads, which can slow and/or hang the UI on large queries.
 toPopulate.addAll(tempList);
+}
 //write to bb
 //cannot reuse snippet in BlackboardResultWriter
@@ -239,15 +244,25 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
 return hits.values();
 }
+@NbBundle.Messages({"KeywordSearchResultFactory.createNodeForKey.noResultsFound.text=No results found."})
 @Override
-protected Node createNodeForKey(KeyValueQueryContent key) {
-final Content content = key.getContent();
-QueryResults hits = key.getHits();
+protected Node createNodeForKey(KeyValue key) {
+Node resultNode;
+if (key instanceof KeyValueQueryContent) {
+final Content content = ((KeyValueQueryContent) key).getContent();
+QueryResults hits = ((KeyValueQueryContent) key).getHits();
 Node kvNode = new KeyValueNode(key, Children.LEAF, Lookups.singleton(content));
 //wrap in KeywordSearchFilterNode for the markup content, might need to override FilterNode for more customization
-return new KeywordSearchFilterNode(hits, kvNode);
+resultNode = new KeywordSearchFilterNode(hits, kvNode);
+} else {
+resultNode = new EmptyNode("This Node Is Empty");
+resultNode.setDisplayName(NbBundle.getMessage(this.getClass(), "KeywordSearchResultFactory.createNodeForKey.noResultsFound.text"));
+}
+return resultNode;
 }
@@ -308,7 +323,7 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
 */
 static class BlackboardResultWriter extends SwingWorker<Void, Void> {
-private static final List<BlackboardResultWriter> writers = new ArrayList<>();
+private static final List<BlackboardResultWriter> WRITERS = new ArrayList<>();
 private ProgressHandle progress;
 private final KeywordSearchQuery query;
 private final QueryResults hits;
@@ -343,24 +358,24 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
 try {
 get();
 } catch (InterruptedException | CancellationException ex) {
-logger.log(Level.WARNING, "User cancelled writing of ad hoc search query results for '{0}' to the blackboard", query.getQueryString()); //NON-NLS
+LOGGER.log(Level.WARNING, "User cancelled writing of ad hoc search query results for '{0}' to the blackboard", query.getQueryString()); //NON-NLS
 } catch (ExecutionException ex) {
-logger.log(Level.SEVERE, "Error writing of ad hoc search query results for " + query.getQueryString() + " to the blackboard", ex); //NON-NLS
+LOGGER.log(Level.SEVERE, "Error writing of ad hoc search query results for " + query.getQueryString() + " to the blackboard", ex); //NON-NLS
 }
 }
 private static synchronized void registerWriter(BlackboardResultWriter writer) {
-writers.add(writer);
+WRITERS.add(writer);
 }
 private static synchronized void deregisterWriter(BlackboardResultWriter writer) {
-writers.remove(writer);
+WRITERS.remove(writer);
 }
 static synchronized void stopAllWriters() {
-for (BlackboardResultWriter w : writers) {
+for (BlackboardResultWriter w : WRITERS) {
 w.cancel(true);
-writers.remove(w);
+WRITERS.remove(w);
 }
 }
 }
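Taken together, the KeywordSearchResultFactory changes implement a common NetBeans Nodes-API pattern: when a query produces no hits, createKeys publishes a single placeholder key so that createNodeForKey can return a node whose display name explains the empty result, rather than leaving the result viewer blank. A minimal sketch of that pattern using only the org.openide.nodes API; SearchHit, SearchResultFactory, and the placeholder string are illustrative names, not Autopsy types:

import java.util.List;
import org.openide.nodes.AbstractNode;
import org.openide.nodes.ChildFactory;
import org.openide.nodes.Children;
import org.openide.nodes.Node;

// Hypothetical hit type standing in for Autopsy's KeyValueQueryContent.
class SearchHit {
    final String title;
    SearchHit(String title) { this.title = title; }
}

class SearchResultFactory extends ChildFactory<Object> {

    private final List<SearchHit> hits;

    SearchResultFactory(List<SearchHit> hits) {
        this.hits = hits;
    }

    @Override
    protected boolean createKeys(List<Object> toPopulate) {
        if (hits.isEmpty()) {
            // Publish one placeholder key so the view is not simply empty.
            toPopulate.add("NO_RESULTS_PLACEHOLDER");
        } else {
            // Add all keys at once to minimize node creation on the EDT.
            toPopulate.addAll(hits);
        }
        return true;
    }

    @Override
    protected Node createNodeForKey(Object key) {
        AbstractNode node = new AbstractNode(Children.LEAF);
        if (key instanceof SearchHit) {
            node.setDisplayName(((SearchHit) key).title);
        } else {
            // Placeholder key: show an explanatory leaf node instead.
            node.setDisplayName("No results found.");
        }
        return node;
    }
}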

TikaTextExtractor.java

@@ -33,7 +33,11 @@ import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import org.apache.tika.Tika;
 import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.ParsingReader;
+import org.apache.tika.parser.microsoft.OfficeParserConfig;
 import org.openide.util.NbBundle;
 import org.sleuthkit.autopsy.coreutils.Logger;
 import org.sleuthkit.datamodel.AbstractFile;
@@ -48,6 +52,8 @@ class TikaTextExtractor extends FileTextExtractor {
 static final private Logger logger = Logger.getLogger(TikaTextExtractor.class.getName());
 private final ExecutorService tikaParseExecutor = Executors.newSingleThreadExecutor();
+private final AutoDetectParser parser = new AutoDetectParser();
 private static final List<String> TIKA_SUPPORTED_TYPES
 = new Tika().getParser().getSupportedTypes(new ParseContext())
 .stream()
@@ -64,8 +70,18 @@ class TikaTextExtractor extends FileTextExtractor {
 ReadContentInputStream stream = new ReadContentInputStream(sourceFile);
 Metadata metadata = new Metadata();
+ParseContext parseContext = new ParseContext();
+parseContext.set(Parser.class, parser);
+// Use the more memory efficient Tika SAX parsers for DOCX and
+// PPTX files (it already uses SAX for XLSX).
+OfficeParserConfig officeParserConfig = new OfficeParserConfig();
+officeParserConfig.setUseSAXPptxExtractor(true);
+officeParserConfig.setUseSAXDocxExtractor(true);
+parseContext.set(OfficeParserConfig.class, officeParserConfig);
 //Parse the file in a task, a convenient way to have a timeout...
-final Future<Reader> future = tikaParseExecutor.submit(() -> new Tika().parse(stream, metadata));
+final Future<Reader> future = tikaParseExecutor.submit(() -> new ParsingReader(parser, stream, metadata, parseContext));
 try {
 final Reader tikaReader = future.get(getTimeout(sourceFile.getSize()), TimeUnit.SECONDS);
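The TikaTextExtractor hunk above also illustrates the parse-with-timeout idiom: the ParsingReader is constructed on a single-thread executor and the caller waits on the Future with a size-based timeout, so a parser that hangs on a pathological file cannot stall extraction indefinitely. A condensed sketch of that idiom, assuming a Tika version that provides ParsingReader and AutoDetectParser; the class and method names here are illustrative:

import java.io.InputStream;
import java.io.Reader;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParsingReader;

class TimedTikaParse {

    private final ExecutorService tikaParseExecutor = Executors.newSingleThreadExecutor();
    private final AutoDetectParser parser = new AutoDetectParser();

    // Returns a Reader over the extracted text; the timeout bounds how long we
    // wait for Tika to hand back the Reader before giving up on the file.
    Reader parseWithTimeout(InputStream stream, long timeoutSeconds) throws Exception {
        Metadata metadata = new Metadata();
        ParseContext parseContext = new ParseContext();
        // Registering the parser lets embedded documents be parsed with the same instance.
        parseContext.set(Parser.class, parser);

        Future<Reader> future = tikaParseExecutor.submit(
                () -> new ParsingReader(parser, stream, metadata, parseContext));
        try {
            return future.get(timeoutSeconds, TimeUnit.SECONDS);
        } catch (TimeoutException ex) {
            future.cancel(true); // abandon a parse that never produced a Reader
            throw ex;
        }
    }
}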