Merge remote-tracking branch 'upstream/custom-release-2.11.0' into develop

Richard Cordovano 2018-01-18 10:56:06 -05:00
commit a49043d0ab
3 changed files with 74 additions and 40 deletions

MSOfficeEmbeddedContentExtractor.java

@@ -51,6 +51,7 @@ import org.apache.tika.mime.MimeTypeException;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParserConfig;
import org.apache.tika.sax.BodyContentHandler;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.casemodule.Case;
@@ -231,11 +232,13 @@ class MSOfficeEmbeddedContentExtractor {
// write limit (which defaults to 100,000 characters).
ContentHandler contentHandler = new BodyContentHandler(-1);
// TODO: this will be needed once we upgrade to Tika 1.16 or later.
// OfficeParserConfig officeParserConfig = new OfficeParserConfig();
// officeParserConfig.setUseSAXPptxExtractor(true);
// officeParserConfig.setUseSAXDocxExtractor(true);
// parseContext.set(OfficeParserConfig.class, officeParserConfig);
// Use the more memory-efficient Tika SAX parsers for DOCX and
// PPTX files (Tika already uses SAX for XLSX).
OfficeParserConfig officeParserConfig = new OfficeParserConfig();
officeParserConfig.setUseSAXPptxExtractor(true);
officeParserConfig.setUseSAXDocxExtractor(true);
parseContext.set(OfficeParserConfig.class, officeParserConfig);
EmbeddedDocumentExtractor extractor = new EmbeddedContentExtractor(parseContext);
parseContext.set(EmbeddedDocumentExtractor.class, extractor);
ReadContentInputStream stream = new ReadContentInputStream(abstractFile);
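For context, here is a minimal standalone sketch of what this hunk enables, assuming Tika 1.16 or later on the classpath; the SaxOfficeExtractionDemo class and the file-path argument are illustrative, not part of this commit:

import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.OfficeParserConfig;
import org.apache.tika.sax.BodyContentHandler;

public class SaxOfficeExtractionDemo {
    public static void main(String[] args) throws Exception {
        AutoDetectParser parser = new AutoDetectParser();
        ParseContext parseContext = new ParseContext();
        parseContext.set(Parser.class, parser);

        // Route DOCX and PPTX through Tika's streaming SAX extractors
        // instead of the DOM-based POI extractors; XLSX already uses SAX.
        OfficeParserConfig officeParserConfig = new OfficeParserConfig();
        officeParserConfig.setUseSAXDocxExtractor(true);
        officeParserConfig.setUseSAXPptxExtractor(true);
        parseContext.set(OfficeParserConfig.class, officeParserConfig);

        // -1 disables BodyContentHandler's default 100,000 character write limit.
        BodyContentHandler handler = new BodyContentHandler(-1);
        Metadata metadata = new Metadata();
        try (InputStream stream = new FileInputStream(args[0])) {
            parser.parse(stream, handler, metadata, parseContext);
        }
        System.out.println(handler.toString());
    }
}

Registering the OfficeParserConfig on the ParseContext is what activates the SAX extractors; the parser consults the context at parse time, so no other call site changes.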

KeywordSearchResultFactory.java

@@ -1,7 +1,7 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011-2017 Basis Technology Corp.
* Copyright 2011-2018 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -44,6 +44,7 @@ import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil;
import org.sleuthkit.autopsy.datamodel.AbstractAbstractFileNode;
import static org.sleuthkit.autopsy.datamodel.AbstractAbstractFileNode.AbstractFilePropertyType.LOCATION;
import org.sleuthkit.autopsy.datamodel.AbstractFsContentNode;
import org.sleuthkit.autopsy.datamodel.EmptyNode;
import org.sleuthkit.autopsy.datamodel.KeyValue;
import org.sleuthkit.autopsy.datamodel.KeyValueNode;
import org.sleuthkit.autopsy.keywordsearch.KeywordSearchResultFactory.KeyValueQueryContent;
@@ -63,21 +64,21 @@ import org.sleuthkit.datamodel.TskCoreException;
Responsible for assembling nodes and columns correctly and for performing
lazy queries as needed.
*/
class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
class KeywordSearchResultFactory extends ChildFactory<KeyValue> {
private static final Logger logger = Logger.getLogger(KeywordSearchResultFactory.class.getName());
private static final Logger LOGGER = Logger.getLogger(KeywordSearchResultFactory.class.getName());
//common properties (superset of all Node properties) to be displayed as columns
static final List<String> COMMON_PROPERTIES =
Stream.concat(
static final List<String> COMMON_PROPERTIES
= Stream.concat(
Stream.of(
TSK_KEYWORD,
TSK_KEYWORD_REGEXP,
TSK_KEYWORD_PREVIEW)
.map(BlackboardAttribute.ATTRIBUTE_TYPE::getDisplayName),
.map(BlackboardAttribute.ATTRIBUTE_TYPE::getDisplayName),
Arrays.stream(AbstractAbstractFileNode.AbstractFilePropertyType.values())
.map(Object::toString))
.collect(Collectors.toList());
.map(Object::toString))
.collect(Collectors.toList());
private final Collection<QueryRequest> queryRequests;
@@ -93,7 +94,7 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
* @param toPopulate property set map for a Node
*/
@Override
protected boolean createKeys(List<KeyValueQueryContent> toPopulate) {
protected boolean createKeys(List<KeyValue> toPopulate) {
for (QueryRequest queryRequest : queryRequests) {
/**
@@ -130,7 +131,7 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
* @return
*/
@NbBundle.Messages({"KeywordSearchResultFactory.query.exception.msg=Could not perform the query "})
private boolean createFlatKeys(KeywordSearchQuery queryRequest, List<KeyValueQueryContent> toPopulate) {
private boolean createFlatKeys(KeywordSearchQuery queryRequest, List<KeyValue> toPopulate) {
/**
* Execute the requested query.
@@ -139,15 +140,15 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
try {
queryResults = queryRequest.performQuery();
} catch (KeywordSearchModuleException | NoOpenCoreException ex) {
logger.log(Level.SEVERE, "Could not perform the query " + queryRequest.getQueryString(), ex); //NON-NLS
LOGGER.log(Level.SEVERE, "Could not perform the query " + queryRequest.getQueryString(), ex); //NON-NLS
MessageNotifyUtil.Notify.error(Bundle.KeywordSearchResultFactory_query_exception_msg() + queryRequest.getQueryString(), ex.getCause().getMessage());
return false;
}
SleuthkitCase tskCase = null;
SleuthkitCase tskCase;
try {
tskCase = Case.getCurrentCase().getSleuthkitCase();
} catch (IllegalStateException ex) {
logger.log(Level.SEVERE, "There was no case open.", ex); //NON-NLS
LOGGER.log(Level.SEVERE, "There was no case open.", ex); //NON-NLS
return false;
}
@@ -159,16 +160,16 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
* Get file properties.
*/
Map<String, Object> properties = new LinkedHashMap<>();
Content content = null;
String contentName = "";
Content content;
String contentName;
try {
content = tskCase.getContentById(hit.getContentID());
if (content == null) {
logger.log(Level.SEVERE, "There was a error getting content by id."); //NON-NLS
LOGGER.log(Level.SEVERE, "There was a error getting content by id."); //NON-NLS
return false;
}
} catch (TskCoreException ex) {
logger.log(Level.SEVERE, "There was a error getting content by id.", ex); //NON-NLS
LOGGER.log(Level.SEVERE, "There was a error getting content by id.", ex); //NON-NLS
return false;
}
@@ -191,7 +192,7 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
try {
hitName = tskCase.getBlackboardArtifact(hit.getArtifactID().get()).getDisplayName() + " Artifact"; //NON-NLS
} catch (TskCoreException ex) {
logger.log(Level.SEVERE, "Error getting blckboard artifact by id", ex);
LOGGER.log(Level.SEVERE, "Error getting blckboard artifact by id", ex);
return false;
}
} else {
@@ -202,9 +203,13 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
}
// Add all the nodes to toPopulate at once. Minimizes node creation
// on the EDT thread, which can slow and/or hang the UI on large queries.
toPopulate.addAll(tempList);
if (hitNumber == 0) {
toPopulate.add(new KeyValue("This KeyValue Is Empty", 0));
} else {
// Add all the nodes to toPopulate at once. Minimizes node creation
// on the EDT thread, which can slow and/or hang the UI on large queries.
toPopulate.addAll(tempList);
}
//write to the blackboard
//cannot reuse the snippet code in BlackboardResultWriter
@@ -239,15 +244,25 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
return hits.values();
}
@NbBundle.Messages({"KeywordSearchResultFactory.createNodeForKey.noResultsFound.text=No results found."})
@Override
protected Node createNodeForKey(KeyValueQueryContent key) {
final Content content = key.getContent();
QueryResults hits = key.getHits();
protected Node createNodeForKey(KeyValue key) {
Node resultNode;
Node kvNode = new KeyValueNode(key, Children.LEAF, Lookups.singleton(content));
if (key instanceof KeyValueQueryContent) {
final Content content = ((KeyValueQueryContent) key).getContent();
QueryResults hits = ((KeyValueQueryContent) key).getHits();
//wrap in KeywordSearchFilterNode for the markup content; might need to override FilterNode for more customization
return new KeywordSearchFilterNode(hits, kvNode);
Node kvNode = new KeyValueNode(key, Children.LEAF, Lookups.singleton(content));
//wrap in KeywordSearchFilterNode for the markup content; might need to override FilterNode for more customization
resultNode = new KeywordSearchFilterNode(hits, kvNode);
} else {
resultNode = new EmptyNode("This Node Is Empty");
resultNode.setDisplayName(NbBundle.getMessage(this.getClass(), "KeywordSearchResultFactory.createNodeForKey.noResultsFound.text"));
}
return resultNode;
}
@@ -308,7 +323,7 @@
*/
static class BlackboardResultWriter extends SwingWorker<Void, Void> {
private static final List<BlackboardResultWriter> writers = new ArrayList<>();
private static final List<BlackboardResultWriter> WRITERS = new ArrayList<>();
private ProgressHandle progress;
private final KeywordSearchQuery query;
private final QueryResults hits;
@@ -343,24 +358,24 @@
try {
get();
} catch (InterruptedException | CancellationException ex) {
logger.log(Level.WARNING, "User cancelled writing ad hoc search query results for '{0}' to the blackboard", query.getQueryString()); //NON-NLS
LOGGER.log(Level.WARNING, "User cancelled writing ad hoc search query results for '{0}' to the blackboard", query.getQueryString()); //NON-NLS
} catch (ExecutionException ex) {
logger.log(Level.SEVERE, "Error writing of ad hoc search query results for " + query.getQueryString() + " to the blackboard", ex); //NON-NLS
LOGGER.log(Level.SEVERE, "Error writing of ad hoc search query results for " + query.getQueryString() + " to the blackboard", ex); //NON-NLS
}
}
private static synchronized void registerWriter(BlackboardResultWriter writer) {
writers.add(writer);
WRITERS.add(writer);
}
private static synchronized void deregisterWriter(BlackboardResultWriter writer) {
writers.remove(writer);
WRITERS.remove(writer);
}
static synchronized void stopAllWriters() {
for (BlackboardResultWriter w : writers) {
for (BlackboardResultWriter w : WRITERS) {
w.cancel(true);
writers.remove(w);
WRITERS.remove(w);
}
}
}
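The no-results handling added above follows a standard NetBeans ChildFactory pattern: publish a sentinel key when the query comes back empty, then map that key to a placeholder node. A minimal sketch, decoupled from the keyword-search types; PlaceholderChildFactory and the String keys are illustrative, where the real code uses KeyValue and EmptyNode:

import java.util.List;
import org.openide.nodes.AbstractNode;
import org.openide.nodes.ChildFactory;
import org.openide.nodes.Children;
import org.openide.nodes.Node;

class PlaceholderChildFactory extends ChildFactory<String> {

    private final List<String> results; // stand-in for real query hits

    PlaceholderChildFactory(List<String> results) {
        this.results = results;
    }

    @Override
    protected boolean createKeys(List<String> toPopulate) {
        if (results.isEmpty()) {
            toPopulate.add("EMPTY"); // sentinel key, rendered as a placeholder node
        } else {
            toPopulate.addAll(results); // publish all keys in one batch
        }
        return true;
    }

    @Override
    protected Node createNodeForKey(String key) {
        AbstractNode node = new AbstractNode(Children.LEAF);
        node.setDisplayName("EMPTY".equals(key) ? "No results found." : key);
        return node;
    }
}

Without the sentinel, an empty key list leaves the parent node childless and the UI gives no feedback that the search actually completed.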

TikaTextExtractor.java

@@ -33,7 +33,11 @@ import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.tika.Tika;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParsingReader;
import org.apache.tika.parser.microsoft.OfficeParserConfig;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.datamodel.AbstractFile;
@@ -48,6 +52,8 @@ class TikaTextExtractor extends FileTextExtractor {
static final private Logger logger = Logger.getLogger(TikaTextExtractor.class.getName());
private final ExecutorService tikaParseExecutor = Executors.newSingleThreadExecutor();
private final AutoDetectParser parser = new AutoDetectParser();
private static final List<String> TIKA_SUPPORTED_TYPES
= new Tika().getParser().getSupportedTypes(new ParseContext())
.stream()
@@ -64,8 +70,18 @@ class TikaTextExtractor extends FileTextExtractor {
ReadContentInputStream stream = new ReadContentInputStream(sourceFile);
Metadata metadata = new Metadata();
ParseContext parseContext = new ParseContext();
parseContext.set(Parser.class, parser);
// Use the more memory-efficient Tika SAX parsers for DOCX and
// PPTX files (Tika already uses SAX for XLSX).
OfficeParserConfig officeParserConfig = new OfficeParserConfig();
officeParserConfig.setUseSAXPptxExtractor(true);
officeParserConfig.setUseSAXDocxExtractor(true);
parseContext.set(OfficeParserConfig.class, officeParserConfig);
//Parse the file in a task, a convenient way to have a timeout...
final Future<Reader> future = tikaParseExecutor.submit(() -> new Tika().parse(stream, metadata));
final Future<Reader> future = tikaParseExecutor.submit(() -> new ParsingReader(parser, stream, metadata, parseContext));
try {
final Reader tikaReader = future.get(getTimeout(sourceFile.getSize()), TimeUnit.SECONDS);
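The new parse path bounds Tika with a timeout instead of trusting every file to parse cleanly. A condensed sketch of that pattern under the same assumptions as the code above; TimedTikaParse and openReader are illustrative names:

import java.io.InputStream;
import java.io.Reader;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParsingReader;

class TimedTikaParse {

    private final ExecutorService executor = Executors.newSingleThreadExecutor();
    private final AutoDetectParser parser = new AutoDetectParser();

    // ParsingReader kicks off the parse on construction (it buffers the first
    // character of output), so building it inside a Future lets the caller
    // give up after timeoutSeconds instead of blocking indefinitely on a
    // pathological file.
    Reader openReader(InputStream stream, long timeoutSeconds) throws Exception {
        Metadata metadata = new Metadata();
        ParseContext context = new ParseContext();
        context.set(Parser.class, parser);

        Future<Reader> future = executor.submit(
                () -> new ParsingReader(parser, stream, metadata, context));
        try {
            return future.get(timeoutSeconds, TimeUnit.SECONDS);
        } catch (TimeoutException ex) {
            future.cancel(true); // interrupt the stuck parse thread
            throw ex;
        }
    }
}

In the diff above, the timeout is derived from the file size via getTimeout(sourceFile.getSize()), so larger files get proportionally more time before the parse is abandoned.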