Merge pull request #2869 from millmanorama/2706-regex-hit-count

2706 regex hit count
This commit is contained in:
Richard Cordovano 2017-06-22 11:58:46 -04:00 committed by GitHub
commit 7394ac1244

View File

@ -1,15 +1,15 @@
/* /*
* Autopsy Forensic Browser * Autopsy Forensic Browser
* *
* Copyright 2011-2016 Basis Technology Corp. * Copyright 2011-2017 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org * Contact: carrier <at> sleuthkit <dot> org
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -33,6 +33,8 @@ import java.util.Observable;
import java.util.Observer; import java.util.Observer;
import java.util.Set; import java.util.Set;
import java.util.logging.Level; import java.util.logging.Level;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.openide.nodes.ChildFactory; import org.openide.nodes.ChildFactory;
import org.openide.nodes.Children; import org.openide.nodes.Children;
import org.openide.nodes.Node; import org.openide.nodes.Node;
@ -68,18 +70,18 @@ public class KeywordHits implements AutopsyVisitableItem {
// String used in the instance MAP so that exact matches and substring can fit into the same // String used in the instance MAP so that exact matches and substring can fit into the same
// data structure as regexps, even though they don't use instances. // data structure as regexps, even though they don't use instances.
private final String DEFAULT_INSTANCE_NAME = "DEFAULT_INSTANCE_NAME"; private final String DEFAULT_INSTANCE_NAME = "DEFAULT_INSTANCE_NAME";
public KeywordHits(SleuthkitCase skCase) { public KeywordHits(SleuthkitCase skCase) {
this.skCase = skCase; this.skCase = skCase;
keywordResults = new KeywordResults(); keywordResults = new KeywordResults();
} }
/* All of these maps and code assume the following:
* Regexps will have an 'instance' layer that shows the specific words that matched the regexp
* Exact match and substring will not have the instance layer and instead will have the specific hits
* below their term.
*/
/*
* All of these maps and code assume the following: Regexps will have an
* 'instance' layer that shows the specific words that matched the regexp.
* Exact match and substring will not have the instance layer and instead
* will have the specific hits below their term.
*/
private final class KeywordResults extends Observable { private final class KeywordResults extends Observable {
// Map from listName/Type to Map of keywords/regexp to Map of instance terms to Set of artifact Ids // Map from listName/Type to Map of keywords/regexp to Map of instance terms to Set of artifact Ids
@ -104,11 +106,12 @@ public class KeywordHits implements AutopsyVisitableItem {
} }
/** /**
* Get keywords used in a given list. Will be regexp patterns for regexps * Get keywords used in a given list. Will be regexp patterns for
* and search term for non-regexps. * regexps and search term for non-regexps.
* *
* @param listName Keyword list name * @param listName Keyword list name
* @return *
* @return
*/ */
List<String> getKeywords(String listName) { List<String> getKeywords(String listName) {
List<String> keywords; List<String> keywords;
@ -118,15 +121,16 @@ public class KeywordHits implements AutopsyVisitableItem {
Collections.sort(keywords); Collections.sort(keywords);
return keywords; return keywords;
} }
/** /**
* Get specific keyword terms that were found for a given list * Get specific keyword terms that were found for a given list and
* and keyword combination. For example, a specific phone number for a * keyword combination. For example, a specific phone number for a phone
* phone number regexp. Will be the default instance for non-regexp searches. * number regexp. Will be the default instance for non-regexp searches.
* *
* @param listName Keyword list name * @param listName Keyword list name
* @param keyword search term (regexp pattern or exact match term) * @param keyword search term (regexp pattern or exact match term)
* @return *
* @return
*/ */
List<String> getKeywordInstances(String listName, String keyword) { List<String> getKeywordInstances(String listName, String keyword) {
List<String> instances; List<String> instances;
@ -139,10 +143,14 @@ public class KeywordHits implements AutopsyVisitableItem {
/** /**
* Get artifact ids for a given list, keyword, and instance triple * Get artifact ids for a given list, keyword, and instance triple
* @param listName Keyword list name *
* @param keyword search term (regexp pattern or exact match term) * @param listName Keyword list name
* @param keywordInstance specific term that matched (or default instance name) * @param keyword search term (regexp pattern or exact match
* @return * term)
* @param keywordInstance specific term that matched (or default
* instance name)
*
* @return
*/ */
Set<Long> getArtifactIds(String listName, String keyword, String keywordInstance) { Set<Long> getArtifactIds(String listName, String keyword, String keywordInstance) {
synchronized (topLevelMap) { synchronized (topLevelMap) {
@ -151,11 +159,13 @@ public class KeywordHits implements AutopsyVisitableItem {
} }
/** /**
* Add a hit for a regexp to the internal data structure. * Add a hit for a regexp to the internal data structure.
* @param listMap Maps keywords/regexp to instances to artifact IDs *
* @param regExp Regular expression that was used in search * @param listMap Maps keywords/regexp to instances to artifact
* IDs
* @param regExp Regular expression that was used in search
* @param keywordInstance Specific term that matched regexp * @param keywordInstance Specific term that matched regexp
* @param artifactId Artifact id of file that had hit * @param artifactId Artifact id of file that had hit
*/ */
void addRegExpToList(Map<String, Map<String, Set<Long>>> listMap, String regExp, String keywordInstance, Long artifactId) { void addRegExpToList(Map<String, Map<String, Set<Long>>> listMap, String regExp, String keywordInstance, Long artifactId) {
if (listMap.containsKey(regExp) == false) { if (listMap.containsKey(regExp) == false) {
@ -171,12 +181,13 @@ public class KeywordHits implements AutopsyVisitableItem {
// add this ID to the instance // add this ID to the instance
instanceMap.get(keywordInstance).add(artifactId); instanceMap.get(keywordInstance).add(artifactId);
} }
/** /**
* Add a hit for an exact match (or substring) to the internal data structure. * Add a hit for an exact match (or substring) to the internal data
* @param listMap Maps keywords/regexp to instances to artifact IDs * structure.
* @param keyWord Term that was hit *
* @param listMap Maps keywords/regexp to instances to artifact IDs
* @param keyWord Term that was hit
* @param artifactId Artifact id of file that had hit * @param artifactId Artifact id of file that had hit
*/ */
void addNonRegExpMatchToList(Map<String, Map<String, Set<Long>>> listMap, String keyWord, Long artifactId) { void addNonRegExpMatchToList(Map<String, Map<String, Set<Long>>> listMap, String keyWord, Long artifactId) {
@ -191,10 +202,13 @@ public class KeywordHits implements AutopsyVisitableItem {
} }
instanceMap.get(DEFAULT_INSTANCE_NAME).add(artifactId); instanceMap.get(DEFAULT_INSTANCE_NAME).add(artifactId);
} }
/** /**
* Populate data structure for the tree based on the keyword hit artifacts * Populate data structure for the tree based on the keyword hit
* @param artifactIds Maps Artifact ID to map of attribute types to attribute values * artifacts
*
* @param artifactIds Maps Artifact ID to map of attribute types to
* attribute values
*/ */
void populateTreeMaps(Map<Long, Map<Long, String>> artifactIds) { void populateTreeMaps(Map<Long, Map<Long, String>> artifactIds) {
synchronized (topLevelMap) { synchronized (topLevelMap) {
@ -223,7 +237,7 @@ public class KeywordHits implements AutopsyVisitableItem {
String reg = attributes.get(Long.valueOf(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID())); String reg = attributes.get(Long.valueOf(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID()));
// new in 4.4 // new in 4.4
String kwType = attributes.get(Long.valueOf(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE.getTypeID())); String kwType = attributes.get(Long.valueOf(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE.getTypeID()));
// part of a list // part of a list
if (listName != null) { if (listName != null) {
// get or create list entry // get or create list entry
@ -231,7 +245,7 @@ public class KeywordHits implements AutopsyVisitableItem {
listsMap.put(listName, new LinkedHashMap<>()); listsMap.put(listName, new LinkedHashMap<>());
} }
Map<String, Map<String, Set<Long>>> listMap = listsMap.get(listName); Map<String, Map<String, Set<Long>>> listMap = listsMap.get(listName);
// substring, treated same as exact match // substring, treated same as exact match
// Enum for "1" is defined in KeywordSearch.java // Enum for "1" is defined in KeywordSearch.java
if ((kwType != null) && (kwType.equals("1"))) { if ((kwType != null) && (kwType.equals("1"))) {
@ -241,8 +255,7 @@ public class KeywordHits implements AutopsyVisitableItem {
} else { } else {
addNonRegExpMatchToList(listMap, word, id); addNonRegExpMatchToList(listMap, word, id);
} }
} } else if (reg != null) {
else if (reg != null) {
addRegExpToList(listMap, reg, word, id); addRegExpToList(listMap, reg, word, id);
} else { } else {
addNonRegExpMatchToList(listMap, word, id); addNonRegExpMatchToList(listMap, word, id);
@ -259,11 +272,11 @@ public class KeywordHits implements AutopsyVisitableItem {
} // literal, single term } // literal, single term
else { else {
addNonRegExpMatchToList(literalMap, word, id); addNonRegExpMatchToList(literalMap, word, id);
} }
} }
topLevelMap.putAll(listsMap); topLevelMap.putAll(listsMap);
} }
setChanged(); setChanged();
notifyObservers(); notifyObservers();
} }
@ -299,9 +312,9 @@ public class KeywordHits implements AutopsyVisitableItem {
long artifactId = resultSet.getLong("artifact_id"); //NON-NLS long artifactId = resultSet.getLong("artifact_id"); //NON-NLS
long typeId = resultSet.getLong("attribute_type_id"); //NON-NLS long typeId = resultSet.getLong("attribute_type_id"); //NON-NLS
if (!artifactIds.containsKey(artifactId)) { if (!artifactIds.containsKey(artifactId)) {
artifactIds.put(artifactId, new LinkedHashMap<Long, String>()); artifactIds.put(artifactId, new LinkedHashMap<>());
} }
if (valueStr != null && !valueStr.equals("")) { if (StringUtils.isNotEmpty(valueStr)) {
artifactIds.get(artifactId).put(typeId, valueStr); artifactIds.get(artifactId).put(typeId, valueStr);
} else { } else {
// Keyword Search Type is an int // Keyword Search Type is an int
@ -366,7 +379,7 @@ public class KeywordHits implements AutopsyVisitableItem {
} }
/** /**
* Creates the list nodes * Creates the list nodes
*/ */
private class ListFactory extends ChildFactory.Detachable<String> implements Observer { private class ListFactory extends ChildFactory.Detachable<String> implements Observer {
@ -458,8 +471,9 @@ public class KeywordHits implements AutopsyVisitableItem {
} }
} }
/** /**
* Represents the keyword search lists (or default groupings if list was not given) * Represents the keyword search lists (or default groupings if list was not
* given)
*/ */
public class ListNode extends DisplayableItemNode implements Observer { public class ListNode extends DisplayableItemNode implements Observer {
@ -583,17 +597,17 @@ public class KeywordHits implements AutopsyVisitableItem {
this.setIconBaseWithExtension("org/sleuthkit/autopsy/images/keyword_hits.png"); //NON-NLS this.setIconBaseWithExtension("org/sleuthkit/autopsy/images/keyword_hits.png"); //NON-NLS
updateDisplayName(); updateDisplayName();
keywordResults.addObserver(this); keywordResults.addObserver(this);
} }
private void updateDisplayName() { private void updateDisplayName() {
int totalDescendants = 0; super.setDisplayName(keyword + " (" + countTotalDescendants() + ")");
}
for (String instance : keywordResults.getKeywordInstances(setName, keyword)) {
Set<Long> ids = keywordResults.getArtifactIds(setName, keyword, instance); private int countTotalDescendants() {
totalDescendants += ids.size(); return keywordResults.getKeywordInstances(setName, keyword).stream()
} .mapToInt(instance -> keywordResults.getArtifactIds(setName, keyword, instance).size())
.sum();
super.setDisplayName(keyword + " (" + totalDescendants + ")");
} }
@Override @Override
@ -605,12 +619,7 @@ public class KeywordHits implements AutopsyVisitableItem {
public boolean isLeafTypeNode() { public boolean isLeafTypeNode() {
List<String> instances = keywordResults.getKeywordInstances(setName, keyword); List<String> instances = keywordResults.getKeywordInstances(setName, keyword);
// is this an exact/substring match (i.e. did we use the DEFAULT name)? // is this an exact/substring match (i.e. did we use the DEFAULT name)?
if (instances.size() == 1 && instances.get(0).equals(DEFAULT_INSTANCE_NAME)) { return instances.size() == 1 && instances.get(0).equals(DEFAULT_INSTANCE_NAME);
return true;
}
else {
return false;
}
} }
@Override @Override
@ -635,7 +644,7 @@ public class KeywordHits implements AutopsyVisitableItem {
ss.put(new NodeProperty<>(NbBundle.getMessage(this.getClass(), "KeywordHits.createSheet.filesWithHits.name"), ss.put(new NodeProperty<>(NbBundle.getMessage(this.getClass(), "KeywordHits.createSheet.filesWithHits.name"),
NbBundle.getMessage(this.getClass(), "KeywordHits.createSheet.filesWithHits.displayName"), NbBundle.getMessage(this.getClass(), "KeywordHits.createSheet.filesWithHits.displayName"),
NbBundle.getMessage(this.getClass(), "KeywordHits.createSheet.filesWithHits.desc"), NbBundle.getMessage(this.getClass(), "KeywordHits.createSheet.filesWithHits.desc"),
keywordResults.getKeywordInstances(setName, keyword).size())); countTotalDescendants()));
return s; return s;
} }
@ -645,42 +654,50 @@ public class KeywordHits implements AutopsyVisitableItem {
return getClass().getName(); return getClass().getName();
} }
} }
// Allows us to pass in either longs or strings // Allows us to pass in either longs or strings
// as the keys for different types of nodes at the // as the keys for different types of nodes at the
// same level. Probably a better way to do this, but // same level. Probably a better way to do this, but
// it works. // it works.
class RegExpInstanceKey { private class RegExpInstanceKey {
private final boolean isRegExp; private final boolean isRegExp;
private String strKey; private String strKey;
private Long longKey; private Long longKey;
public RegExpInstanceKey(String key) {
RegExpInstanceKey(String key) {
isRegExp = true; isRegExp = true;
strKey = key; strKey = key;
} }
public RegExpInstanceKey(Long key) {
RegExpInstanceKey(Long key) {
isRegExp = false; isRegExp = false;
longKey = key; longKey = key;
} }
boolean isRegExp() { boolean isRegExp() {
return isRegExp; return isRegExp;
} }
Long getIdKey() { Long getIdKey() {
return longKey; return longKey;
} }
String getRegExpKey() { String getRegExpKey() {
return strKey; return strKey;
} }
} }
/** /**
* Creates the nodes for a given regexp that represent the specific terms that were found * Creates the nodes for a given regexp that represent the specific terms
* that were found
*/ */
public class RegExpInstancesFactory extends ChildFactory.Detachable<RegExpInstanceKey> implements Observer { public class RegExpInstancesFactory extends ChildFactory.Detachable<RegExpInstanceKey> implements Observer {
private final String keyword; private final String keyword;
private final String setName; private final String setName;
private Map<RegExpInstanceKey, DisplayableItemNode > nodesMap = new HashMap<>(); private final Map<RegExpInstanceKey, DisplayableItemNode> nodesMap = new HashMap<>();
public RegExpInstancesFactory(String setName, String keyword) { public RegExpInstancesFactory(String setName, String keyword) {
super(); super();
@ -700,15 +717,15 @@ public class KeywordHits implements AutopsyVisitableItem {
@Override @Override
protected boolean createKeys(List<RegExpInstanceKey> list) { protected boolean createKeys(List<RegExpInstanceKey> list) {
List <String>instances = keywordResults.getKeywordInstances(setName, keyword); List<String> instances = keywordResults.getKeywordInstances(setName, keyword);
// The keys are different depending on what we are displaying. // The keys are different depending on what we are displaying.
// regexp get another layer to show instances. // regexp get another layer to show instances.
// Exact/substring matches don't. // Exact/substring matches don't.
if ((instances.size() == 1) && (instances.get(0).equals(DEFAULT_INSTANCE_NAME))) { if ((instances.size() == 1) && (instances.get(0).equals(DEFAULT_INSTANCE_NAME))) {
for (Long id : keywordResults.getArtifactIds(setName, keyword, DEFAULT_INSTANCE_NAME) ) { for (Long id : keywordResults.getArtifactIds(setName, keyword, DEFAULT_INSTANCE_NAME)) {
RegExpInstanceKey key = new RegExpInstanceKey(id); RegExpInstanceKey key = new RegExpInstanceKey(id);
if (!nodesMap.containsKey(key)) { if (!nodesMap.containsKey(key)) {
nodesMap.put(key, createNode(key)); nodesMap.put(key, createNode(key));
} }
list.add(key); list.add(key);
} }
@ -720,7 +737,7 @@ public class KeywordHits implements AutopsyVisitableItem {
} }
list.add(key); list.add(key);
} }
} }
return true; return true;
} }
@ -731,23 +748,24 @@ public class KeywordHits implements AutopsyVisitableItem {
} }
private DisplayableItemNode createNode(RegExpInstanceKey key) { private DisplayableItemNode createNode(RegExpInstanceKey key) {
// if it isn't a regexp, then skip the 'instance' layer of the tree // if it isn't a regexp, then skip the 'instance' layer of the tree
if (key.isRegExp() == false) { if (key.isRegExp() == false) {
return createBlackboardArtifactNode(key.getIdKey()); return createBlackboardArtifactNode(key.getIdKey());
} else { } else {
return new RegExpInstanceNode(setName, keyword, key.getRegExpKey()); return new RegExpInstanceNode(setName, keyword, key.getRegExpKey());
} }
} }
@Override @Override
public void update(Observable o, Object arg) { public void update(Observable o, Object arg) {
refresh(true); refresh(true);
} }
} }
/** /**
* Represents a specific term that was found from a regexp * Represents a specific term that was found from a regexp
*/ */
public class RegExpInstanceNode extends DisplayableItemNode implements Observer { public class RegExpInstanceNode extends DisplayableItemNode implements Observer {
@ -756,7 +774,7 @@ public class KeywordHits implements AutopsyVisitableItem {
private final String instance; private final String instance;
public RegExpInstanceNode(String setName, String keyword, String instance) { public RegExpInstanceNode(String setName, String keyword, String instance) {
super(Children.create(new HitsFactory(setName, keyword, instance), true), Lookups.singleton(keyword)); super(Children.create(new HitsFactory(setName, keyword, instance), true), Lookups.singleton(instance));
super.setName(instance); //the instance represents the name of the keyword hit at this point as the keyword is the regex super.setName(instance); //the instance represents the name of the keyword hit at this point as the keyword is the regex
this.setName = setName; this.setName = setName;
this.keyword = keyword; this.keyword = keyword;
@ -803,7 +821,7 @@ public class KeywordHits implements AutopsyVisitableItem {
ss.put(new NodeProperty<>(NbBundle.getMessage(this.getClass(), "KeywordHits.createSheet.filesWithHits.name"), ss.put(new NodeProperty<>(NbBundle.getMessage(this.getClass(), "KeywordHits.createSheet.filesWithHits.name"),
NbBundle.getMessage(this.getClass(), "KeywordHits.createSheet.filesWithHits.displayName"), NbBundle.getMessage(this.getClass(), "KeywordHits.createSheet.filesWithHits.displayName"),
NbBundle.getMessage(this.getClass(), "KeywordHits.createSheet.filesWithHits.desc"), NbBundle.getMessage(this.getClass(), "KeywordHits.createSheet.filesWithHits.desc"),
keywordResults.getKeywordInstances(setName, keyword).size())); keywordResults.getArtifactIds(setName, keyword, instance).size()));
return s; return s;
} }
@ -814,12 +832,14 @@ public class KeywordHits implements AutopsyVisitableItem {
} }
} }
/** /**
* Create a blackboard node for the given Keyword Hit artifact * Create a blackboard node for the given Keyword Hit artifact
*
* @param artifactId * @param artifactId
*
* @return Node or null on error * @return Node or null on error
*/ */
private BlackboardArtifactNode createBlackboardArtifactNode (Long artifactId) { private BlackboardArtifactNode createBlackboardArtifactNode(Long artifactId) {
if (skCase == null) { if (skCase == null) {
return null; return null;
} }
@ -869,7 +889,7 @@ public class KeywordHits implements AutopsyVisitableItem {
} }
return null; return null;
} }
/** /**
* Creates nodes for individual files that had hits * Creates nodes for individual files that had hits
*/ */
@ -878,8 +898,8 @@ public class KeywordHits implements AutopsyVisitableItem {
private final String keyword; private final String keyword;
private final String setName; private final String setName;
private final String instance; private final String instance;
private Map<Long, BlackboardArtifactNode > nodesMap = new HashMap<>(); private final Map<Long, BlackboardArtifactNode> nodesMap = new HashMap<>();
public HitsFactory(String setName, String keyword, String instance) { public HitsFactory(String setName, String keyword, String instance) {
super(); super();
@ -900,11 +920,11 @@ public class KeywordHits implements AutopsyVisitableItem {
@Override @Override
protected boolean createKeys(List<Long> list) { protected boolean createKeys(List<Long> list) {
for (Long id : keywordResults.getArtifactIds(setName, keyword, instance) ) { for (Long id : keywordResults.getArtifactIds(setName, keyword, instance)) {
if (!nodesMap.containsKey(id)) { if (!nodesMap.containsKey(id)) {
nodesMap.put(id, createBlackboardArtifactNode(id)); nodesMap.put(id, createBlackboardArtifactNode(id));
} }
list.add(id); list.add(id);
} }
return true; return true;
} }