mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-06 21:00:22 +00:00
Store content_str field as lowercase to support case insensitive regex search. Convert substring search to regex.
This commit is contained in:
parent
9ad3b9cee9
commit
4db94a7965
@ -45,7 +45,7 @@
|
||||
that avoids logging every request
|
||||
-->
|
||||
|
||||
<schema name="Autopsy Keyword Search" version="2.0">
|
||||
<schema name="Autopsy Keyword Search" version="2.1">
|
||||
<!-- attribute "name" is the name of this schema and is only used for display purposes.
|
||||
Applications should change this to reflect the nature of the search collection.
|
||||
version="1.4" is Solr's version number for the schema syntax and semantics. It should
|
||||
@ -60,6 +60,8 @@
|
||||
1.7 added _version_ field for Solr Cloud
|
||||
1.8 added new content_str string field and stopped copying content and file_name into content_ws
|
||||
2.0 added chunk_size field
|
||||
2.1 to facilitate case insensitive regex search,no longer copying content into content_str.
|
||||
content_str will be populated with lowercase content by Autopsy.
|
||||
-->
|
||||
|
||||
<types>
|
||||
@ -649,10 +651,15 @@
|
||||
<copyField source="file_name" dest="content_ws"/>
|
||||
-->
|
||||
|
||||
<!-- Copy content into content_str field for regex searches -->
|
||||
<copyField source="content" dest="content_str"/>
|
||||
<!-- Copying of content into content_str field has been removed
|
||||
so that we can perform case insensitive regex searches.
|
||||
The content_str field will be populated directly by Autopsy
|
||||
|
||||
copyField source="content" dest="content_str"/ -->
|
||||
|
||||
<!-- Copy file_name into content_str field for regex searches -->
|
||||
<copyField source="file_name" dest="content_str"/>
|
||||
|
||||
|
||||
<!-- Above, multiple source fields are copied to the [text] field.
|
||||
Another way to map multiple source fields to the same
|
||||
destination field is to use the dynamic field syntax.
|
||||
|
@ -357,11 +357,7 @@ class HighlightedText implements IndexedText {
|
||||
}
|
||||
final String filterQuery = Server.Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(contentIdStr);
|
||||
|
||||
double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
|
||||
//choose field to highlight based on isLiteral and Solr index schema version.
|
||||
highlightField = (isLiteral || (indexSchemaVersion < 2.0))
|
||||
? LuceneQuery.HIGHLIGHT_FIELD
|
||||
: Server.Schema.CONTENT_STR.toString();
|
||||
highlightField = LuceneQuery.HIGHLIGHT_FIELD;
|
||||
if (isLiteral) {
|
||||
//if the query is literal try to get solr to do the highlighting
|
||||
final String highlightQuery = keywords.stream()
|
||||
|
@ -39,7 +39,7 @@ class IndexFinder {
|
||||
private static final String KWS_DATA_FOLDER_NAME = "data";
|
||||
private static final String INDEX_FOLDER_NAME = "index";
|
||||
private static final String CURRENT_SOLR_VERSION = "4";
|
||||
private static final String CURRENT_SOLR_SCHEMA_VERSION = "2.0";
|
||||
private static final String CURRENT_SOLR_SCHEMA_VERSION = "2.1";
|
||||
|
||||
static String getCurrentSolrVersion() {
|
||||
return CURRENT_SOLR_VERSION;
|
||||
|
@ -22,6 +22,7 @@ import java.io.BufferedReader;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.logging.Level;
|
||||
import org.apache.commons.lang3.math.NumberUtils;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.openide.util.NbBundle;
|
||||
@ -208,7 +209,7 @@ class Ingester {
|
||||
* /update handler e.g. with XMLUpdateRequestHandler (deprecated in SOlr
|
||||
* 4.0.0), see if possible to stream with UpdateRequestHandler
|
||||
*
|
||||
* @param chunk The chunk content as a string
|
||||
* @param chunk The chunk content as a string, or null for metadata only
|
||||
* @param fields
|
||||
* @param size
|
||||
*
|
||||
@ -231,13 +232,23 @@ class Ingester {
|
||||
for (String key : fields.keySet()) {
|
||||
updateDoc.addField(key, fields.get(key));
|
||||
}
|
||||
//add the content to the SolrInputDocument
|
||||
//JMTODO: can we just add it to the field map before passing that in?
|
||||
updateDoc.addField(Server.Schema.CONTENT.toString(), chunk);
|
||||
|
||||
try {
|
||||
//TODO: consider timeout thread, or vary socket timeout based on size of indexed content
|
||||
|
||||
//add the content to the SolrInputDocument
|
||||
//JMTODO: can we just add it to the field map before passing that in?
|
||||
updateDoc.addField(Server.Schema.CONTENT.toString(), chunk);
|
||||
|
||||
// We also add the content (if present) in lowercase form to facilitate case
|
||||
// insensitive substring/regular expression search.
|
||||
double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
|
||||
if (indexSchemaVersion >= 2.1) {
|
||||
updateDoc.addField(Server.Schema.CONTENT_STR.toString(), ((chunk == null) ? "" : chunk.toLowerCase()));
|
||||
}
|
||||
|
||||
TimingMetric metric = HealthMonitor.getTimingMetric("Solr: Index chunk");
|
||||
|
||||
solrServer.addDocument(updateDoc);
|
||||
HealthMonitor.submitTimingMetric(metric);
|
||||
uncommitedIngests = true;
|
||||
|
@ -44,11 +44,11 @@ abstract class KeywordSearchList {
|
||||
// positives for phone numbers and IP addresses. They don't work as well
|
||||
// for "string" types such as URLs since the characters are more likely to
|
||||
// appear in the resulting hit.
|
||||
private static final String BOUNDARY_CHARACTERS = "[ \t\r\n\\.\\-\\?\\,\\;\\\\!\\:\\[\\]\\/\\(\\)\\\"\\\'\\>\\{\\}]";
|
||||
static final String BOUNDARY_CHARACTERS = "[ \t\r\n\\.\\-\\?\\,\\;\\\\!\\:\\[\\]\\/\\(\\)\\\"\\\'\\>\\{\\}]";
|
||||
private static final String PHONE_NUMBER_REGEX = BOUNDARY_CHARACTERS + "(\\([0-9]{3}\\)|[0-9]{3})([ \\-\\.])[0-9]{3}([ \\-\\.])[0-9]{4}" + BOUNDARY_CHARACTERS; //NON-NLS
|
||||
private static final String IP_ADDRESS_REGEX = BOUNDARY_CHARACTERS + "(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}(1[0-9]{2}|2[0-4][0-9]|25[0-5]|[1-9][0-9]|[0-9])" + BOUNDARY_CHARACTERS; //NON-NLS
|
||||
private static final String EMAIL_ADDRESS_REGEX = "(\\{?)[a-zA-Z0-9%+_\\-]+(\\.[a-zA-Z0-9%+_\\-]+)*(\\}?)\\@([a-zA-Z0-9]([a-zA-Z0-9\\-]*[a-zA-Z0-9])?\\.)+[a-zA-Z]{2,4}"; //NON-NLS
|
||||
private static final String URL_REGEX = "(((((h|H)(t|T))|(f|F))(t|T)(p|P)(s|S?)\\:\\/\\/)|(w|W){3,3}\\.)[a-zA-Z0-9\\-\\.]+\\.([a-zA-Z]{2,5})(\\:[0-9]+)*(\\/($|[a-zA-Z0-9\\.\\,\\;\\?\\'\\\\+&%\\$#\\=~_\\-]+))*"; //NON-NLS
|
||||
private static final String EMAIL_ADDRESS_REGEX = "(\\{?)[a-z0-9%+_\\-]+(\\.[a-z0-9%+_\\-]+)*(\\}?)\\@([a-z0-9]([a-z0-9\\-]*[a-z0-9])?\\.)+[a-z]{2,4}"; //NON-NLS
|
||||
private static final String URL_REGEX = "(((((h)(t))|(f))(t)(p)(s?)\\:\\/\\/)|(w){3,3}\\.)[a-z0-9\\-\\.]+\\.([a-z]{2,5})(\\:[0-9]+)*(\\/($|[a-z0-9\\.\\,\\;\\?\\'\\\\+&%\\$#\\=~_\\-]+))*"; //NON-NLS
|
||||
|
||||
/**
|
||||
* 12-19 digits, with possible single spaces or dashes in between,
|
||||
|
@ -129,20 +129,16 @@ class KeywordSearchUtil {
|
||||
|
||||
static KeywordSearchQuery getQueryForKeyword(Keyword keyword, KeywordList keywordList) {
|
||||
KeywordSearchQuery query = null;
|
||||
if (keyword.searchTermIsLiteral()) {
|
||||
if (keyword.searchTermIsLiteral() && keyword.searchTermIsWholeWord()) {
|
||||
// literal, exact match
|
||||
if (keyword.searchTermIsWholeWord()) {
|
||||
query = new LuceneQuery(keywordList, keyword);
|
||||
query.escape();
|
||||
} // literal, substring match
|
||||
else {
|
||||
query = new TermsComponentQuery(keywordList, keyword);
|
||||
query.escape();
|
||||
query.setSubstringQuery();
|
||||
}
|
||||
} // regexp
|
||||
query = new LuceneQuery(keywordList, keyword);
|
||||
query.escape();
|
||||
} // regexp and literal substring match
|
||||
else {
|
||||
query = new RegexQuery(keywordList, keyword);
|
||||
if (keyword.searchTermIsLiteral()) {
|
||||
query.escape();
|
||||
}
|
||||
}
|
||||
return query;
|
||||
}
|
||||
|
@ -36,7 +36,6 @@ import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.params.CursorMarkParams;
|
||||
import org.openide.util.Exceptions;
|
||||
import org.openide.util.NbBundle;
|
||||
import org.sleuthkit.autopsy.casemodule.Case;
|
||||
import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
|
||||
@ -44,9 +43,6 @@ import org.sleuthkit.autopsy.coreutils.Logger;
|
||||
import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil;
|
||||
import org.sleuthkit.autopsy.datamodel.CreditCards;
|
||||
import static org.sleuthkit.autopsy.keywordsearch.KeywordSearchSettings.MODULE_NAME;
|
||||
import static org.sleuthkit.autopsy.keywordsearch.TermsComponentQuery.CREDIT_CARD_NUM_PATTERN;
|
||||
import static org.sleuthkit.autopsy.keywordsearch.TermsComponentQuery.CREDIT_CARD_TRACK2_PATTERN;
|
||||
import static org.sleuthkit.autopsy.keywordsearch.TermsComponentQuery.KEYWORD_SEARCH_DOCUMENT_ID;
|
||||
import org.sleuthkit.datamodel.AbstractFile;
|
||||
import org.sleuthkit.datamodel.Account;
|
||||
import org.sleuthkit.datamodel.AccountFileInstance;
|
||||
@ -93,6 +89,61 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
private static final int MIN_EMAIL_ADDR_LENGTH = 8;
|
||||
private static final String SNIPPET_DELIMITER = String.valueOf(Character.toChars(171));
|
||||
|
||||
/*
|
||||
* The following fields are part of the initial implementation of credit
|
||||
* card account search and should be factored into another class when time
|
||||
* permits.
|
||||
*/
|
||||
/**
|
||||
* 12-19 digits, with possible single spaces or dashes in between. First
|
||||
* digit is 2 through 6
|
||||
*
|
||||
*/
|
||||
static final Pattern CREDIT_CARD_NUM_PATTERN
|
||||
= Pattern.compile("(?<ccn>[2-6]([ -]?[0-9]){11,18})");
|
||||
static final Pattern CREDIT_CARD_TRACK1_PATTERN = Pattern.compile(
|
||||
/*
|
||||
* Track 1 is alphanumeric.
|
||||
*
|
||||
* This regex matches 12-19 digit ccns embeded in a track 1 formated
|
||||
* string. This regex matches (and extracts groups) even if the
|
||||
* entire track is not present as long as the part that is conforms
|
||||
* to the track format.
|
||||
*/
|
||||
"(?:" //begin nested optinal group //NON-NLS
|
||||
+ "%?" //optional start sentinal: % //NON-NLS
|
||||
+ "B)?" //format code //NON-NLS
|
||||
+ "(?<accountNumber>[2-6]([ -]?[0-9]){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 2,3,4,5, or 6 //NON-NLS
|
||||
+ "\\^" //separator //NON-NLS
|
||||
+ "(?<name>[^^]{2,26})" //2-26 charachter name, not containing ^ //NON-NLS
|
||||
+ "(?:\\^" //separator //NON-NLS
|
||||
+ "(?:(?:\\^|(?<expiration>\\d{4}))" //separator or 4 digit expiration YYMM //NON-NLS
|
||||
+ "(?:(?:\\^|(?<serviceCode>\\d{3}))"//separator or 3 digit service code //NON-NLS
|
||||
+ "(?:(?<discretionary>[^?]*)" // discretionary data not containing separator //NON-NLS
|
||||
+ "(?:\\?" // end sentinal: ? //NON-NLS
|
||||
+ "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
|
||||
+ "?)?)?)?)?)?");//close nested optional groups //NON-NLS
|
||||
static final Pattern CREDIT_CARD_TRACK2_PATTERN = Pattern.compile(
|
||||
/*
|
||||
* Track 2 is numeric plus six punctuation symbolls :;<=>?
|
||||
*
|
||||
* This regex matches 12-19 digit ccns embeded in a track 2 formated
|
||||
* string. This regex matches (and extracts groups) even if the
|
||||
* entire track is not present as long as the part that is conforms
|
||||
* to the track format.
|
||||
*
|
||||
*/
|
||||
"[:;<=>?]?" //(optional)start sentinel //NON-NLS
|
||||
+ "(?<accountNumber>[2-6]([ -]?[0-9]){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 2,3,4,5, or 6 //NON-NLS
|
||||
+ "(?:[:;<=>?]" //separator //NON-NLS
|
||||
+ "(?:(?<expiration>\\d{4})" //4 digit expiration date YYMM //NON-NLS
|
||||
+ "(?:(?<serviceCode>\\d{3})" //3 digit service code //NON-NLS
|
||||
+ "(?:(?<discretionary>[^:;<=>?]*)" //discretionary data, not containing punctuation marks //NON-NLS
|
||||
+ "(?:[:;<=>?]" //end sentinel //NON-NLS
|
||||
+ "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
|
||||
+ "?)?)?)?)?)?"); //close nested optional groups //NON-NLS
|
||||
static final BlackboardAttribute.Type KEYWORD_SEARCH_DOCUMENT_ID = new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_DOCUMENT_ID);
|
||||
|
||||
private final List<KeywordQueryFilter> filters = new ArrayList<>();
|
||||
private final KeywordList keywordList;
|
||||
private final Keyword originalKeyword; // The regular expression originalKeyword used to perform the search.
|
||||
@ -172,10 +223,13 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
// a regular expression search) and .* as anchors (if the query doesn't
|
||||
// already have them). We do not add .* if there is a boundary character.
|
||||
boolean skipWildcardPrefix = queryStringContainsWildcardPrefix || getQueryString().startsWith("^");
|
||||
boolean skipWildcardSuffix = queryStringContainsWildcardSuffix ||
|
||||
(getQueryString().endsWith("$") && ( ! getQueryString().endsWith("\\$")));
|
||||
boolean skipWildcardSuffix = queryStringContainsWildcardSuffix
|
||||
|| (getQueryString().endsWith("$") && (!getQueryString().endsWith("\\$")));
|
||||
solrQuery.setQuery((field == null ? Server.Schema.CONTENT_STR.toString() : field) + ":/"
|
||||
+ (skipWildcardPrefix ? "" : ".*") + getQueryString()
|
||||
+ (skipWildcardPrefix ? "" : ".*")
|
||||
// if the query is for a substring (i.e. literal search term) we want
|
||||
// to escape characters such as ()[]-.
|
||||
+ (originalKeyword.searchTermIsLiteral() ? getEscapedQueryString().toLowerCase() : getQueryString().toLowerCase())
|
||||
+ (skipWildcardSuffix ? "" : ".*") + "/");
|
||||
|
||||
// Set the fields we want to have returned by the query.
|
||||
@ -241,7 +295,28 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
|
||||
final Collection<Object> content_str = solrDoc.getFieldValues(Server.Schema.CONTENT_STR.toString());
|
||||
|
||||
final Pattern pattern = Pattern.compile(keywordString);
|
||||
String searchPattern;
|
||||
if (originalKeyword.searchTermIsLiteral()) {
|
||||
/**
|
||||
* For substring searches, the following pattern was arrived at
|
||||
* through trial and error in an attempt to reproduce the same hits
|
||||
* we were getting when we were using the TermComponent approach.
|
||||
* This basically looks for zero of more word characters followed
|
||||
* optionally by a dot or apostrophe, followed by the quoted
|
||||
* lowercase substring following by zero or more word characters
|
||||
* followed optionally by a dot or apostrophe. The reason that the
|
||||
* dot and apostrophe characters are being handled here is because
|
||||
* the old code used to find hits in domain names (e.g. hacks.ie)
|
||||
* and possessives (e.g. hacker's). This obviously works for English
|
||||
* but is probably not sufficient for other languages.
|
||||
*/
|
||||
searchPattern = "[\\w[\\.']]*" + Pattern.quote(keywordString.toLowerCase()) + "[\\w[\\.']]*";
|
||||
} else {
|
||||
searchPattern = keywordString;
|
||||
}
|
||||
|
||||
final Pattern pattern = Pattern.compile(searchPattern, Pattern.CASE_INSENSITIVE);
|
||||
|
||||
try {
|
||||
for (Object content_obj : content_str) {
|
||||
String content = (String) content_obj;
|
||||
@ -282,6 +357,31 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
hit = hit.replaceAll("[^0-9]$", "");
|
||||
}
|
||||
|
||||
/**
|
||||
* Boundary characters are removed from the start and end of
|
||||
* the hit to normalize the hits. This is being done for
|
||||
* substring searches only at this point. We don't do it for
|
||||
* real regular expression searches because the user may
|
||||
* have explicitly included boundary characters in their
|
||||
* regular expression.
|
||||
*/
|
||||
if (originalKeyword.searchTermIsLiteral()) {
|
||||
hit = hit.replaceAll("^" + KeywordSearchList.BOUNDARY_CHARACTERS + "*", "");
|
||||
hit = hit.replaceAll(KeywordSearchList.BOUNDARY_CHARACTERS + "*$", "");
|
||||
|
||||
/**
|
||||
* The Solr StandardTokenizerFactory maximum token
|
||||
* length is 255 and attempts to search for tokens
|
||||
* larger than this limit fail when we attempt to
|
||||
* highlight later. I have't found a programmatic
|
||||
* mechanism to get this value so I'm hardcoding it
|
||||
* here.
|
||||
*/
|
||||
if (hit.length() > 255) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The use of String interning is an optimization to ensure
|
||||
* that we reuse the same keyword hit String object across
|
||||
@ -340,7 +440,6 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
} catch (Throwable error) {
|
||||
/*
|
||||
@ -457,24 +556,24 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
createCCNAccount(content, foundKeyword, hit, snippet, listName);
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Create a "plain vanilla" keyword hit artifact with keyword and
|
||||
* regex attributes
|
||||
* Create a "plain vanilla" keyword hit artifact with keyword and regex
|
||||
* attributes
|
||||
*/
|
||||
BlackboardArtifact newArtifact;
|
||||
Collection<BlackboardAttribute> attributes = new ArrayList<>();
|
||||
|
||||
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm()));
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, getQueryString()));
|
||||
|
||||
|
||||
try {
|
||||
newArtifact = content.newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
|
||||
} catch (TskCoreException ex) {
|
||||
LOGGER.log(Level.SEVERE, "Error adding artifact for keyword hit to blackboard", ex); //NON-NLS
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
if (StringUtils.isNotBlank(listName)) {
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
|
||||
}
|
||||
@ -486,7 +585,11 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
-> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
|
||||
);
|
||||
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.REGEX.ordinal()));
|
||||
if (originalKeyword.searchTermIsLiteral()) {
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
|
||||
} else {
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.REGEX.ordinal()));
|
||||
}
|
||||
|
||||
try {
|
||||
newArtifact.addAttributes(attributes);
|
||||
@ -498,7 +601,7 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
}
|
||||
|
||||
private void createCCNAccount(Content content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName) {
|
||||
|
||||
|
||||
final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
|
||||
|
||||
if (originalKeyword.getArtifactAttributeType() != ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
|
||||
@ -506,14 +609,14 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
return;
|
||||
}
|
||||
/*
|
||||
* Create a credit card account with attributes
|
||||
* parsed from the snippet for the hit and looked up based on the
|
||||
* parsed bank identifcation number.
|
||||
* Create a credit card account with attributes parsed from the snippet
|
||||
* for the hit and looked up based on the parsed bank identifcation
|
||||
* number.
|
||||
*/
|
||||
Collection<BlackboardAttribute> attributes = new ArrayList<>();
|
||||
|
||||
|
||||
Map<BlackboardAttribute.Type, BlackboardAttribute> parsedTrackAttributeMap = new HashMap<>();
|
||||
Matcher matcher = TermsComponentQuery.CREDIT_CARD_TRACK1_PATTERN.matcher(hit.getSnippet());
|
||||
Matcher matcher = CREDIT_CARD_TRACK1_PATTERN.matcher(hit.getSnippet());
|
||||
if (matcher.find()) {
|
||||
parseTrack1Data(parsedTrackAttributeMap, matcher);
|
||||
}
|
||||
@ -523,7 +626,7 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
}
|
||||
final BlackboardAttribute ccnAttribute = parsedTrackAttributeMap.get(new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_CARD_NUMBER));
|
||||
if (ccnAttribute == null || StringUtils.isBlank(ccnAttribute.getValueString())) {
|
||||
|
||||
|
||||
if (hit.isArtifactHit()) {
|
||||
LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getArtifactID().get())); //NON-NLS
|
||||
} else {
|
||||
@ -565,8 +668,7 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
|
||||
/*
|
||||
* If the hit is from unused or unallocated space, record the Solr
|
||||
* document id to support showing just the chunk that contained the
|
||||
* hit.
|
||||
* document id to support showing just the chunk that contained the hit.
|
||||
*/
|
||||
if (content instanceof AbstractFile) {
|
||||
AbstractFile file = (AbstractFile) content;
|
||||
@ -586,24 +688,24 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
hit.getArtifactID().ifPresent(artifactID
|
||||
-> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
|
||||
);
|
||||
|
||||
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.REGEX.ordinal()));
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Create an account instance.
|
||||
*/
|
||||
try {
|
||||
AccountFileInstance ccAccountInstance = Case.getCurrentCaseThrows().getSleuthkitCase().getCommunicationsManager().createAccountFileInstance(Account.Type.CREDIT_CARD, ccnAttribute.getValueString() , MODULE_NAME, content);
|
||||
|
||||
AccountFileInstance ccAccountInstance = Case.getCurrentCaseThrows().getSleuthkitCase().getCommunicationsManager().createAccountFileInstance(Account.Type.CREDIT_CARD, ccnAttribute.getValueString(), MODULE_NAME, content);
|
||||
|
||||
ccAccountInstance.addAttributes(attributes);
|
||||
|
||||
} catch (TskCoreException | NoCurrentCaseException ex) {
|
||||
LOGGER.log(Level.SEVERE, "Error creating CCN account instance", ex); //NON-NLS
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the track 2 data from the snippet for a credit card account number
|
||||
* hit and turns them into artifact attributes.
|
||||
@ -649,14 +751,14 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
static private void addAttributeIfNotAlreadyCaptured(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, ATTRIBUTE_TYPE attrType, String groupName, Matcher matcher) {
|
||||
BlackboardAttribute.Type type = new BlackboardAttribute.Type(attrType);
|
||||
|
||||
if( ! attributeMap.containsKey(type)) {
|
||||
if (!attributeMap.containsKey(type)) {
|
||||
String value = matcher.group(groupName);
|
||||
if (attrType.equals(ATTRIBUTE_TYPE.TSK_CARD_NUMBER)) {
|
||||
attributeMap.put(new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_KEYWORD),
|
||||
new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, value));
|
||||
value = CharMatcher.anyOf(" -").removeFrom(value);
|
||||
}
|
||||
|
||||
|
||||
if (StringUtils.isNotBlank(value)) {
|
||||
attributeMap.put(type, new BlackboardAttribute(attrType, MODULE_NAME, value));
|
||||
}
|
||||
|
@ -1,561 +0,0 @@
|
||||
/*
|
||||
* Autopsy Forensic Browser
|
||||
*
|
||||
* Copyright 2011-2018 Basis Technology Corp.
|
||||
* Contact: carrier <at> sleuthkit <dot> org
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.sleuthkit.autopsy.keywordsearch;
|
||||
|
||||
import com.google.common.base.CharMatcher;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.logging.Level;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
import org.apache.solr.client.solrj.response.TermsResponse.Term;
|
||||
import org.openide.util.Exceptions;
|
||||
import org.sleuthkit.autopsy.casemodule.Case;
|
||||
import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
|
||||
import org.sleuthkit.autopsy.coreutils.Logger;
|
||||
import org.sleuthkit.autopsy.coreutils.Version;
|
||||
import org.sleuthkit.autopsy.datamodel.CreditCards;
|
||||
import org.sleuthkit.datamodel.AbstractFile;
|
||||
import org.sleuthkit.datamodel.Account;
|
||||
import org.sleuthkit.datamodel.AccountFileInstance;
|
||||
import org.sleuthkit.datamodel.BlackboardArtifact;
|
||||
import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
|
||||
import org.sleuthkit.datamodel.BlackboardAttribute;
|
||||
import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
|
||||
import org.sleuthkit.datamodel.Content;
|
||||
import org.sleuthkit.datamodel.TskCoreException;
|
||||
import org.sleuthkit.datamodel.TskData;
|
||||
|
||||
/**
|
||||
* Implements a regex query that will be performed as a two step operation. In
|
||||
* the first step, the Solr terms component is used to find any terms in the
|
||||
* index that match the regex. In the second step, term queries are executed for
|
||||
* each matched term to produce the set of keyword hits for the regex.
|
||||
*/
|
||||
final class TermsComponentQuery implements KeywordSearchQuery {
|
||||
|
||||
private static final Logger LOGGER = Logger.getLogger(TermsComponentQuery.class.getName());
|
||||
private static final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
|
||||
private static final String SEARCH_HANDLER = "/terms"; //NON-NLS
|
||||
private static final String SEARCH_FIELD = Server.Schema.TEXT.toString();
|
||||
private static final int TERMS_SEARCH_TIMEOUT = 90 * 1000; // Milliseconds
|
||||
private static final String CASE_INSENSITIVE = "case_insensitive"; //NON-NLS
|
||||
private static final boolean DEBUG_FLAG = Version.Type.DEVELOPMENT.equals(Version.getBuildType());
|
||||
private static final int MAX_TERMS_QUERY_RESULTS = 20000;
|
||||
|
||||
private final KeywordList keywordList;
|
||||
private final Keyword originalKeyword;
|
||||
private final List<KeywordQueryFilter> filters = new ArrayList<>(); // THIS APPEARS TO BE UNUSED
|
||||
|
||||
private String searchTerm;
|
||||
private boolean searchTermIsEscaped;
|
||||
|
||||
/*
|
||||
* The following fields are part of the initial implementation of credit
|
||||
* card account search and should be factored into another class when time
|
||||
* permits.
|
||||
*/
|
||||
/**
|
||||
* 12-19 digits, with possible single spaces or dashes in between. First
|
||||
* digit is 2 through 6
|
||||
*
|
||||
*/
|
||||
static final Pattern CREDIT_CARD_NUM_PATTERN
|
||||
= Pattern.compile("(?<ccn>[2-6]([ -]?[0-9]){11,18})");
|
||||
static final Pattern CREDIT_CARD_TRACK1_PATTERN = Pattern.compile(
|
||||
/*
|
||||
* Track 1 is alphanumeric.
|
||||
*
|
||||
* This regex matches 12-19 digit ccns embeded in a track 1 formated
|
||||
* string. This regex matches (and extracts groups) even if the
|
||||
* entire track is not present as long as the part that is conforms
|
||||
* to the track format.
|
||||
*/
|
||||
"(?:" //begin nested optinal group //NON-NLS
|
||||
+ "%?" //optional start sentinal: % //NON-NLS
|
||||
+ "B)?" //format code //NON-NLS
|
||||
+ "(?<accountNumber>[2-6]([ -]?[0-9]){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 2,3,4,5, or 6 //NON-NLS
|
||||
+ "\\^" //separator //NON-NLS
|
||||
+ "(?<name>[^^]{2,26})" //2-26 charachter name, not containing ^ //NON-NLS
|
||||
+ "(?:\\^" //separator //NON-NLS
|
||||
+ "(?:(?:\\^|(?<expiration>\\d{4}))" //separator or 4 digit expiration YYMM //NON-NLS
|
||||
+ "(?:(?:\\^|(?<serviceCode>\\d{3}))"//separator or 3 digit service code //NON-NLS
|
||||
+ "(?:(?<discretionary>[^?]*)" // discretionary data not containing separator //NON-NLS
|
||||
+ "(?:\\?" // end sentinal: ? //NON-NLS
|
||||
+ "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
|
||||
+ "?)?)?)?)?)?");//close nested optional groups //NON-NLS
|
||||
static final Pattern CREDIT_CARD_TRACK2_PATTERN = Pattern.compile(
|
||||
/*
|
||||
* Track 2 is numeric plus six punctuation symbolls :;<=>?
|
||||
*
|
||||
* This regex matches 12-19 digit ccns embeded in a track 2 formated
|
||||
* string. This regex matches (and extracts groups) even if the
|
||||
* entire track is not present as long as the part that is conforms
|
||||
* to the track format.
|
||||
*
|
||||
*/
|
||||
"[:;<=>?]?" //(optional)start sentinel //NON-NLS
|
||||
+ "(?<accountNumber>[2-6]([ -]?[0-9]){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 2,3,4,5, or 6 //NON-NLS
|
||||
+ "(?:[:;<=>?]" //separator //NON-NLS
|
||||
+ "(?:(?<expiration>\\d{4})" //4 digit expiration date YYMM //NON-NLS
|
||||
+ "(?:(?<serviceCode>\\d{3})" //3 digit service code //NON-NLS
|
||||
+ "(?:(?<discretionary>[^:;<=>?]*)" //discretionary data, not containing punctuation marks //NON-NLS
|
||||
+ "(?:[:;<=>?]" //end sentinel //NON-NLS
|
||||
+ "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
|
||||
+ "?)?)?)?)?)?"); //close nested optional groups //NON-NLS
|
||||
static final BlackboardAttribute.Type KEYWORD_SEARCH_DOCUMENT_ID = new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_DOCUMENT_ID);
|
||||
|
||||
/**
|
||||
* Constructs an object that implements a regex query that will be performed
|
||||
* as a two step operation. In the first step, the Solr terms component is
|
||||
* used to find any terms in the index that match the regex. In the second
|
||||
* step, term queries are executed for each matched term to produce the set
|
||||
* of keyword hits for the regex.
|
||||
*
|
||||
* @param keywordList A keyword list that contains the keyword that provides
|
||||
* the regex search term for the query.
|
||||
* @param keyword The keyword that provides the regex search term for
|
||||
* the query.
|
||||
*/
|
||||
// TODO: Why is both the list and the keyword added to the state of this
|
||||
// object?
|
||||
// TODO: Why is the search term not escaped and given substring wildcards,
|
||||
// if needed, here in the constructor?
|
||||
TermsComponentQuery(KeywordList keywordList, Keyword keyword) {
|
||||
this.keywordList = keywordList;
|
||||
this.originalKeyword = keyword;
|
||||
this.searchTerm = keyword.getSearchTerm();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the keyword list that contains the keyword that provides the regex
|
||||
* search term for the query.
|
||||
*
|
||||
* @return The keyword list.
|
||||
*/
|
||||
@Override
|
||||
public KeywordList getKeywordList() {
|
||||
return keywordList;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the original search term for the query, without any escaping or, if
|
||||
* it is a literal term, the addition of wildcards for a substring search.
|
||||
*
|
||||
* @return The original search term.
|
||||
*/
|
||||
@Override
|
||||
public String getQueryString() {
|
||||
return originalKeyword.getSearchTerm();
|
||||
}
|
||||
|
||||
/**
|
||||
* Indicates whether or not the search term for the query is a literal term
|
||||
* that needs have wildcards added to it to make the query a substring
|
||||
* search.
|
||||
*
|
||||
* @return True or false.
|
||||
*/
|
||||
@Override
|
||||
public boolean isLiteral() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds wild cards to the search term for the query, which makes the query a
|
||||
* substring search, if it is a literal search term.
|
||||
*/
|
||||
@Override
|
||||
public void setSubstringQuery() {
|
||||
searchTerm = ".*" + searchTerm + ".*";
|
||||
}
|
||||
|
||||
/**
|
||||
* Escapes the search term for the query.
|
||||
*/
|
||||
@Override
|
||||
public void escape() {
|
||||
searchTerm = Pattern.quote(originalKeyword.getSearchTerm());
|
||||
searchTermIsEscaped = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Indicates whether or not the search term has been escaped yet.
|
||||
*
|
||||
* @return True or false.
|
||||
*/
|
||||
@Override
|
||||
public boolean isEscaped() {
|
||||
return searchTermIsEscaped;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the escaped search term for the query, assuming it has been escaped
|
||||
* by a call to TermsComponentQuery.escape.
|
||||
*
|
||||
* @return The search term, possibly escaped.
|
||||
*/
|
||||
@Override
|
||||
public String getEscapedQueryString() {
|
||||
return this.searchTerm;
|
||||
}
|
||||
|
||||
/**
|
||||
* Indicates whether or not the search term is a valid regex.
|
||||
*
|
||||
* @return True or false.
|
||||
*/
|
||||
@Override
|
||||
public boolean validate() {
|
||||
if (searchTerm.isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
try {
|
||||
Pattern.compile(searchTerm);
|
||||
return true;
|
||||
} catch (IllegalArgumentException ex) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
    /**
     * Does nothing; not applicable to a regex query, which always searches a
     * field created specifically for regex searches.
     *
     * @param field The name of a Solr document field to search (ignored).
     */
    @Override
    public void setField(String field) {
    }
/**
|
||||
* Adds a filter to the query.
|
||||
*
|
||||
* @param filter The filter.
|
||||
*/
|
||||
// TODO: Document this better.
|
||||
@Override
|
||||
public void addFilter(KeywordQueryFilter filter) {
|
||||
this.filters.add(filter);
|
||||
}
|
||||
|
||||
/**
|
||||
* Executes the regex query as a two step operation. In the first step, the
|
||||
* Solr terms component is used to find any terms in the index that match
|
||||
* the regex. In the second step, term queries are executed for each matched
|
||||
* term to produce the set of keyword hits for the regex.
|
||||
*
|
||||
* @return A QueryResult object or null.
|
||||
*
|
||||
* @throws NoOpenCoreException
|
||||
*/
|
||||
@Override
|
||||
public QueryResults performQuery() throws KeywordSearchModuleException, NoOpenCoreException {
|
||||
/*
|
||||
* Do a query using the Solr terms component to find any terms in the
|
||||
* index that match the regex.
|
||||
*/
|
||||
final SolrQuery termsQuery = new SolrQuery();
|
||||
termsQuery.setRequestHandler(SEARCH_HANDLER);
|
||||
termsQuery.setTerms(true);
|
||||
termsQuery.setTermsRegexFlag(CASE_INSENSITIVE);
|
||||
termsQuery.setTermsRegex(searchTerm);
|
||||
termsQuery.addTermsField(SEARCH_FIELD);
|
||||
termsQuery.setTimeAllowed(TERMS_SEARCH_TIMEOUT);
|
||||
termsQuery.setShowDebugInfo(DEBUG_FLAG);
|
||||
termsQuery.setTermsLimit(MAX_TERMS_QUERY_RESULTS);
|
||||
List<Term> terms = KeywordSearch.getServer().queryTerms(termsQuery).getTerms(SEARCH_FIELD);
|
||||
/*
|
||||
* Do a term query for each term that matched the regex.
|
||||
*/
|
||||
QueryResults results = new QueryResults(this);
|
||||
for (Term term : terms) {
|
||||
/*
|
||||
* If searching for credit card account numbers, do a Luhn check on
|
||||
* the term and discard it if it does not pass.
|
||||
*/
|
||||
if (originalKeyword.getArtifactAttributeType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
|
||||
Matcher matcher = CREDIT_CARD_NUM_PATTERN.matcher(term.getTerm());
|
||||
if (false == matcher.find()
|
||||
|| false == CreditCardValidator.isValidCCN(matcher.group("ccn"))) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Do an ordinary query with the escaped term and convert the query
|
||||
* results into a single list of keyword hits without duplicates.
|
||||
*
|
||||
* Note that the filters field appears to be unused. There is an old
|
||||
* comment here, what does it mean? "Note: we can't set filter query
|
||||
* on terms query but setting filter query on fileResults query will
|
||||
* yield the same result." The filter is NOT being added to the term
|
||||
* query.
|
||||
*/
|
||||
String escapedTerm = KeywordSearchUtil.escapeLuceneQuery(term.getTerm());
|
||||
LuceneQuery termQuery = new LuceneQuery(keywordList, new Keyword(escapedTerm, true, true));
|
||||
filters.forEach(termQuery::addFilter); // This appears to be unused
|
||||
QueryResults termQueryResult = termQuery.performQuery();
|
||||
Set<KeywordHit> termHits = new HashSet<>();
|
||||
for (Keyword word : termQueryResult.getKeywords()) {
|
||||
termHits.addAll(termQueryResult.getResults(word));
|
||||
}
|
||||
results.addResult(new Keyword(term.getTerm(), false, true, originalKeyword.getListName(), originalKeyword.getOriginalTerm()), new ArrayList<>(termHits));
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Posts a keyword hit artifact to the blackboard for a given keyword hit.
|
||||
*
|
||||
* @param content The text source object for the hit.
|
||||
* @param foundKeyword The keyword that was found by the search, this may be
|
||||
* different than the Keyword that was searched if, for
|
||||
* example, it was a RegexQuery.
|
||||
* @param hit The keyword hit.
|
||||
* @param snippet A snippet from the text that contains the hit.
|
||||
* @param listName The name of the keyword list that contained the
|
||||
* keyword for which the hit was found.
|
||||
*
|
||||
*
|
||||
* @return The newly created artifact or null if there was a problem
|
||||
* creating it.
|
||||
*/
|
||||
@Override
|
||||
public BlackboardArtifact postKeywordHitToBlackboard(Content content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName) {
|
||||
|
||||
/*
|
||||
* CCN hits are handled specially
|
||||
*/
|
||||
if (originalKeyword.getArtifactAttributeType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
|
||||
createCCNAccount(content, hit, snippet, listName);
|
||||
return null;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create a "plain vanilla" keyword hit artifact with keyword and regex
|
||||
* attributes,
|
||||
*/
|
||||
BlackboardArtifact newArtifact;
|
||||
Collection<BlackboardAttribute> attributes = new ArrayList<>();
|
||||
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm()));
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, originalKeyword.getSearchTerm()));
|
||||
|
||||
try {
|
||||
newArtifact = content.newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
|
||||
|
||||
} catch (TskCoreException ex) {
|
||||
LOGGER.log(Level.SEVERE, "Error adding artifact for keyword hit to blackboard", ex); //NON-NLS
|
||||
return null;
|
||||
}
|
||||
|
||||
if (StringUtils.isNotBlank(listName)) {
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
|
||||
}
|
||||
if (snippet != null) {
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
|
||||
}
|
||||
|
||||
hit.getArtifactID().ifPresent(
|
||||
artifactID -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
|
||||
);
|
||||
|
||||
// TermsComponentQuery is now being used exclusively for substring searches.
|
||||
attributes.add(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
|
||||
|
||||
try {
|
||||
newArtifact.addAttributes(attributes);
|
||||
return newArtifact;
|
||||
} catch (TskCoreException e) {
|
||||
LOGGER.log(Level.SEVERE, "Error adding bb attributes for terms search artifact", e); //NON-NLS
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private void createCCNAccount(Content content, KeywordHit hit, String snippet, String listName) {
|
||||
|
||||
if (originalKeyword.getArtifactAttributeType() != ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
|
||||
LOGGER.log(Level.SEVERE, "Keyword hit is not a credit card number"); //NON-NLS
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Create a credit card account with attributes parsed from from the
|
||||
* snippet for the hit and looked up based on the parsed bank
|
||||
* identifcation number.
|
||||
*/
|
||||
Collection<BlackboardAttribute> attributes = new ArrayList<>();
|
||||
|
||||
Map<BlackboardAttribute.Type, BlackboardAttribute> parsedTrackAttributeMap = new HashMap<>();
|
||||
Matcher matcher = CREDIT_CARD_TRACK1_PATTERN.matcher(hit.getSnippet());
|
||||
if (matcher.find()) {
|
||||
parseTrack1Data(parsedTrackAttributeMap, matcher);
|
||||
}
|
||||
matcher = CREDIT_CARD_TRACK2_PATTERN.matcher(hit.getSnippet());
|
||||
if (matcher.find()) {
|
||||
parseTrack2Data(parsedTrackAttributeMap, matcher);
|
||||
}
|
||||
final BlackboardAttribute ccnAttribute = parsedTrackAttributeMap.get(new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_CARD_NUMBER));
|
||||
if (ccnAttribute == null || StringUtils.isBlank(ccnAttribute.getValueString())) {
|
||||
if (hit.isArtifactHit()) {
|
||||
LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", searchTerm, hit.getSnippet(), hit.getArtifactID().get())); //NON-NLS
|
||||
} else {
|
||||
long contentId = 0;
|
||||
try {
|
||||
contentId = hit.getContentID();
|
||||
} catch (TskCoreException ex) {
|
||||
LOGGER.log(Level.SEVERE, String.format("Failed to content id from keyword hit: term = %s, snippet = '%s'", searchTerm, hit.getSnippet()), ex); //NON-NLS
|
||||
}
|
||||
if (contentId > 0) {
|
||||
LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", searchTerm, hit.getSnippet(), contentId)); //NON-NLS
|
||||
} else {
|
||||
LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s'", searchTerm, hit.getSnippet())); //NON-NLS
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
attributes.addAll(parsedTrackAttributeMap.values());
|
||||
|
||||
/*
|
||||
* Look up the bank name, scheme, etc. attributes for the bank
|
||||
* indentification number (BIN).
|
||||
*/
|
||||
final int bin = Integer.parseInt(ccnAttribute.getValueString().substring(0, 8));
|
||||
CreditCards.BankIdentificationNumber binInfo = CreditCards.getBINInfo(bin);
|
||||
if (binInfo != null) {
|
||||
binInfo.getScheme().ifPresent(scheme
|
||||
-> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CARD_SCHEME, MODULE_NAME, scheme)));
|
||||
binInfo.getCardType().ifPresent(cardType
|
||||
-> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CARD_TYPE, MODULE_NAME, cardType)));
|
||||
binInfo.getBrand().ifPresent(brand
|
||||
-> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_BRAND_NAME, MODULE_NAME, brand)));
|
||||
binInfo.getBankName().ifPresent(bankName
|
||||
-> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_BANK_NAME, MODULE_NAME, bankName)));
|
||||
binInfo.getBankPhoneNumber().ifPresent(phoneNumber
|
||||
-> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PHONE_NUMBER, MODULE_NAME, phoneNumber)));
|
||||
binInfo.getBankURL().ifPresent(url
|
||||
-> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_URL, MODULE_NAME, url)));
|
||||
binInfo.getCountry().ifPresent(country
|
||||
-> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_COUNTRY, MODULE_NAME, country)));
|
||||
binInfo.getBankCity().ifPresent(city
|
||||
-> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CITY, MODULE_NAME, city)));
|
||||
}
|
||||
|
||||
/*
|
||||
* If the hit is from unused or unallocated space, record the Solr
|
||||
* document id to support showing just the chunk that contained the hit.
|
||||
*/
|
||||
if (content instanceof AbstractFile) {
|
||||
AbstractFile file = (AbstractFile) content;
|
||||
if (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS
|
||||
|| file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) {
|
||||
attributes.add(new BlackboardAttribute(KEYWORD_SEARCH_DOCUMENT_ID, MODULE_NAME, hit.getSolrDocumentId()));
|
||||
}
|
||||
}
|
||||
|
||||
if (StringUtils.isNotBlank(listName)) {
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
|
||||
}
|
||||
if (snippet != null) {
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
|
||||
}
|
||||
|
||||
hit.getArtifactID().ifPresent(
|
||||
artifactID -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
|
||||
);
|
||||
|
||||
// TermsComponentQuery is now being used exclusively for substring searches.
|
||||
attributes.add(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
|
||||
|
||||
/*
|
||||
* Create an account.
|
||||
*/
|
||||
try {
|
||||
AccountFileInstance ccAccountInstance = Case.getCurrentCaseThrows().getSleuthkitCase().getCommunicationsManager().createAccountFileInstance(Account.Type.CREDIT_CARD, ccnAttribute.getValueString(), MODULE_NAME, content);
|
||||
ccAccountInstance.addAttributes(attributes);
|
||||
} catch (TskCoreException | NoCurrentCaseException ex) {
|
||||
LOGGER.log(Level.SEVERE, "Error creating CCN account instance", ex); //NON-NLS
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the track 2 data from the snippet for a credit card account number
|
||||
* hit and turns them into artifact attributes.
|
||||
*
|
||||
* @param attributesMap A map of artifact attribute objects, used to avoid
|
||||
* creating duplicate attributes.
|
||||
* @param matcher A matcher for the snippet.
|
||||
*/
|
||||
static private void parseTrack2Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributesMap, Matcher matcher) {
|
||||
addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_NUMBER, "accountNumber", matcher);
|
||||
addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_EXPIRATION, "expiration", matcher);
|
||||
addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_SERVICE_CODE, "serviceCode", matcher);
|
||||
addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_DISCRETIONARY, "discretionary", matcher);
|
||||
addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_LRC, "LRC", matcher);
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the track 1 data from the snippet for a credit card account number
|
||||
* hit and turns them into artifact attributes. The track 1 data has the
|
||||
* same fields as the track two data, plus the account holder's name.
|
||||
*
|
||||
* @param attributeMap A map of artifact attribute objects, used to avoid
|
||||
* creating duplicate attributes.
|
||||
* @param matcher A matcher for the snippet.
|
||||
*/
|
||||
static private void parseTrack1Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, Matcher matcher) {
|
||||
parseTrack2Data(attributeMap, matcher);
|
||||
addAttributeIfNotAlreadyCaptured(attributeMap, ATTRIBUTE_TYPE.TSK_NAME_PERSON, "name", matcher);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an attribute of the the given type to the given artifact with a
|
||||
* value parsed from the snippet for a credit account number hit.
|
||||
*
|
||||
* @param attributeMap A map of artifact attribute objects, used to avoid
|
||||
* creating duplicate attributes.
|
||||
* @param attrType The type of attribute to create.
|
||||
* @param groupName The group name of the regular expression that was
|
||||
* used to parse the attribute data.
|
||||
* @param matcher A matcher for the snippet.
|
||||
*/
|
||||
static private void addAttributeIfNotAlreadyCaptured(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, ATTRIBUTE_TYPE attrType, String groupName, Matcher matcher) {
|
||||
BlackboardAttribute.Type type = new BlackboardAttribute.Type(attrType);
|
||||
attributeMap.computeIfAbsent(type, (BlackboardAttribute.Type t) -> {
|
||||
String value = matcher.group(groupName);
|
||||
if (attrType.equals(ATTRIBUTE_TYPE.TSK_CARD_NUMBER)) {
|
||||
value = CharMatcher.anyOf(" -").removeFrom(value);
|
||||
}
|
||||
if (StringUtils.isNotBlank(value)) {
|
||||
return new BlackboardAttribute(attrType, MODULE_NAME, value);
|
||||
}
|
||||
return null;
|
||||
});
|
||||
}
|
||||
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user