Store content_str field as lowercase to support case insensitive regex search. Convert substring search to regex.

2025-07-06 21:00:22 +00:00 · 2018-09-18 09:52:49 -04:00 · 2018-09-18 09:52:49 -04:00 · 4db94a7965
commit 4db94a7965
parent 9ad3b9cee9
8 changed files with 174 additions and 623 deletions
--- a/KeywordSearch/solr/solr/configsets/AutopsyConfig/conf/schema.xml
+++ b/KeywordSearch/solr/solr/configsets/AutopsyConfig/conf/schema.xml
@ -45,7 +45,7 @@
    that avoids logging every request
 -->

-<schema name="Autopsy Keyword Search" version="2.0">
+<schema name="Autopsy Keyword Search" version="2.1">
  <!-- attribute "name" is the name of this schema and is only used for display purposes.
       Applications should change this to reflect the nature of the search collection.
       version="1.4" is Solr's version number for the schema syntax and semantics.  It should
@ -60,6 +60,8 @@
       1.7 added _version_ field for Solr Cloud
       1.8 added new content_str string field and stopped copying content and file_name into content_ws
       2.0 added chunk_size field
+       2.1 to facilitate case insensitive regex search,no longer copying content into content_str.
+           content_str will be populated with lowercase content by Autopsy.
     -->

  <types>
@ -649,8 +651,13 @@
   <copyField source="file_name" dest="content_ws"/>
 -->

-   <!-- Copy content into content_str field for regex searches -->
-   <copyField source="content" dest="content_str"/>
+   <!-- Copying of content into content_str field has been removed
+        so that we can perform case insensitive regex searches.
+        The content_str field will be populated directly by Autopsy
+
+        copyField source="content" dest="content_str"/ -->
+
+   <!-- Copy file_name into content_str field for regex searches -->
   <copyField source="file_name" dest="content_str"/>

   <!-- Above, multiple source fields are copied to the [text] field. 
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java
@ -357,11 +357,7 @@ class HighlightedText implements IndexedText {
            }
            final String filterQuery = Server.Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(contentIdStr);

-            double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
-            //choose field to highlight based on isLiteral and Solr index schema version.
-            highlightField = (isLiteral || (indexSchemaVersion < 2.0))
-                    ? LuceneQuery.HIGHLIGHT_FIELD
-                    : Server.Schema.CONTENT_STR.toString();
+            highlightField = LuceneQuery.HIGHLIGHT_FIELD;
            if (isLiteral) {
                //if the query is literal try to get solr to do the highlighting
                final String highlightQuery = keywords.stream()
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/IndexFinder.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/IndexFinder.java
@ -39,7 +39,7 @@ class IndexFinder {
    private static final String KWS_DATA_FOLDER_NAME = "data";
    private static final String INDEX_FOLDER_NAME = "index";
    private static final String CURRENT_SOLR_VERSION = "4";
-    private static final String CURRENT_SOLR_SCHEMA_VERSION = "2.0";
+    private static final String CURRENT_SOLR_SCHEMA_VERSION = "2.1";

    static String getCurrentSolrVersion() {
        return CURRENT_SOLR_VERSION;
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java
@ -22,6 +22,7 @@ import java.io.BufferedReader;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.logging.Level;
+import org.apache.commons.lang3.math.NumberUtils;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.common.SolrInputDocument;
 import org.openide.util.NbBundle;
@ -208,7 +209,7 @@ class Ingester {
     * /update handler e.g. with XMLUpdateRequestHandler (deprecated in SOlr
     * 4.0.0), see if possible to stream with UpdateRequestHandler
     *
-     * @param chunk  The chunk content as a string
+     * @param chunk  The chunk content as a string, or null for metadata only
     * @param fields
     * @param size
     *
@ -231,13 +232,23 @@ class Ingester {
        for (String key : fields.keySet()) {
            updateDoc.addField(key, fields.get(key));
        }
-        //add the content to the SolrInputDocument
-        //JMTODO: can we just add it to the field map before passing that in?
-        updateDoc.addField(Server.Schema.CONTENT.toString(), chunk);

        try {
            //TODO: consider timeout thread, or vary socket timeout based on size of indexed content
+
+            //add the content to the SolrInputDocument
+            //JMTODO: can we just add it to the field map before passing that in?
+            updateDoc.addField(Server.Schema.CONTENT.toString(), chunk);
+
+            // We also add the content (if present) in lowercase form to facilitate case
+            // insensitive substring/regular expression search.
+            double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
+            if (indexSchemaVersion >= 2.1) {
+                updateDoc.addField(Server.Schema.CONTENT_STR.toString(), ((chunk == null) ? "" : chunk.toLowerCase()));
+            }
+
            TimingMetric metric = HealthMonitor.getTimingMetric("Solr: Index chunk");
+
            solrServer.addDocument(updateDoc);
            HealthMonitor.submitTimingMetric(metric);
            uncommitedIngests = true;
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchList.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchList.java
@ -44,11 +44,11 @@ abstract class KeywordSearchList {
    // positives for phone numbers and IP addresses. They don't work as well
    // for "string" types such as URLs since the characters are more likely to
    // appear in the resulting hit.
-    private static final String BOUNDARY_CHARACTERS = "[ \t\r\n\\.\\-\\?\\,\\;\\\\!\\:\\[\\]\\/\\(\\)\\\"\\\'\\>\\{\\}]";
+    static final String BOUNDARY_CHARACTERS = "[ \t\r\n\\.\\-\\?\\,\\;\\\\!\\:\\[\\]\\/\\(\\)\\\"\\\'\\>\\{\\}]";
    private static final String PHONE_NUMBER_REGEX = BOUNDARY_CHARACTERS + "(\\([0-9]{3}\\)|[0-9]{3})([ \\-\\.])[0-9]{3}([ \\-\\.])[0-9]{4}" + BOUNDARY_CHARACTERS;  //NON-NLS
    private static final String IP_ADDRESS_REGEX = BOUNDARY_CHARACTERS + "(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}(1[0-9]{2}|2[0-4][0-9]|25[0-5]|[1-9][0-9]|[0-9])" + BOUNDARY_CHARACTERS;  //NON-NLS
-    private static final String EMAIL_ADDRESS_REGEX = "(\\{?)[a-zA-Z0-9%+_\\-]+(\\.[a-zA-Z0-9%+_\\-]+)*(\\}?)\\@([a-zA-Z0-9]([a-zA-Z0-9\\-]*[a-zA-Z0-9])?\\.)+[a-zA-Z]{2,4}";  //NON-NLS
-    private static final String URL_REGEX = "(((((h|H)(t|T))|(f|F))(t|T)(p|P)(s|S?)\\:\\/\\/)|(w|W){3,3}\\.)[a-zA-Z0-9\\-\\.]+\\.([a-zA-Z]{2,5})(\\:[0-9]+)*(\\/($|[a-zA-Z0-9\\.\\,\\;\\?\\'\\\\+&amp;%\\$#\\=~_\\-]+))*";  //NON-NLS
+    private static final String EMAIL_ADDRESS_REGEX = "(\\{?)[a-z0-9%+_\\-]+(\\.[a-z0-9%+_\\-]+)*(\\}?)\\@([a-z0-9]([a-z0-9\\-]*[a-z0-9])?\\.)+[a-z]{2,4}";  //NON-NLS
+    private static final String URL_REGEX = "(((((h)(t))|(f))(t)(p)(s?)\\:\\/\\/)|(w){3,3}\\.)[a-z0-9\\-\\.]+\\.([a-z]{2,5})(\\:[0-9]+)*(\\/($|[a-z0-9\\.\\,\\;\\?\\'\\\\+&amp;%\\$#\\=~_\\-]+))*";  //NON-NLS

    /**
     * 12-19 digits, with possible single spaces or dashes in between,
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchUtil.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchUtil.java
@ -129,20 +129,16 @@ class KeywordSearchUtil {
    
    static KeywordSearchQuery getQueryForKeyword(Keyword keyword, KeywordList keywordList) {
        KeywordSearchQuery query = null;
-        if (keyword.searchTermIsLiteral()) {
+        if (keyword.searchTermIsLiteral() && keyword.searchTermIsWholeWord()) {
            // literal, exact match
-            if (keyword.searchTermIsWholeWord()) {
-                query = new LuceneQuery(keywordList, keyword);
-                query.escape();
-            } // literal, substring match
-            else {
-                query = new TermsComponentQuery(keywordList, keyword);
-                query.escape();
-                query.setSubstringQuery();
-            }
-        } // regexp
+            query = new LuceneQuery(keywordList, keyword);
+            query.escape();
+        } // regexp and literal substring match
        else {
            query = new RegexQuery(keywordList, keyword);
+            if (keyword.searchTermIsLiteral()) {
+                query.escape();
+            }
        }
        return query;
    }
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java
@ -36,7 +36,6 @@ import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.params.CursorMarkParams;
-import org.openide.util.Exceptions;
 import org.openide.util.NbBundle;
 import org.sleuthkit.autopsy.casemodule.Case;
 import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
@ -44,9 +43,6 @@ import org.sleuthkit.autopsy.coreutils.Logger;
 import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil;
 import org.sleuthkit.autopsy.datamodel.CreditCards;
 import static org.sleuthkit.autopsy.keywordsearch.KeywordSearchSettings.MODULE_NAME;
-import static org.sleuthkit.autopsy.keywordsearch.TermsComponentQuery.CREDIT_CARD_NUM_PATTERN;
-import static org.sleuthkit.autopsy.keywordsearch.TermsComponentQuery.CREDIT_CARD_TRACK2_PATTERN;
-import static org.sleuthkit.autopsy.keywordsearch.TermsComponentQuery.KEYWORD_SEARCH_DOCUMENT_ID;
 import org.sleuthkit.datamodel.AbstractFile;
 import org.sleuthkit.datamodel.Account;
 import org.sleuthkit.datamodel.AccountFileInstance;
@ -93,6 +89,61 @@ final class RegexQuery implements KeywordSearchQuery {
    private static final int MIN_EMAIL_ADDR_LENGTH = 8;
    private static final String SNIPPET_DELIMITER = String.valueOf(Character.toChars(171));

+    /*
+     * The following fields are part of the initial implementation of credit
+     * card account search and should be factored into another class when time
+     * permits.
+     */
+    /**
+     * 12-19 digits, with possible single spaces or dashes in between. First
+     * digit is 2 through 6
+     *
+     */
+    static final Pattern CREDIT_CARD_NUM_PATTERN
+            = Pattern.compile("(?<ccn>[2-6]([ -]?[0-9]){11,18})");
+    static final Pattern CREDIT_CARD_TRACK1_PATTERN = Pattern.compile(
+            /*
+             * Track 1 is alphanumeric.
+             *
+             * This regex matches 12-19 digit ccns embeded in a track 1 formated
+             * string. This regex matches (and extracts groups) even if the
+             * entire track is not present as long as the part that is conforms
+             * to the track format.
+             */
+            "(?:" //begin nested optinal group //NON-NLS
+            + "%?" //optional start sentinal: % //NON-NLS
+            + "B)?" //format code  //NON-NLS
+            + "(?<accountNumber>[2-6]([ -]?[0-9]){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 2,3,4,5, or 6 //NON-NLS
+            + "\\^" //separator //NON-NLS
+            + "(?<name>[^^]{2,26})" //2-26 charachter name, not containing ^ //NON-NLS
+            + "(?:\\^" //separator //NON-NLS
+            + "(?:(?:\\^|(?<expiration>\\d{4}))" //separator or 4 digit expiration YYMM //NON-NLS
+            + "(?:(?:\\^|(?<serviceCode>\\d{3}))"//separator or 3 digit service code //NON-NLS
+            + "(?:(?<discretionary>[^?]*)" // discretionary data not containing separator //NON-NLS
+            + "(?:\\?" // end sentinal: ? //NON-NLS
+            + "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
+            + "?)?)?)?)?)?");//close nested optional groups //NON-NLS
+    static final Pattern CREDIT_CARD_TRACK2_PATTERN = Pattern.compile(
+            /*
+             * Track 2 is numeric plus six punctuation symbolls :;<=>?
+             *
+             * This regex matches 12-19 digit ccns embeded in a track 2 formated
+             * string. This regex matches (and extracts groups) even if the
+             * entire track is not present as long as the part that is conforms
+             * to the track format.
+             *
+             */
+            "[:;<=>?]?" //(optional)start sentinel //NON-NLS
+            + "(?<accountNumber>[2-6]([ -]?[0-9]){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 2,3,4,5, or 6 //NON-NLS
+            + "(?:[:;<=>?]" //separator //NON-NLS
+            + "(?:(?<expiration>\\d{4})" //4 digit expiration date YYMM //NON-NLS
+            + "(?:(?<serviceCode>\\d{3})" //3 digit service code //NON-NLS
+            + "(?:(?<discretionary>[^:;<=>?]*)" //discretionary data, not containing punctuation marks //NON-NLS
+            + "(?:[:;<=>?]" //end sentinel //NON-NLS
+            + "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
+            + "?)?)?)?)?)?"); //close nested optional groups //NON-NLS
+    static final BlackboardAttribute.Type KEYWORD_SEARCH_DOCUMENT_ID = new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_DOCUMENT_ID);
+
    private final List<KeywordQueryFilter> filters = new ArrayList<>();
    private final KeywordList keywordList;
    private final Keyword originalKeyword; // The regular expression originalKeyword used to perform the search.
@ -172,10 +223,13 @@ final class RegexQuery implements KeywordSearchQuery {
        // a regular expression search) and .* as anchors (if the query doesn't
        // already have them). We do not add .* if there is a boundary character.
        boolean skipWildcardPrefix = queryStringContainsWildcardPrefix || getQueryString().startsWith("^");
-        boolean skipWildcardSuffix = queryStringContainsWildcardSuffix || 
-                (getQueryString().endsWith("$") && ( ! getQueryString().endsWith("\\$")));
+        boolean skipWildcardSuffix = queryStringContainsWildcardSuffix
+                || (getQueryString().endsWith("$") && (!getQueryString().endsWith("\\$")));
        solrQuery.setQuery((field == null ? Server.Schema.CONTENT_STR.toString() : field) + ":/"
-                + (skipWildcardPrefix ? "" : ".*") + getQueryString()
+                + (skipWildcardPrefix ? "" : ".*")
+                // if the query is for a substring (i.e. literal search term) we want
+                // to escape characters such as ()[]-.
+                + (originalKeyword.searchTermIsLiteral() ? getEscapedQueryString().toLowerCase() : getQueryString().toLowerCase())
                + (skipWildcardSuffix ? "" : ".*") + "/");

        // Set the fields we want to have returned by the query.
@ -241,7 +295,28 @@ final class RegexQuery implements KeywordSearchQuery {

        final Collection<Object> content_str = solrDoc.getFieldValues(Server.Schema.CONTENT_STR.toString());

-        final Pattern pattern = Pattern.compile(keywordString);
+        String searchPattern;
+        if (originalKeyword.searchTermIsLiteral()) {
+            /**
+             * For substring searches, the following pattern was arrived at
+             * through trial and error in an attempt to reproduce the same hits
+             * we were getting when we were using the TermComponent approach.
+             * This basically looks for zero of more word characters followed
+             * optionally by a dot or apostrophe, followed by the quoted
+             * lowercase substring following by zero or more word characters
+             * followed optionally by a dot or apostrophe. The reason that the
+             * dot and apostrophe characters are being handled here is because
+             * the old code used to find hits in domain names (e.g. hacks.ie)
+             * and possessives (e.g. hacker's). This obviously works for English
+             * but is probably not sufficient for other languages.
+             */
+            searchPattern = "[\\w[\\.']]*" + Pattern.quote(keywordString.toLowerCase()) + "[\\w[\\.']]*";
+        } else {
+            searchPattern = keywordString;
+        }
+
+        final Pattern pattern = Pattern.compile(searchPattern, Pattern.CASE_INSENSITIVE);
+
        try {
            for (Object content_obj : content_str) {
                String content = (String) content_obj;
@ -282,6 +357,31 @@ final class RegexQuery implements KeywordSearchQuery {
                        hit = hit.replaceAll("[^0-9]$", "");
                    }

+                    /**
+                     * Boundary characters are removed from the start and end of
+                     * the hit to normalize the hits. This is being done for
+                     * substring searches only at this point. We don't do it for
+                     * real regular expression searches because the user may
+                     * have explicitly included boundary characters in their
+                     * regular expression.
+                     */
+                    if (originalKeyword.searchTermIsLiteral()) {
+                        hit = hit.replaceAll("^" + KeywordSearchList.BOUNDARY_CHARACTERS + "*", "");
+                        hit = hit.replaceAll(KeywordSearchList.BOUNDARY_CHARACTERS + "*$", "");
+
+                        /**
+                         * The Solr StandardTokenizerFactory maximum token
+                         * length is 255 and attempts to search for tokens
+                         * larger than this limit fail when we attempt to
+                         * highlight later. I have't found a programmatic
+                         * mechanism to get this value so I'm hardcoding it
+                         * here.
+                         */
+                        if (hit.length() > 255) {
+                            break;
+                        }
+                    }
+
                    /**
                     * The use of String interning is an optimization to ensure
                     * that we reuse the same keyword hit String object across
@ -340,7 +440,6 @@ final class RegexQuery implements KeywordSearchQuery {
                        }
                    }
                }
-
            }
        } catch (Throwable error) {
            /*
@ -459,8 +558,8 @@ final class RegexQuery implements KeywordSearchQuery {
        }

        /*
-         * Create a "plain vanilla" keyword hit artifact with keyword and
-         * regex attributes
+         * Create a "plain vanilla" keyword hit artifact with keyword and regex
+         * attributes
         */
        BlackboardArtifact newArtifact;
        Collection<BlackboardAttribute> attributes = new ArrayList<>();
@ -486,7 +585,11 @@ final class RegexQuery implements KeywordSearchQuery {
                -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
        );

-        attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.REGEX.ordinal()));
+        if (originalKeyword.searchTermIsLiteral()) {
+            attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
+        } else {
+            attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.REGEX.ordinal()));
+        }

        try {
            newArtifact.addAttributes(attributes);
@ -506,14 +609,14 @@ final class RegexQuery implements KeywordSearchQuery {
            return;
        }
        /*
-         * Create a credit card account  with attributes
-         * parsed from the snippet for the hit and looked up based on the
-         * parsed bank identifcation number.
+         * Create a credit card account with attributes parsed from the snippet
+         * for the hit and looked up based on the parsed bank identifcation
+         * number.
         */
        Collection<BlackboardAttribute> attributes = new ArrayList<>();

        Map<BlackboardAttribute.Type, BlackboardAttribute> parsedTrackAttributeMap = new HashMap<>();
-        Matcher matcher = TermsComponentQuery.CREDIT_CARD_TRACK1_PATTERN.matcher(hit.getSnippet());
+        Matcher matcher = CREDIT_CARD_TRACK1_PATTERN.matcher(hit.getSnippet());
        if (matcher.find()) {
            parseTrack1Data(parsedTrackAttributeMap, matcher);
        }
@ -565,8 +668,7 @@ final class RegexQuery implements KeywordSearchQuery {

        /*
         * If the hit is from unused or unallocated space, record the Solr
-         * document id to support showing just the chunk that contained the
-         * hit.
+         * document id to support showing just the chunk that contained the hit.
         */
        if (content instanceof AbstractFile) {
            AbstractFile file = (AbstractFile) content;
@ -589,12 +691,11 @@ final class RegexQuery implements KeywordSearchQuery {

        attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.REGEX.ordinal()));

-        
        /*
         * Create an account instance.
         */
        try {
-            AccountFileInstance ccAccountInstance = Case.getCurrentCaseThrows().getSleuthkitCase().getCommunicationsManager().createAccountFileInstance(Account.Type.CREDIT_CARD, ccnAttribute.getValueString() , MODULE_NAME, content);
+            AccountFileInstance ccAccountInstance = Case.getCurrentCaseThrows().getSleuthkitCase().getCommunicationsManager().createAccountFileInstance(Account.Type.CREDIT_CARD, ccnAttribute.getValueString(), MODULE_NAME, content);

            ccAccountInstance.addAttributes(attributes);

@ -604,6 +705,7 @@ final class RegexQuery implements KeywordSearchQuery {
        }

    }
+
    /**
     * Parses the track 2 data from the snippet for a credit card account number
     * hit and turns them into artifact attributes.
@ -649,7 +751,7 @@ final class RegexQuery implements KeywordSearchQuery {
    static private void addAttributeIfNotAlreadyCaptured(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, ATTRIBUTE_TYPE attrType, String groupName, Matcher matcher) {
        BlackboardAttribute.Type type = new BlackboardAttribute.Type(attrType);

-        if( ! attributeMap.containsKey(type)) {
+        if (!attributeMap.containsKey(type)) {
            String value = matcher.group(groupName);
            if (attrType.equals(ATTRIBUTE_TYPE.TSK_CARD_NUMBER)) {
                attributeMap.put(new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_KEYWORD),
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TermsComponentQuery.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TermsComponentQuery.java
@ -1,561 +0,0 @@
-/*
- * Autopsy Forensic Browser
- *
- * Copyright 2011-2018 Basis Technology Corp.
- * Contact: carrier <at> sleuthkit <dot> org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.sleuthkit.autopsy.keywordsearch;
-
-import com.google.common.base.CharMatcher;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.logging.Level;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import org.apache.commons.lang.StringUtils;
-import org.apache.solr.client.solrj.SolrQuery;
-import org.apache.solr.client.solrj.response.TermsResponse.Term;
-import org.openide.util.Exceptions;
-import org.sleuthkit.autopsy.casemodule.Case;
-import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
-import org.sleuthkit.autopsy.coreutils.Logger;
-import org.sleuthkit.autopsy.coreutils.Version;
-import org.sleuthkit.autopsy.datamodel.CreditCards;
-import org.sleuthkit.datamodel.AbstractFile;
-import org.sleuthkit.datamodel.Account;
-import org.sleuthkit.datamodel.AccountFileInstance;
-import org.sleuthkit.datamodel.BlackboardArtifact;
-import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
-import org.sleuthkit.datamodel.BlackboardAttribute;
-import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
-import org.sleuthkit.datamodel.Content;
-import org.sleuthkit.datamodel.TskCoreException;
-import org.sleuthkit.datamodel.TskData;
-
-/**
- * Implements a regex query that will be performed as a two step operation. In
- * the first step, the Solr terms component is used to find any terms in the
- * index that match the regex. In the second step, term queries are executed for
- * each matched term to produce the set of keyword hits for the regex.
- */
-final class TermsComponentQuery implements KeywordSearchQuery {
-
-    private static final Logger LOGGER = Logger.getLogger(TermsComponentQuery.class.getName());
-    private static final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
-    private static final String SEARCH_HANDLER = "/terms"; //NON-NLS
-    private static final String SEARCH_FIELD = Server.Schema.TEXT.toString();
-    private static final int TERMS_SEARCH_TIMEOUT = 90 * 1000; // Milliseconds
-    private static final String CASE_INSENSITIVE = "case_insensitive"; //NON-NLS
-    private static final boolean DEBUG_FLAG = Version.Type.DEVELOPMENT.equals(Version.getBuildType());
-    private static final int MAX_TERMS_QUERY_RESULTS = 20000;
-
-    private final KeywordList keywordList;
-    private final Keyword originalKeyword;
-    private final List<KeywordQueryFilter> filters = new ArrayList<>(); // THIS APPEARS TO BE UNUSED
-
-    private String searchTerm;
-    private boolean searchTermIsEscaped;
-
-    /*
-     * The following fields are part of the initial implementation of credit
-     * card account search and should be factored into another class when time
-     * permits.
-     */
-    /**
-     * 12-19 digits, with possible single spaces or dashes in between. First
-     * digit is 2 through 6
-     *
-     */
-    static final Pattern CREDIT_CARD_NUM_PATTERN
-            = Pattern.compile("(?<ccn>[2-6]([ -]?[0-9]){11,18})");
-    static final Pattern CREDIT_CARD_TRACK1_PATTERN = Pattern.compile(
-            /*
-             * Track 1 is alphanumeric.
-             *
-             * This regex matches 12-19 digit ccns embeded in a track 1 formated
-             * string. This regex matches (and extracts groups) even if the
-             * entire track is not present as long as the part that is conforms
-             * to the track format.
-             */
-            "(?:" //begin nested optinal group //NON-NLS
-            + "%?" //optional start sentinal: % //NON-NLS
-            + "B)?" //format code  //NON-NLS
-            + "(?<accountNumber>[2-6]([ -]?[0-9]){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 2,3,4,5, or 6 //NON-NLS
-            + "\\^" //separator //NON-NLS
-            + "(?<name>[^^]{2,26})" //2-26 charachter name, not containing ^ //NON-NLS
-            + "(?:\\^" //separator //NON-NLS
-            + "(?:(?:\\^|(?<expiration>\\d{4}))" //separator or 4 digit expiration YYMM //NON-NLS
-            + "(?:(?:\\^|(?<serviceCode>\\d{3}))"//separator or 3 digit service code //NON-NLS
-            + "(?:(?<discretionary>[^?]*)" // discretionary data not containing separator //NON-NLS
-            + "(?:\\?" // end sentinal: ? //NON-NLS
-            + "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
-            + "?)?)?)?)?)?");//close nested optional groups //NON-NLS
-    static final Pattern CREDIT_CARD_TRACK2_PATTERN = Pattern.compile(
-            /*
-             * Track 2 is numeric plus six punctuation symbolls :;<=>?
-             *
-             * This regex matches 12-19 digit ccns embeded in a track 2 formated
-             * string. This regex matches (and extracts groups) even if the
-             * entire track is not present as long as the part that is conforms
-             * to the track format.
-             *
-             */
-            "[:;<=>?]?" //(optional)start sentinel //NON-NLS
-            + "(?<accountNumber>[2-6]([ -]?[0-9]){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 2,3,4,5, or 6 //NON-NLS
-            + "(?:[:;<=>?]" //separator //NON-NLS
-            + "(?:(?<expiration>\\d{4})" //4 digit expiration date YYMM //NON-NLS
-            + "(?:(?<serviceCode>\\d{3})" //3 digit service code //NON-NLS
-            + "(?:(?<discretionary>[^:;<=>?]*)" //discretionary data, not containing punctuation marks //NON-NLS
-            + "(?:[:;<=>?]" //end sentinel //NON-NLS
-            + "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
-            + "?)?)?)?)?)?"); //close nested optional groups //NON-NLS
-    static final BlackboardAttribute.Type KEYWORD_SEARCH_DOCUMENT_ID = new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_DOCUMENT_ID);
-
-    /**
-     * Constructs an object that implements a regex query that will be performed
-     * as a two step operation. In the first step, the Solr terms component is
-     * used to find any terms in the index that match the regex. In the second
-     * step, term queries are executed for each matched term to produce the set
-     * of keyword hits for the regex.
-     *
-     * @param keywordList A keyword list that contains the keyword that provides
-     *                    the regex search term for the query.
-     * @param keyword     The keyword that provides the regex search term for
-     *                    the query.
-     */
-    // TODO: Why is both the list and the keyword added to the state of this
-    // object?
-    // TODO: Why is the search term not escaped and given substring wildcards,
-    // if needed, here in the constructor?
-    TermsComponentQuery(KeywordList keywordList, Keyword keyword) {
-        this.keywordList = keywordList;
-        this.originalKeyword = keyword;
-        this.searchTerm = keyword.getSearchTerm();
-    }
-
-    /**
-     * Gets the keyword list that contains the keyword that provides the regex
-     * search term for the query.
-     *
-     * @return The keyword list.
-     */
-    @Override
-    public KeywordList getKeywordList() {
-        return keywordList;
-    }
-
-    /**
-     * Gets the original search term for the query, without any escaping or, if
-     * it is a literal term, the addition of wildcards for a substring search.
-     *
-     * @return The original search term.
-     */
-    @Override
-    public String getQueryString() {
-        return originalKeyword.getSearchTerm();
-    }
-
-    /**
-     * Indicates whether or not the search term for the query is a literal term
-     * that needs have wildcards added to it to make the query a substring
-     * search.
-     *
-     * @return True or false.
-     */
-    @Override
-    public boolean isLiteral() {
-        return false;
-    }
-
-    /**
-     * Adds wild cards to the search term for the query, which makes the query a
-     * substring search, if it is a literal search term.
-     */
-    @Override
-    public void setSubstringQuery() {
-        searchTerm = ".*" + searchTerm + ".*";
-    }
-
-    /**
-     * Escapes the search term for the query.
-     */
-    @Override
-    public void escape() {
-        searchTerm = Pattern.quote(originalKeyword.getSearchTerm());
-        searchTermIsEscaped = true;
-    }
-
-    /**
-     * Indicates whether or not the search term has been escaped yet.
-     *
-     * @return True or false.
-     */
-    @Override
-    public boolean isEscaped() {
-        return searchTermIsEscaped;
-    }
-
-    /**
-     * Gets the escaped search term for the query, assuming it has been escaped
-     * by a call to TermsComponentQuery.escape.
-     *
-     * @return The search term, possibly escaped.
-     */
-    @Override
-    public String getEscapedQueryString() {
-        return this.searchTerm;
-    }
-
-    /**
-     * Indicates whether or not the search term is a valid regex.
-     *
-     * @return True or false.
-     */
-    @Override
-    public boolean validate() {
-        if (searchTerm.isEmpty()) {
-            return false;
-        }
-        try {
-            Pattern.compile(searchTerm);
-            return true;
-        } catch (IllegalArgumentException ex) {
-            return false;
-        }
-    }
-
-    /**
-     * Does nothing, not applicable to a regex query, which always searches a
-     * field created specifically for regex sesarches.
-     *
-     * @param field The name of a Solr document field to search.
-     */
-    @Override
-    public void setField(String field) {
-    }
-
-    /**
-     * Adds a filter to the query.
-     *
-     * @param filter The filter.
-     */
-    // TODO: Document this better.
-    @Override
-    public void addFilter(KeywordQueryFilter filter) {
-        this.filters.add(filter);
-    }
-
-    /**
-     * Executes the regex query as a two step operation. In the first step, the
-     * Solr terms component is used to find any terms in the index that match
-     * the regex. In the second step, term queries are executed for each matched
-     * term to produce the set of keyword hits for the regex.
-     *
-     * @return A QueryResult object or null.
-     *
-     * @throws NoOpenCoreException
-     */
-    @Override
-    public QueryResults performQuery() throws KeywordSearchModuleException, NoOpenCoreException {
-        /*
-         * Do a query using the Solr terms component to find any terms in the
-         * index that match the regex.
-         */
-        final SolrQuery termsQuery = new SolrQuery();
-        termsQuery.setRequestHandler(SEARCH_HANDLER);
-        termsQuery.setTerms(true);
-        termsQuery.setTermsRegexFlag(CASE_INSENSITIVE);
-        termsQuery.setTermsRegex(searchTerm);
-        termsQuery.addTermsField(SEARCH_FIELD);
-        termsQuery.setTimeAllowed(TERMS_SEARCH_TIMEOUT);
-        termsQuery.setShowDebugInfo(DEBUG_FLAG);
-        termsQuery.setTermsLimit(MAX_TERMS_QUERY_RESULTS);
-        List<Term> terms = KeywordSearch.getServer().queryTerms(termsQuery).getTerms(SEARCH_FIELD);
-        /*
-         * Do a term query for each term that matched the regex.
-         */
-        QueryResults results = new QueryResults(this);
-        for (Term term : terms) {
-            /*
-             * If searching for credit card account numbers, do a Luhn check on
-             * the term and discard it if it does not pass.
-             */
-            if (originalKeyword.getArtifactAttributeType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
-                Matcher matcher = CREDIT_CARD_NUM_PATTERN.matcher(term.getTerm());
-                if (false == matcher.find()
-                        || false == CreditCardValidator.isValidCCN(matcher.group("ccn"))) {
-                    continue;
-                }
-            }
-
-            /*
-             * Do an ordinary query with the escaped term and convert the query
-             * results into a single list of keyword hits without duplicates.
-             *
-             * Note that the filters field appears to be unused. There is an old
-             * comment here, what does it mean? "Note: we can't set filter query
-             * on terms query but setting filter query on fileResults query will
-             * yield the same result." The filter is NOT being added to the term
-             * query.
-             */
-            String escapedTerm = KeywordSearchUtil.escapeLuceneQuery(term.getTerm());
-            LuceneQuery termQuery = new LuceneQuery(keywordList, new Keyword(escapedTerm, true, true));
-            filters.forEach(termQuery::addFilter); // This appears to be unused
-            QueryResults termQueryResult = termQuery.performQuery();
-            Set<KeywordHit> termHits = new HashSet<>();
-            for (Keyword word : termQueryResult.getKeywords()) {
-                termHits.addAll(termQueryResult.getResults(word));
-            }
-            results.addResult(new Keyword(term.getTerm(), false, true, originalKeyword.getListName(), originalKeyword.getOriginalTerm()), new ArrayList<>(termHits));
-        }
-        return results;
-    }
-
-    /**
-     * Posts a keyword hit artifact to the blackboard for a given keyword hit.
-     *
-     * @param content      The text source object for the hit.
-     * @param foundKeyword The keyword that was found by the search, this may be
-     *                     different than the Keyword that was searched if, for
-     *                     example, it was a RegexQuery.
-     * @param hit          The keyword hit.
-     * @param snippet      A snippet from the text that contains the hit.
-     * @param listName     The name of the keyword list that contained the
-     *                     keyword for which the hit was found.
-     *
-     *
-     * @return The newly created artifact or null if there was a problem
-     *         creating it.
-     */
-    @Override
-    public BlackboardArtifact postKeywordHitToBlackboard(Content content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName) {
-
-        /*
-         * CCN hits are handled specially
-         */
-        if (originalKeyword.getArtifactAttributeType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
-            createCCNAccount(content, hit, snippet, listName);
-            return null;
-        }
-
-        /*
-         * Create a "plain vanilla" keyword hit artifact with keyword and regex
-         * attributes,
-         */
-        BlackboardArtifact newArtifact;
-        Collection<BlackboardAttribute> attributes = new ArrayList<>();
-
-        attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm()));
-        attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, originalKeyword.getSearchTerm()));
-
-        try {
-            newArtifact = content.newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
-
-        } catch (TskCoreException ex) {
-            LOGGER.log(Level.SEVERE, "Error adding artifact for keyword hit to blackboard", ex); //NON-NLS
-            return null;
-        }
-
-        if (StringUtils.isNotBlank(listName)) {
-            attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
-        }
-        if (snippet != null) {
-            attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
-        }
-
-        hit.getArtifactID().ifPresent(
-                artifactID -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
-        );
-
-        // TermsComponentQuery is now being used exclusively for substring searches.
-        attributes.add(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
-
-        try {
-            newArtifact.addAttributes(attributes);
-            return newArtifact;
-        } catch (TskCoreException e) {
-            LOGGER.log(Level.SEVERE, "Error adding bb attributes for terms search artifact", e); //NON-NLS
-            return null;
-        }
-    }
-
-    private void createCCNAccount(Content content, KeywordHit hit, String snippet, String listName) {
-
-        if (originalKeyword.getArtifactAttributeType() != ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
-            LOGGER.log(Level.SEVERE, "Keyword hit is not a credit card number"); //NON-NLS
-            return;
-        }
-
-        /*
-         * Create a credit card account with attributes parsed from from the
-         * snippet for the hit and looked up based on the parsed bank
-         * identifcation number.
-         */
-        Collection<BlackboardAttribute> attributes = new ArrayList<>();
-
-        Map<BlackboardAttribute.Type, BlackboardAttribute> parsedTrackAttributeMap = new HashMap<>();
-        Matcher matcher = CREDIT_CARD_TRACK1_PATTERN.matcher(hit.getSnippet());
-        if (matcher.find()) {
-            parseTrack1Data(parsedTrackAttributeMap, matcher);
-        }
-        matcher = CREDIT_CARD_TRACK2_PATTERN.matcher(hit.getSnippet());
-        if (matcher.find()) {
-            parseTrack2Data(parsedTrackAttributeMap, matcher);
-        }
-        final BlackboardAttribute ccnAttribute = parsedTrackAttributeMap.get(new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_CARD_NUMBER));
-        if (ccnAttribute == null || StringUtils.isBlank(ccnAttribute.getValueString())) {
-            if (hit.isArtifactHit()) {
-                LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", searchTerm, hit.getSnippet(), hit.getArtifactID().get())); //NON-NLS
-            } else {
-                long contentId = 0;
-                try {
-                    contentId = hit.getContentID();
-                } catch (TskCoreException ex) {
-                    LOGGER.log(Level.SEVERE, String.format("Failed to content id from keyword hit: term = %s, snippet = '%s'", searchTerm, hit.getSnippet()), ex); //NON-NLS
-                }
-                if (contentId > 0) {
-                    LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", searchTerm, hit.getSnippet(), contentId)); //NON-NLS
-                } else {
-                    LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s'", searchTerm, hit.getSnippet())); //NON-NLS                    
-                }
-            }
-            return;
-        }
-        attributes.addAll(parsedTrackAttributeMap.values());
-
-        /*
-         * Look up the bank name, scheme, etc. attributes for the bank
-         * indentification number (BIN).
-         */
-        final int bin = Integer.parseInt(ccnAttribute.getValueString().substring(0, 8));
-        CreditCards.BankIdentificationNumber binInfo = CreditCards.getBINInfo(bin);
-        if (binInfo != null) {
-            binInfo.getScheme().ifPresent(scheme
-                    -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CARD_SCHEME, MODULE_NAME, scheme)));
-            binInfo.getCardType().ifPresent(cardType
-                    -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CARD_TYPE, MODULE_NAME, cardType)));
-            binInfo.getBrand().ifPresent(brand
-                    -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_BRAND_NAME, MODULE_NAME, brand)));
-            binInfo.getBankName().ifPresent(bankName
-                    -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_BANK_NAME, MODULE_NAME, bankName)));
-            binInfo.getBankPhoneNumber().ifPresent(phoneNumber
-                    -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PHONE_NUMBER, MODULE_NAME, phoneNumber)));
-            binInfo.getBankURL().ifPresent(url
-                    -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_URL, MODULE_NAME, url)));
-            binInfo.getCountry().ifPresent(country
-                    -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_COUNTRY, MODULE_NAME, country)));
-            binInfo.getBankCity().ifPresent(city
-                    -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CITY, MODULE_NAME, city)));
-        }
-
-        /*
-         * If the hit is from unused or unallocated space, record the Solr
-         * document id to support showing just the chunk that contained the hit.
-         */
-        if (content instanceof AbstractFile) {
-            AbstractFile file = (AbstractFile) content;
-            if (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS
-                    || file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) {
-                attributes.add(new BlackboardAttribute(KEYWORD_SEARCH_DOCUMENT_ID, MODULE_NAME, hit.getSolrDocumentId()));
-            }
-        }
-
-        if (StringUtils.isNotBlank(listName)) {
-            attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
-        }
-        if (snippet != null) {
-            attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet));
-        }
-
-        hit.getArtifactID().ifPresent(
-                artifactID -> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
-        );
-
-        // TermsComponentQuery is now being used exclusively for substring searches.
-        attributes.add(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
-
-        /*
-         * Create an account.
-         */
-        try {
-            AccountFileInstance ccAccountInstance = Case.getCurrentCaseThrows().getSleuthkitCase().getCommunicationsManager().createAccountFileInstance(Account.Type.CREDIT_CARD, ccnAttribute.getValueString(), MODULE_NAME, content);
-            ccAccountInstance.addAttributes(attributes);
-        } catch (TskCoreException | NoCurrentCaseException ex) {
-            LOGGER.log(Level.SEVERE, "Error creating CCN account instance", ex); //NON-NLS
-        }
-
-    }
-
-    /**
-     * Parses the track 2 data from the snippet for a credit card account number
-     * hit and turns them into artifact attributes.
-     *
-     * @param attributesMap A map of artifact attribute objects, used to avoid
-     *                      creating duplicate attributes.
-     * @param matcher       A matcher for the snippet.
-     */
-    static private void parseTrack2Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributesMap, Matcher matcher) {
-        addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_NUMBER, "accountNumber", matcher);
-        addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_EXPIRATION, "expiration", matcher);
-        addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_SERVICE_CODE, "serviceCode", matcher);
-        addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_DISCRETIONARY, "discretionary", matcher);
-        addAttributeIfNotAlreadyCaptured(attributesMap, ATTRIBUTE_TYPE.TSK_CARD_LRC, "LRC", matcher);
-    }
-
-    /**
-     * Parses the track 1 data from the snippet for a credit card account number
-     * hit and turns them into artifact attributes. The track 1 data has the
-     * same fields as the track two data, plus the account holder's name.
-     *
-     * @param attributeMap A map of artifact attribute objects, used to avoid
-     *                     creating duplicate attributes.
-     * @param matcher      A matcher for the snippet.
-     */
-    static private void parseTrack1Data(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, Matcher matcher) {
-        parseTrack2Data(attributeMap, matcher);
-        addAttributeIfNotAlreadyCaptured(attributeMap, ATTRIBUTE_TYPE.TSK_NAME_PERSON, "name", matcher);
-    }
-
-    /**
-     * Creates an attribute of the the given type to the given artifact with a
-     * value parsed from the snippet for a credit account number hit.
-     *
-     * @param attributeMap A map of artifact attribute objects, used to avoid
-     *                     creating duplicate attributes.
-     * @param attrType     The type of attribute to create.
-     * @param groupName    The group name of the regular expression that was
-     *                     used to parse the attribute data.
-     * @param matcher      A matcher for the snippet.
-     */
-    static private void addAttributeIfNotAlreadyCaptured(Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, ATTRIBUTE_TYPE attrType, String groupName, Matcher matcher) {
-        BlackboardAttribute.Type type = new BlackboardAttribute.Type(attrType);
-        attributeMap.computeIfAbsent(type, (BlackboardAttribute.Type t) -> {
-            String value = matcher.group(groupName);
-            if (attrType.equals(ATTRIBUTE_TYPE.TSK_CARD_NUMBER)) {
-                value = CharMatcher.anyOf(" -").removeFrom(value);
-            }
-            if (StringUtils.isNotBlank(value)) {
-                return new BlackboardAttribute(attrType, MODULE_NAME, value);
-            }
-            return null;
-        });
-    }
-
-}