From 61a4d52b84059ceb0069602ec1ba3776e04cecbb Mon Sep 17 00:00:00 2001 From: millmanorama Date: Fri, 9 Sep 2016 14:01:36 +0200 Subject: [PATCH] modify regex to match ccn with spaces or dashes in between digits --- .../autopsy/keywordsearch/KeywordSearchList.java | 2 +- .../autopsy/keywordsearch/TermComponentQuery.java | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchList.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchList.java index 5c7044e2ab..ab06fc0cbc 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchList.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchList.java @@ -43,7 +43,7 @@ abstract class KeywordSearchList { private static final String IP_ADDRESS_REGEX = "(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])"; //NON-NLS private static final String EMAIL_ADDRESS_REGEX = "(?=.{8})[a-z0-9%+_-]+(?:\\.[a-z0-9%+_-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z]{2,4}(? theLists; //the keyword data diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TermComponentQuery.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TermComponentQuery.java index 4ed91c142f..4a4b8cf6d9 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TermComponentQuery.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TermComponentQuery.java @@ -19,6 +19,7 @@ // package org.sleuthkit.autopsy.keywordsearch; +import com.google.common.base.CharMatcher; import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; @@ -65,7 +66,7 @@ final class TermComponentQuery implements KeywordSearchQuery { */ private static final Pattern TRACK2_PATTERN = Pattern.compile( "[:;<=>?]?" 
//(optional)start sentinel //NON-NLS - + "(?[3456]\\d{11,18})" //12-19 digit ccn, first digit is 3, 4, 5, or 6 //NON-NLS + + "(?[3456]([ -]?\\d){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 3,4,5, or 6 //NON-NLS + "(?:[:;<=>?]" //separator //NON-NLS + "(?:(?\\d{4})" //4 digit expiration date YYMM //NON-NLS + "(?:(?\\d{3})" //3 digit service code //NON-NLS @@ -85,7 +86,7 @@ final class TermComponentQuery implements KeywordSearchQuery { "(?:" //begin nested optinal group //NON-NLS + "%?" //optional start sentinal: % //NON-NLS + "B)?" //format code //NON-NLS - + "(?[3456]\\d{11,18})" //12-19 digit ccn, first digit is 3, 4, 5, or 6 //NON-NLS + + "(?[3456]([ -]?\\d){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 3,4,5, or 6 //NON-NLS + "\\^" //separator //NON-NLS + "(?[^^]{2,26})" //2-26 charachter name, not containing ^ //NON-NLS + "(?:\\^" //separator //NON-NLS @@ -95,7 +96,7 @@ final class TermComponentQuery implements KeywordSearchQuery { + "(?:\\?" // end sentinal: ? //NON-NLS + "(?.)" //longitudinal redundancy check //NON-NLS + "?)?)?)?)?)?");//close nested optional groups //NON-NLS - private static final Pattern CCN_PATTERN = Pattern.compile("(?<ccn>[3456]\\d{11,18})"); //12-19 digit ccn, first digit is 3, 4, 5, or 6 //NON-NLS + private static final Pattern CCN_PATTERN = Pattern.compile("(?<ccn>[3456]([ -]?\\d){11,18})"); //12-19 digits, with possible single spaces or dashes in between.
first digit is 3,4,5, or 6 //NON-NLS private static final LuhnCheckDigit LUHN_CHECK = new LuhnCheckDigit(); //corresponds to field in Solr schema, analyzed with white-space tokenizer only @@ -324,7 +325,7 @@ final class TermComponentQuery implements KeywordSearchQuery { //If the keyword is a credit card number, pass it through luhn validator Matcher matcher = CCN_PATTERN.matcher(term.getTerm()); matcher.find(); - final String ccn = matcher.group("ccn"); + final String ccn = CharMatcher.anyOf(" -").removeFrom(matcher.group("ccn")); if (false == LUHN_CHECK.isValid(ccn)) { continue; //if the hit does not pass the luhn check, skip it. } @@ -384,6 +385,9 @@ final class TermComponentQuery implements KeywordSearchQuery { BlackboardAttribute.Type type = new BlackboardAttribute.Type(attrType); if (artifact.getAttribute(type) == null) { String value = matcher.group(groupName); + if (attrType.equals(ATTRIBUTE_TYPE.TSK_ACCOUNT_NUMBER)) { + value = CharMatcher.anyOf(" -").removeFrom(value); + } if (StringUtils.isNotBlank(value)) { artifact.addAttribute(new BlackboardAttribute(type, MODULE_NAME, value)); }