Modify the credit card number (CCN) regexes to match numbers with single spaces or dashes between digits

This commit is contained in:
millmanorama 2016-09-09 14:01:36 +02:00
parent 6b4e472a50
commit 61a4d52b84
2 changed files with 9 additions and 5 deletions

View File

@ -43,7 +43,7 @@ abstract class KeywordSearchList {
private static final String IP_ADDRESS_REGEX = "(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])"; //NON-NLS private static final String IP_ADDRESS_REGEX = "(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])"; //NON-NLS
private static final String EMAIL_ADDRESS_REGEX = "(?=.{8})[a-z0-9%+_-]+(?:\\.[a-z0-9%+_-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z]{2,4}(?<!\\.txt|\\.exe|\\.dll|\\.jpg|\\.xml)"; //NON-NLS private static final String EMAIL_ADDRESS_REGEX = "(?=.{8})[a-z0-9%+_-]+(?:\\.[a-z0-9%+_-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z]{2,4}(?<!\\.txt|\\.exe|\\.dll|\\.jpg|\\.xml)"; //NON-NLS
private static final String URL_REGEX = "((((ht|f)tp(s?))\\://)|www\\.)[a-zA-Z0-9\\-\\.]+\\.([a-zA-Z]{2,5})(\\:[0-9]+)*(/($|[a-zA-Z0-9\\.\\,\\;\\?\\'\\\\+&amp;%\\$#\\=~_\\-]+))*"; //NON-NLS private static final String URL_REGEX = "((((ht|f)tp(s?))\\://)|www\\.)[a-zA-Z0-9\\-\\.]+\\.([a-zA-Z]{2,5})(\\:[0-9]+)*(/($|[a-zA-Z0-9\\.\\,\\;\\?\\'\\\\+&amp;%\\$#\\=~_\\-]+))*"; //NON-NLS
private static final String CCN_REGEX = ".*[3456]\\d{11,18}.*"; //12-19 digits. first digit is 3,4,5, or 6 //NON-NLS private static final String CCN_REGEX = ".*[3456]([ -]?\\d){11,18}.*"; //12-19 digits, with possible single spaces or dashes in between. first digit is 3,4,5, or 6 //NON-NLS
protected String filePath; protected String filePath;
Map<String, KeywordList> theLists; //the keyword data Map<String, KeywordList> theLists; //the keyword data

View File

@ -19,6 +19,7 @@
// //
package org.sleuthkit.autopsy.keywordsearch; package org.sleuthkit.autopsy.keywordsearch;
import com.google.common.base.CharMatcher;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.HashSet; import java.util.HashSet;
@ -65,7 +66,7 @@ final class TermComponentQuery implements KeywordSearchQuery {
*/ */
private static final Pattern TRACK2_PATTERN = Pattern.compile( private static final Pattern TRACK2_PATTERN = Pattern.compile(
"[:;<=>?]?" //(optional)start sentinel //NON-NLS "[:;<=>?]?" //(optional)start sentinel //NON-NLS
+ "(?<accountNumber>[3456]\\d{11,18})" //12-19 digit ccn, first digit is 3, 4, 5, or 6 //NON-NLS + "(?<accountNumber>[3456]([ -]?\\d){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 3,4,5, or 6 //NON-NLS
+ "(?:[:;<=>?]" //separator //NON-NLS + "(?:[:;<=>?]" //separator //NON-NLS
+ "(?:(?<expiration>\\d{4})" //4 digit expiration date YYMM //NON-NLS + "(?:(?<expiration>\\d{4})" //4 digit expiration date YYMM //NON-NLS
+ "(?:(?<serviceCode>\\d{3})" //3 digit service code //NON-NLS + "(?:(?<serviceCode>\\d{3})" //3 digit service code //NON-NLS
@ -85,7 +86,7 @@ final class TermComponentQuery implements KeywordSearchQuery {
"(?:" //begin nested optional group //NON-NLS "(?:" //begin nested optional group //NON-NLS
+ "%?" //optional start sentinel: % //NON-NLS + "%?" //optional start sentinel: % //NON-NLS
+ "B)?" //format code //NON-NLS + "B)?" //format code //NON-NLS
+ "(?<accountNumber>[3456]\\d{11,18})" //12-19 digit ccn, first digit is 3, 4, 5, or 6 //NON-NLS + "(?<accountNumber>[3456]([ -]?\\d){11,18})" //12-19 digits, with possible single spaces or dashes in between. first digit is 3,4,5, or 6 //NON-NLS
+ "\\^" //separator //NON-NLS + "\\^" //separator //NON-NLS
+ "(?<name>[^^]{2,26})" //2-26 character name, not containing ^ //NON-NLS + "(?<name>[^^]{2,26})" //2-26 character name, not containing ^ //NON-NLS
+ "(?:\\^" //separator //NON-NLS + "(?:\\^" //separator //NON-NLS
@ -95,7 +96,7 @@ final class TermComponentQuery implements KeywordSearchQuery {
+ "(?:\\?" // end sentinel: ? //NON-NLS + "(?:\\?" // end sentinel: ? //NON-NLS
+ "(?<LRC>.)" //longitudinal redundancy check //NON-NLS + "(?<LRC>.)" //longitudinal redundancy check //NON-NLS
+ "?)?)?)?)?)?");//close nested optional groups //NON-NLS + "?)?)?)?)?)?");//close nested optional groups //NON-NLS
private static final Pattern CCN_PATTERN = Pattern.compile("(?<ccn>[3456]\\d{11,18})"); //12-19 digit ccn, first digit is 3, 4, 5, or 6 //NON-NLS private static final Pattern CCN_PATTERN = Pattern.compile("(?<ccn>[3456]([ -]?\\d){11,18})"); //12-19 digits, with possible single spaces or dashes in between. first digit is 3,4,5, or 6 //NON-NLS
private static final LuhnCheckDigit LUHN_CHECK = new LuhnCheckDigit(); private static final LuhnCheckDigit LUHN_CHECK = new LuhnCheckDigit();
//corresponds to field in Solr schema, analyzed with white-space tokenizer only //corresponds to field in Solr schema, analyzed with white-space tokenizer only
@ -324,7 +325,7 @@ final class TermComponentQuery implements KeywordSearchQuery {
//If the keyword is a credit card number, pass it through luhn validator //If the keyword is a credit card number, pass it through luhn validator
Matcher matcher = CCN_PATTERN.matcher(term.getTerm()); Matcher matcher = CCN_PATTERN.matcher(term.getTerm());
matcher.find(); matcher.find();
final String ccn = matcher.group("ccn"); final String ccn = CharMatcher.anyOf(" -").removeFrom(matcher.group("ccn"));
if (false == LUHN_CHECK.isValid(ccn)) { if (false == LUHN_CHECK.isValid(ccn)) {
continue; //if the hit does not pass the luhn check, skip it. continue; //if the hit does not pass the luhn check, skip it.
} }
@ -384,6 +385,9 @@ final class TermComponentQuery implements KeywordSearchQuery {
BlackboardAttribute.Type type = new BlackboardAttribute.Type(attrType); BlackboardAttribute.Type type = new BlackboardAttribute.Type(attrType);
if (artifact.getAttribute(type) == null) { if (artifact.getAttribute(type) == null) {
String value = matcher.group(groupName); String value = matcher.group(groupName);
if (attrType.equals(ATTRIBUTE_TYPE.TSK_ACCOUNT_NUMBER)) {
value = CharMatcher.anyOf(" -").removeFrom(value);
}
if (StringUtils.isNotBlank(value)) { if (StringUtils.isNotBlank(value)) {
artifact.addAttribute(new BlackboardAttribute(type, MODULE_NAME, value)); artifact.addAttribute(new BlackboardAttribute(type, MODULE_NAME, value));
} }