Limited the changes to IP and email regexes only

This commit is contained in:
Eugene Livis 2021-09-20 17:10:29 -04:00
parent b87c8b876c
commit f780b92af5

View File

@ -364,15 +364,6 @@ final class RegexQuery implements KeywordSearchQuery {
}
offset = hitMatcher.end();
if (offset > 1) {
/* NOTE: some of our regex patterns look for boundary characters immediately before and
* after the keyword hit (e.g. PHONE_NUMBER_REGEX, IP_ADDRESS_REGEX). After a match, Java
* pattern matcher restarts at the first character not matched by the previous match. This
* basically requires two boundary characters to be present between each pattern match.
* To mitigate this we are resetting the offset one character back.
*/
offset--;
}
final ATTRIBUTE_TYPE artifactAttributeType = originalKeyword.getArtifactAttributeType();
// We attempt to reduce false positives for phone numbers and IP address hits
@ -393,6 +384,20 @@ final class RegexQuery implements KeywordSearchQuery {
}
// Replace all non numeric at the end of the hit.
hit = hit.replaceAll("[^0-9]$", "");
if (offset > 1) {
/*
* NOTE: our IP and phone number regex patterns look for
* boundary characters immediately before and after
* the keyword hit. After a match, the Java pattern
* matcher restarts at the first character not
* matched by the previous match. This basically
* requires two boundary characters to be present
* between each pattern match. To mitigate this we
* are resetting the offset one character back, so the
* boundary character that ended this hit can also
* begin the next one.
*/
offset--;
}
}
/**