- add regex matches to blackboard, a separate artifact per each match. Do a check if the term actually hits in that file.

- use new function to add BB attributes in bulk
This commit is contained in:
adam-m 2012-02-10 15:37:10 -05:00
parent baad89a464
commit 55735bafcb

View File

@ -398,14 +398,11 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
if (query.isLiteral()) {
del = new LuceneQuery(queryStr);
del.escape();
} else {
del = new TermComponentQuery(queryStr);
}
if (query.isLiteral()) {
del.escape();
}
List<FsContent> queryResult = null;
try {
@ -447,6 +444,8 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
//write results to BB
for (FsContent hitFile : newResults) {
Collection<BlackboardAttribute> attributes = new ArrayList<BlackboardAttribute>();
if (query.isLiteral()) {
BlackboardArtifact bba = null;
try {
bba = hitFile.newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
@ -454,7 +453,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
logger.log(Level.INFO, "Error adding bb artifact for keyword hit", e);
continue;
}
if (query.isLiteral()) {
String snippet = null;
try {
snippet = LuceneQuery.getSnippet(queryStr, hitFile.getId());
@ -462,77 +461,89 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
logger.log(Level.INFO, "Error querying snippet: " + queryStr, e);
}
if (snippet != null) {
//first try to add attr not in bulk so we can catch sql exception and encode the string
try {
bba.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "keyword", snippet));
} catch (Exception e1) {
logger.log(Level.INFO, "Error adding bb snippet attribute, will encode and retry", e1);
try {
//escape in case of garbage so that sql accepts it
snippet = URLEncoder.encode(snippet, "UTF-8");
bba.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "keyword", snippet));
}
catch (Exception e2) {
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "keyword", snippet));
} catch (Exception e2) {
logger.log(Level.INFO, "Error adding bb snippet attribute", e2);
}
}
}
try {
//keyword
bba.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID(), MODULE_NAME, "keyword", queryStr));
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID(), MODULE_NAME, "keyword", queryStr));
//bogus
bba.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID(), MODULE_NAME, "keyword", ""));
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID(), MODULE_NAME, "keyword", ""));
} catch (Exception e) {
logger.log(Level.INFO, "Error adding bb attribute", e);
}
} else {
//regex case
try {
//regex keyword
bba.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID(), MODULE_NAME, "keyword", queryStr));
//bogus
bba.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID(), MODULE_NAME, "keyword", ""));
} catch (Exception e) {
logger.log(Level.INFO, "Error adding bb attribute", e);
}
//build preview query from terms
StringBuilder termSb = new StringBuilder();
Collection<Term> terms = del.getTerms();
int i = 0;
final int total = terms.size();
for (Term term : terms) {
termSb.append(term.getTerm());
if (i < total - 1) {
termSb.append(" "); //OR
}
++i;
}
final String termSnipQuery = termSb.toString();
String snippet = null;
try {
snippet = LuceneQuery.getSnippet(termSnipQuery, hitFile.getId());
} catch (Exception e) {
logger.log(Level.INFO, "Error querying snippet: " + termSnipQuery, e);
}
if (snippet != null) {
try {
bba.addAttributes(attributes);
} catch (TskException e) {
logger.log(Level.INFO, "Error adding bb attributes to artifact", e);
}
} else {
//regex case
//create a separate artifact per regex hit
final Collection<Term> terms = del.getTerms();
for (Term term : terms) {
final String regexMatch = term.getTerm();
//snippet
String snippet = null;
try {
snippet = LuceneQuery.getSnippet(regexMatch, hitFile.getId());
} catch (Exception e) {
logger.log(Level.INFO, "Error querying snippet: " + regexMatch, e);
continue;
}
if (snippet != null && ! snippet.equals("")) {
//there is match actually in this file, create artifact only then
BlackboardArtifact bba = null;
try {
bba = hitFile.newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
} catch (Exception e) {
logger.log(Level.INFO, "Error adding bb artifact for keyword hit", e);
continue;
}
try {
//regex keyword
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID(), MODULE_NAME, "keyword", queryStr));
//regex match
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID(), MODULE_NAME, "keyword", regexMatch));
} catch (Exception e) {
logger.log(Level.INFO, "Error adding bb attribute", e);
}
try {
//first try to add attr not in bulk so we can catch sql exception and encode the string
bba.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "keyword", snippet));
} catch (Exception e) {
logger.log(Level.INFO, "Error adding bb snippet attribute, will encode and retry", e);
try {
//escape in case of garbage so that sql accepts it
snippet = URLEncoder.encode(snippet, "UTF-8");
bba.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "keyword", snippet));
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "keyword", snippet));
} catch (Exception e2) {
logger.log(Level.INFO, "Error adding bb snippet attribute", e2);
}
}
bba.addAttributes(attributes);
}
//TODO add all terms that matched to attribute
}
} //for each term
}
} //end regex case
} //for each file hit
//update artifact browser
//TODO use has data evt