- add regex matches to the blackboard, with a separate artifact for each match. Check that the term actually hits in the file before creating its artifact.

- use the new addAttributes function to add BB attributes in bulk
This commit is contained in:
adam-m 2012-02-10 15:37:10 -05:00
parent baad89a464
commit 55735bafcb

View File

@ -398,14 +398,11 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
if (query.isLiteral()) { if (query.isLiteral()) {
del = new LuceneQuery(queryStr); del = new LuceneQuery(queryStr);
del.escape();
} else { } else {
del = new TermComponentQuery(queryStr); del = new TermComponentQuery(queryStr);
} }
if (query.isLiteral()) {
del.escape();
}
List<FsContent> queryResult = null; List<FsContent> queryResult = null;
try { try {
@ -447,14 +444,16 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
//write results to BB //write results to BB
for (FsContent hitFile : newResults) { for (FsContent hitFile : newResults) {
BlackboardArtifact bba = null; Collection<BlackboardAttribute> attributes = new ArrayList<BlackboardAttribute>();
try {
bba = hitFile.newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
} catch (Exception e) {
logger.log(Level.INFO, "Error adding bb artifact for keyword hit", e);
continue;
}
if (query.isLiteral()) { if (query.isLiteral()) {
BlackboardArtifact bba = null;
try {
bba = hitFile.newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
} catch (Exception e) {
logger.log(Level.INFO, "Error adding bb artifact for keyword hit", e);
continue;
}
String snippet = null; String snippet = null;
try { try {
snippet = LuceneQuery.getSnippet(queryStr, hitFile.getId()); snippet = LuceneQuery.getSnippet(queryStr, hitFile.getId());
@ -462,77 +461,89 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
logger.log(Level.INFO, "Error querying snippet: " + queryStr, e); logger.log(Level.INFO, "Error querying snippet: " + queryStr, e);
} }
if (snippet != null) { if (snippet != null) {
//first try to add attr not in bulk so we can catch sql exception and encode the string
try { try {
bba.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "keyword", snippet)); bba.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "keyword", snippet));
} catch (Exception e1) { } catch (Exception e1) {
logger.log(Level.INFO, "Error adding bb snippet attribute, will encode and retry", e1);
try { try {
//escape in case of garbage so that sql accepts it //escape in case of garbage so that sql accepts it
snippet = URLEncoder.encode(snippet, "UTF-8"); snippet = URLEncoder.encode(snippet, "UTF-8");
bba.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "keyword", snippet)); attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "keyword", snippet));
} } catch (Exception e2) {
catch (Exception e2) {
logger.log(Level.INFO, "Error adding bb snippet attribute", e2); logger.log(Level.INFO, "Error adding bb snippet attribute", e2);
} }
} }
} }
try { try {
//keyword //keyword
bba.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID(), MODULE_NAME, "keyword", queryStr)); attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID(), MODULE_NAME, "keyword", queryStr));
//bogus //bogus
bba.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID(), MODULE_NAME, "keyword", "")); attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID(), MODULE_NAME, "keyword", ""));
} catch (Exception e) { } catch (Exception e) {
logger.log(Level.INFO, "Error adding bb attribute", e); logger.log(Level.INFO, "Error adding bb attribute", e);
} }
try {
bba.addAttributes(attributes);
} catch (TskException e) {
logger.log(Level.INFO, "Error adding bb attributes to artifact", e);
}
} else { } else {
//regex case //regex case
try { //create a separate artifact per regex hit
//regex keyword
bba.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID(), MODULE_NAME, "keyword", queryStr)); final Collection<Term> terms = del.getTerms();
//bogus
bba.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID(), MODULE_NAME, "keyword", ""));
} catch (Exception e) {
logger.log(Level.INFO, "Error adding bb attribute", e);
}
//build preview query from terms
StringBuilder termSb = new StringBuilder();
Collection<Term> terms = del.getTerms();
int i = 0;
final int total = terms.size();
for (Term term : terms) { for (Term term : terms) {
termSb.append(term.getTerm()); final String regexMatch = term.getTerm();
if (i < total - 1) { //snippet
termSb.append(" "); //OR String snippet = null;
}
++i;
}
final String termSnipQuery = termSb.toString();
String snippet = null;
try {
snippet = LuceneQuery.getSnippet(termSnipQuery, hitFile.getId());
} catch (Exception e) {
logger.log(Level.INFO, "Error querying snippet: " + termSnipQuery, e);
}
if (snippet != null) {
try { try {
bba.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "keyword", snippet)); snippet = LuceneQuery.getSnippet(regexMatch, hitFile.getId());
} catch (Exception e) { } catch (Exception e) {
logger.log(Level.INFO, "Error adding bb snippet attribute, will encode and retry", e); logger.log(Level.INFO, "Error querying snippet: " + regexMatch, e);
try { continue;
//escape in case of garbage so that sql accepts it
snippet = URLEncoder.encode(snippet, "UTF-8");
bba.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "keyword", snippet));
} catch (Exception e2) {
logger.log(Level.INFO, "Error adding bb snippet attribute", e2);
}
} }
}
//TODO add all terms that matched to attribute if (snippet != null && ! snippet.equals("")) {
} //there is match actually in this file, create artifact only then
BlackboardArtifact bba = null;
try {
bba = hitFile.newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
} catch (Exception e) {
logger.log(Level.INFO, "Error adding bb artifact for keyword hit", e);
continue;
}
} try {
//regex keyword
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID(), MODULE_NAME, "keyword", queryStr));
//regex match
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID(), MODULE_NAME, "keyword", regexMatch));
} catch (Exception e) {
logger.log(Level.INFO, "Error adding bb attribute", e);
}
try {
//first try to add attr not in bulk so we can catch sql exception and encode the string
bba.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "keyword", snippet));
} catch (Exception e) {
try {
//escape in case of garbage so that sql accepts it
snippet = URLEncoder.encode(snippet, "UTF-8");
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "keyword", snippet));
} catch (Exception e2) {
logger.log(Level.INFO, "Error adding bb snippet attribute", e2);
}
}
bba.addAttributes(attributes);
}
} //for each term
} //end regex case
} //for each file hit
//update artifact browser //update artifact browser
//TODO use has data evt //TODO use has data evt