Do CR queries in batches

This commit is contained in:
Ann Priestman 2019-07-18 13:53:15 -04:00
parent 6e77f15116
commit 009b8c540f
5 changed files with 172 additions and 26 deletions

View File

@ -2217,6 +2217,45 @@ abstract class AbstractSqlEamDb implements EamDb {
EamDbUtil.closeConnection(conn); EamDbUtil.closeConnection(conn);
} }
} }
/**
 * Runs a caller-supplied SELECT query and hands the open result set to the
 * given callback.
 *
 * The keyword "select " is prepended to selectClause before execution, so
 * the argument must contain only what follows SELECT (column list, FROM,
 * WHERE, ...). The clause is concatenated into the SQL text as-is, with no
 * bind parameters, so callers are responsible for its contents.
 *
 * @param selectClause          the query text that follows the SELECT keyword
 * @param instanceTableCallback callback that consumes the result set; it is
 *                              invoked before the result set is closed
 *
 * @throws EamDbException if either argument is null or running the query
 *                        raises an SQLException
 */
@Override
public void processSelectClause(String selectClause, InstanceTableCallback instanceTableCallback) throws EamDbException {
    if (instanceTableCallback == null) {
        throw new EamDbException("Callback interface is null");
    }
    if (selectClause == null) {
        throw new EamDbException("Select clause is null");
    }

    Connection conn = connect();
    PreparedStatement preparedStatement = null;
    ResultSet resultSet = null;
    String sql = "select " + selectClause;
    try {
        preparedStatement = conn.prepareStatement(sql);
        resultSet = preparedStatement.executeQuery();
        instanceTableCallback.process(resultSet);
    } catch (SQLException ex) {
        throw new EamDbException("Error running query", ex);
    } finally {
        // Release in reverse order of acquisition: result set, statement, connection.
        EamDbUtil.closeResultSet(resultSet);
        EamDbUtil.closeStatement(preparedStatement);
        EamDbUtil.closeConnection(conn);
    }
}
@Override @Override
public EamOrganization newOrganization(EamOrganization eamOrg) throws EamDbException { public EamOrganization newOrganization(EamOrganization eamOrg) throws EamDbException {

View File

@ -780,4 +780,13 @@ public interface EamDb {
*/ */
void processInstanceTableWhere(CorrelationAttributeInstance.Type type, String whereClause, InstanceTableCallback instanceTableCallback) throws EamDbException; void processInstanceTableWhere(CorrelationAttributeInstance.Type type, String whereClause, InstanceTableCallback instanceTableCallback) throws EamDbException;
/**
 * Process a SELECT query and pass its result set to a callback.
 *
 * @param selectClause          the portion of the query that follows the
 *                              SELECT keyword (AbstractSqlEamDb prepends
 *                              "select " before executing it)
 * @param instanceTableCallback callback that processes the result set
 *
 * @throws EamDbException if an argument is null or the query cannot be run
 */
void processSelectClause(String selectClause, InstanceTableCallback instanceTableCallback) throws EamDbException;
} }

View File

@ -804,6 +804,24 @@ final class SqliteEamDb extends AbstractSqlEamDb {
} }
} }
/**
 * Process a SELECT query while holding the shared lock.
 *
 * Delegates to the superclass implementation; the shared lock is held for
 * the duration of that call and released afterwards, even if it throws.
 *
 * @param selectClause          query text passed through to the superclass
 * @param instanceTableCallback callback to process the result set
 *
 * @throws EamDbException if the superclass implementation throws it
 */
@Override
public void processSelectClause(String selectClause, InstanceTableCallback instanceTableCallback) throws EamDbException {
    // Acquire before entering the try block so that a failed acquisition
    // never reaches the finally clause and releases a lock that is not held.
    acquireSharedLock();
    try {
        super.processSelectClause(selectClause, instanceTableCallback);
    } finally {
        releaseSharedLock();
    }
}
/** /**
* Check whether a reference set with the given name/version is in the * Check whether a reference set with the given name/version is in the
* central repo. Used to check for name collisions when creating reference * central repo. Used to check for name collisions when creating reference

View File

@ -21,18 +21,31 @@ package org.sleuthkit.autopsy.filequery;
import java.sql.ResultSet; import java.sql.ResultSet;
import java.sql.SQLException; import java.sql.SQLException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects; import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.logging.Level; import java.util.logging.Level;
import java.util.stream.Collectors;
import org.openide.util.NbBundle; import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.centralrepository.datamodel.CorrelationAttributeInstance; import org.sleuthkit.autopsy.centralrepository.datamodel.CorrelationAttributeInstance;
import org.sleuthkit.autopsy.centralrepository.datamodel.CorrelationAttributeNormalizationException; import org.sleuthkit.autopsy.centralrepository.datamodel.CorrelationAttributeNormalizationException;
import org.sleuthkit.autopsy.centralrepository.datamodel.EamDb; import org.sleuthkit.autopsy.centralrepository.datamodel.EamDb;
import org.sleuthkit.autopsy.centralrepository.datamodel.EamDbException; import org.sleuthkit.autopsy.centralrepository.datamodel.EamDbException;
import org.sleuthkit.autopsy.centralrepository.datamodel.EamDbUtil;
import org.sleuthkit.autopsy.centralrepository.datamodel.InstanceTableCallback;
import org.sleuthkit.autopsy.commonpropertiessearch.AbstractCommonAttributeInstance;
import org.sleuthkit.autopsy.commonpropertiessearch.CentralRepoCommonAttributeInstance;
import org.sleuthkit.autopsy.commonpropertiessearch.CommonAttributeValue;
import org.sleuthkit.autopsy.commonpropertiessearch.CommonAttributeValueList;
import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.filequery.FileSearchData.FileSize; import org.sleuthkit.autopsy.filequery.FileSearchData.FileSize;
import org.sleuthkit.autopsy.filequery.FileSearchData.FileType; import org.sleuthkit.autopsy.filequery.FileSearchData.FileType;
@ -44,6 +57,7 @@ import org.sleuthkit.datamodel.BlackboardAttribute;
import org.sleuthkit.datamodel.CaseDbAccessManager; import org.sleuthkit.datamodel.CaseDbAccessManager;
import org.sleuthkit.datamodel.Content; import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.ContentTag; import org.sleuthkit.datamodel.ContentTag;
import org.sleuthkit.datamodel.HashUtility;
import org.sleuthkit.datamodel.SleuthkitCase; import org.sleuthkit.datamodel.SleuthkitCase;
import org.sleuthkit.datamodel.TskCoreException; import org.sleuthkit.datamodel.TskCoreException;
@ -772,6 +786,8 @@ class FileSearch {
* Attribute for grouping/sorting by frequency in the central repository * Attribute for grouping/sorting by frequency in the central repository
*/ */
static class FrequencyAttribute extends AttributeType { static class FrequencyAttribute extends AttributeType {
static final int BATCH_SIZE = 50; // Number of hashes to look up at one time
@Override @Override
GroupKey getGroupKey(ResultFile file) { GroupKey getGroupKey(ResultFile file) {
@ -779,31 +795,101 @@ class FileSearch {
} }
@Override @Override
void addAttributeToResultFiles(List<ResultFile> files, SleuthkitCase caseDb, void addAttributeToResultFiles(List<ResultFile> files, SleuthkitCase caseDb,
EamDb centralRepoDb) throws FileSearchException { EamDb centralRepoDb) throws FileSearchException {
if (centralRepoDb == null) { if (centralRepoDb == null) {
throw new FileSearchException("Central Repository is not enabled - can not add frequency data"); // NON-NLS throw new FileSearchException("Central Repository is not enabled - can not add frequency data"); // NON-NLS
} }
// We'll make this more efficient later - for now, add the frequency of each file individually // Set frequency in batches
Set<String> hashesToLookUp = new HashSet<>();
List<ResultFile> currentFiles = new ArrayList<>();
for (ResultFile file : files) { for (ResultFile file : files) {
if (file.getFrequency() == Frequency.UNKNOWN) { if (file.getFrequency() == Frequency.UNKNOWN
try { && file.getAbstractFile().getMd5Hash() != null
if (file.getAbstractFile().getMd5Hash() != null && !file.getAbstractFile().getMd5Hash().isEmpty()) { && !file.getAbstractFile().getMd5Hash().isEmpty()) {
CorrelationAttributeInstance.Type attributeType = centralRepoDb.getCorrelationTypeById(CorrelationAttributeInstance.FILES_TYPE_ID); hashesToLookUp.add(file.getAbstractFile().getMd5Hash());
long count = centralRepoDb.getCountUniqueCaseDataSourceTuplesHavingTypeValue(attributeType, file.getAbstractFile().getMd5Hash()); currentFiles.add(file);
}
if (hashesToLookUp.size() >= BATCH_SIZE) {
computeFrequency(hashesToLookUp, currentFiles, centralRepoDb);
hashesToLookUp.clear();
currentFiles.clear();
}
}
computeFrequency(hashesToLookUp, currentFiles, centralRepoDb);
}
}
/**
 * Looks up the Central Repository frequency for a batch of hashes and lets
 * the query callback apply the results to the supplied files.
 *
 * Central Repository errors are logged as warnings and not rethrown.
 *
 * @param hashesToLookUp Hashes to include in the query; nothing is done if
 *                       the set is empty
 * @param currentFiles   Files handed to the callback for updating
 * @param centralRepoDb  Central Repository used to run the query
 */
private static void computeFrequency(Set<String> hashesToLookUp, List<ResultFile> currentFiles, EamDb centralRepoDb) {
    if (hashesToLookUp.isEmpty()) {
        return;
    }

    // Quoted, comma-separated list for the IN clause: 'hash1','hash2',...
    final String quotedHashes = hashesToLookUp.stream()
            .collect(Collectors.joining("','", "'", "'"));

    try {
        final String instanceTable = EamDbUtil.correlationTypeToInstanceTableName(
                centralRepoDb.getCorrelationTypeById(CorrelationAttributeInstance.FILES_TYPE_ID));

        // The inner query reduces the table to distinct (case, data source, value)
        // tuples; the outer query counts those tuples per value.
        final StringBuilder query = new StringBuilder();
        query.append(" value, COUNT(value) FROM ")
                .append("(SELECT DISTINCT case_id, data_source_id, value FROM ")
                .append(instanceTable)
                .append(" WHERE value IN (")
                .append(quotedHashes)
                .append(")) AS foo GROUP BY value");

        centralRepoDb.processSelectClause(query.toString(), new FrequencyCallback(currentFiles));
    } catch (EamDbException ex) {
        logger.log(Level.WARNING, "Error getting frequency counts from Central Repository", ex); // NON-NLS
    }
}
/**
* Callback for the Central Repository frequency query. For each (hash, count)
* row in the result set, sets the frequency on the files whose MD5 hash matches.
*/
private static class FrequencyCallback implements InstanceTableCallback {
private final List<ResultFile> files;
private FrequencyCallback(List<ResultFile> files) {
this.files = files;
}
@Override
public void process(ResultSet resultSet) {
try {
while (resultSet.next()) {
String hash = resultSet.getString(1);
int count = resultSet.getInt(2);
for (Iterator<ResultFile> iterator = files.iterator(); iterator.hasNext();) {
ResultFile file = iterator.next();
if (file.getAbstractFile().getMd5Hash().equalsIgnoreCase(hash)) {
file.setFrequency(Frequency.fromCount(count)); file.setFrequency(Frequency.fromCount(count));
iterator.remove();
} }
} catch (EamDbException | CorrelationAttributeNormalizationException ex) {
throw new FileSearchException("Error looking up central repository frequency for file with ID "
+ file.getAbstractFile().getId(), ex); // NON-NLS
} }
} }
} catch (SQLException ex) {
logger.log(Level.WARNING, "Error getting frequency counts from Central Repository", ex); // NON-NLS
} }
} }
} }
/** /**
* Key representing a central repository frequency group * Key representing a central repository frequency group
*/ */

View File

@ -506,21 +506,15 @@ class FileSearchFiltering {
throw new FileSearchException("Can not run on empty list"); // NON-NLS throw new FileSearchException("Can not run on empty list"); // NON-NLS
} }
// We can try to make this more efficient later - for now, check the frequency of each file individually // Set the frequency for each file
FileSearch.FrequencyAttribute freqAttr = new FileSearch.FrequencyAttribute();
freqAttr.addAttributeToResultFiles(currentResults, caseDb, centralRepoDb);
// If the frequency matches the filter, add the file to the results
List<ResultFile> frequencyResults = new ArrayList<>(); List<ResultFile> frequencyResults = new ArrayList<>();
for (ResultFile file : currentResults) { for (ResultFile file : currentResults) {
try { if (frequencies.contains(file.getFrequency())) {
if (file.getAbstractFile().getMd5Hash() != null && ! file.getAbstractFile().getMd5Hash().isEmpty()) { frequencyResults.add(file);
CorrelationAttributeInstance.Type attributeType = centralRepoDb.getCorrelationTypeById(CorrelationAttributeInstance.FILES_TYPE_ID);
long count = centralRepoDb.getCountUniqueCaseDataSourceTuplesHavingTypeValue(attributeType, file.getAbstractFile().getMd5Hash());
file.setFrequency(Frequency.fromCount(count));
}
if (frequencies.contains(file.getFrequency())) {
frequencyResults.add(file);
}
} catch (EamDbException | CorrelationAttributeNormalizationException ex) {
throw new FileSearchException("Error querying central repository", ex); // NON-NLS
} }
} }
return frequencyResults; return frequencyResults;