Do CR queries in batches

This commit is contained in:
Ann Priestman 2019-07-18 13:53:15 -04:00
parent 6e77f15116
commit 009b8c540f
5 changed files with 172 additions and 26 deletions

View File

@ -2218,6 +2218,45 @@ abstract class AbstractSqlEamDb implements EamDb {
}
}
/**
 * Runs a SELECT statement against the central repository and hands the
 * resulting rows to the supplied callback.
 *
 * The text of selectClause is appended verbatim to the keyword "select", so
 * it must contain everything that follows that keyword in the query.
 *
 * @param selectClause          query text that follows the "select" keyword
 * @param instanceTableCallback callback that is given the query's result set
 *
 * @throws EamDbException if either argument is null or the query fails
 */
@Override
public void processSelectClause(String selectClause, InstanceTableCallback instanceTableCallback) throws EamDbException {
    // Validate the arguments before a connection is opened.
    if (instanceTableCallback == null) {
        throw new EamDbException("Callback interface is null");
    }
    if (selectClause == null) {
        throw new EamDbException("Select clause is null");
    }

    Connection connection = connect();
    final String query = "select " + selectClause;
    PreparedStatement statement = null;
    ResultSet results = null;
    try {
        statement = connection.prepareStatement(query);
        results = statement.executeQuery();
        instanceTableCallback.process(results);
    } catch (SQLException ex) {
        throw new EamDbException("Error running query", ex);
    } finally {
        // Release every JDBC resource whether or not the query succeeded.
        EamDbUtil.closeStatement(statement);
        EamDbUtil.closeResultSet(results);
        EamDbUtil.closeConnection(connection);
    }
}
@Override
public EamOrganization newOrganization(EamOrganization eamOrg) throws EamDbException {
if (eamOrg == null) {

View File

@ -780,4 +780,13 @@ public interface EamDb {
*/
void processInstanceTableWhere(CorrelationAttributeInstance.Type type, String whereClause, InstanceTableCallback instanceTableCallback) throws EamDbException;
/**
 * Process a SELECT query.
 *
 * @param selectClause          query string to execute
 * @param instanceTableCallback callback that is given the query's result set
 *
 * @throws EamDbException
 */
void processSelectClause(String selectClause, InstanceTableCallback instanceTableCallback) throws EamDbException;
}

View File

@ -804,6 +804,24 @@ final class SqliteEamDb extends AbstractSqlEamDb {
}
}
/**
 * Process a SELECT query while holding the shared lock.
 *
 * The work itself is delegated to the superclass implementation; this
 * override only brackets that call with the shared lock.
 *
 * @param selectClause          query string to execute
 * @param instanceTableCallback callback to process the query results
 *
 * @throws EamDbException
 */
@Override
public void processSelectClause(String selectClause, InstanceTableCallback instanceTableCallback) throws EamDbException {
    // Acquire the lock before entering the try block so that the finally
    // clause can only release a lock that was actually obtained.
    acquireSharedLock();
    try {
        super.processSelectClause(selectClause, instanceTableCallback);
    } finally {
        releaseSharedLock();
    }
}
/**
* Check whether a reference set with the given name/version is in the
* central repo. Used to check for name collisions when creating reference

View File

@ -21,18 +21,31 @@ package org.sleuthkit.autopsy.filequery;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.logging.Level;
import java.util.stream.Collectors;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.centralrepository.datamodel.CorrelationAttributeInstance;
import org.sleuthkit.autopsy.centralrepository.datamodel.CorrelationAttributeNormalizationException;
import org.sleuthkit.autopsy.centralrepository.datamodel.EamDb;
import org.sleuthkit.autopsy.centralrepository.datamodel.EamDbException;
import org.sleuthkit.autopsy.centralrepository.datamodel.EamDbUtil;
import org.sleuthkit.autopsy.centralrepository.datamodel.InstanceTableCallback;
import org.sleuthkit.autopsy.commonpropertiessearch.AbstractCommonAttributeInstance;
import org.sleuthkit.autopsy.commonpropertiessearch.CentralRepoCommonAttributeInstance;
import org.sleuthkit.autopsy.commonpropertiessearch.CommonAttributeValue;
import org.sleuthkit.autopsy.commonpropertiessearch.CommonAttributeValueList;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.filequery.FileSearchData.FileSize;
import org.sleuthkit.autopsy.filequery.FileSearchData.FileType;
@ -44,6 +57,7 @@ import org.sleuthkit.datamodel.BlackboardAttribute;
import org.sleuthkit.datamodel.CaseDbAccessManager;
import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.ContentTag;
import org.sleuthkit.datamodel.HashUtility;
import org.sleuthkit.datamodel.SleuthkitCase;
import org.sleuthkit.datamodel.TskCoreException;
@ -773,6 +787,8 @@ class FileSearch {
*/
static class FrequencyAttribute extends AttributeType {
static final int BATCH_SIZE = 50; // Number of hashes to look up at one time
@Override
GroupKey getGroupKey(ResultFile file) {
return new FrequencyGroupKey(file);
@ -786,21 +802,91 @@ class FileSearch {
throw new FileSearchException("Central Repository is not enabled - can not add frequency data"); // NON-NLS
}
// We'll make this more efficient later - for now, add the frequency of each file individually
// Set frequency in batches
Set<String> hashesToLookUp = new HashSet<>();
List<ResultFile> currentFiles = new ArrayList<>();
for (ResultFile file : files) {
if (file.getFrequency() == Frequency.UNKNOWN) {
if (file.getFrequency() == Frequency.UNKNOWN
&& file.getAbstractFile().getMd5Hash() != null
&& !file.getAbstractFile().getMd5Hash().isEmpty()) {
hashesToLookUp.add(file.getAbstractFile().getMd5Hash());
currentFiles.add(file);
}
if (hashesToLookUp.size() >= BATCH_SIZE) {
computeFrequency(hashesToLookUp, currentFiles, centralRepoDb);
hashesToLookUp.clear();
currentFiles.clear();
}
}
computeFrequency(hashesToLookUp, currentFiles, centralRepoDb);
}
}
/**
* Computes the CR frequency of all the given hashes and updates the list of files.
*
* @param hashesToLookUp Hashes to find the frequency of
* @param currentFiles List of files to update with frequencies
*/
private static void computeFrequency(Set<String> hashesToLookUp, List<ResultFile> currentFiles, EamDb centralRepoDb) {
if (hashesToLookUp.isEmpty()) {
return;
}
String hashes = String.join("','", hashesToLookUp);
hashes = "'" + hashes + "'";
try {
if (file.getAbstractFile().getMd5Hash() != null && !file.getAbstractFile().getMd5Hash().isEmpty()) {
CorrelationAttributeInstance.Type attributeType = centralRepoDb.getCorrelationTypeById(CorrelationAttributeInstance.FILES_TYPE_ID);
long count = centralRepoDb.getCountUniqueCaseDataSourceTuplesHavingTypeValue(attributeType, file.getAbstractFile().getMd5Hash());
String tableName = EamDbUtil.correlationTypeToInstanceTableName(attributeType);
String selectClause = " value, COUNT(value) FROM "
+ "(SELECT DISTINCT case_id, data_source_id, value FROM " + tableName
+ " WHERE value IN ("
+ hashes
+ ")) AS foo GROUP BY value";
FrequencyCallback callback = new FrequencyCallback(currentFiles);
centralRepoDb.processSelectClause(selectClause, callback);
} catch (EamDbException ex) {
logger.log(Level.WARNING, "Error getting frequency counts from Central Repository", ex); // NON-NLS
}
}
/**
* Callback to use with findInterCaseValuesByCount which generates a list of
* values for common property search
*/
private static class FrequencyCallback implements InstanceTableCallback {
private final List<ResultFile> files;
private FrequencyCallback(List<ResultFile> files) {
this.files = files;
}
@Override
public void process(ResultSet resultSet) {
try {
while (resultSet.next()) {
String hash = resultSet.getString(1);
int count = resultSet.getInt(2);
for (Iterator<ResultFile> iterator = files.iterator(); iterator.hasNext();) {
ResultFile file = iterator.next();
if (file.getAbstractFile().getMd5Hash().equalsIgnoreCase(hash)) {
file.setFrequency(Frequency.fromCount(count));
}
} catch (EamDbException | CorrelationAttributeNormalizationException ex) {
throw new FileSearchException("Error looking up central repository frequency for file with ID "
+ file.getAbstractFile().getId(), ex); // NON-NLS
iterator.remove();
}
}
}
} catch (SQLException ex) {
logger.log(Level.WARNING, "Error getting frequency counts from Central Repository", ex); // NON-NLS
}
}
}

View File

@ -506,22 +506,16 @@ class FileSearchFiltering {
throw new FileSearchException("Can not run on empty list"); // NON-NLS
}
// We can try to make this more efficient later - for now, check the frequency of each file individually
// Set the frequency for each file
FileSearch.FrequencyAttribute freqAttr = new FileSearch.FrequencyAttribute();
freqAttr.addAttributeToResultFiles(currentResults, caseDb, centralRepoDb);
// If the frequency matches the filter, add the file to the results
List<ResultFile> frequencyResults = new ArrayList<>();
for (ResultFile file : currentResults) {
try {
if (file.getAbstractFile().getMd5Hash() != null && ! file.getAbstractFile().getMd5Hash().isEmpty()) {
CorrelationAttributeInstance.Type attributeType = centralRepoDb.getCorrelationTypeById(CorrelationAttributeInstance.FILES_TYPE_ID);
long count = centralRepoDb.getCountUniqueCaseDataSourceTuplesHavingTypeValue(attributeType, file.getAbstractFile().getMd5Hash());
file.setFrequency(Frequency.fromCount(count));
}
if (frequencies.contains(file.getFrequency())) {
frequencyResults.add(file);
}
} catch (EamDbException | CorrelationAttributeNormalizationException ex) {
throw new FileSearchException("Error querying central repository", ex); // NON-NLS
}
}
return frequencyResults;
}