mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-18 02:27:42 +00:00
Do CR queries in batches
This commit is contained in:
parent
6e77f15116
commit
009b8c540f
@ -2218,6 +2218,45 @@ abstract class AbstractSqlEamDb implements EamDb {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a SELECT query
|
||||
*
|
||||
* @param selectClause query string to execute
|
||||
* @param instanceTableCallback callback to process the instance
|
||||
*
|
||||
* @throws EamDbException
|
||||
*/
|
||||
@Override
|
||||
public void processSelectClause(String selectClause, InstanceTableCallback instanceTableCallback) throws EamDbException {
|
||||
|
||||
if (instanceTableCallback == null) {
|
||||
throw new EamDbException("Callback interface is null");
|
||||
}
|
||||
|
||||
if (selectClause == null) {
|
||||
throw new EamDbException("Select clause is null");
|
||||
}
|
||||
|
||||
Connection conn = connect();
|
||||
PreparedStatement preparedStatement = null;
|
||||
ResultSet resultSet = null;
|
||||
StringBuilder sql = new StringBuilder(300);
|
||||
sql.append("select ")
|
||||
.append(selectClause);
|
||||
|
||||
try {
|
||||
preparedStatement = conn.prepareStatement(sql.toString());
|
||||
resultSet = preparedStatement.executeQuery();
|
||||
instanceTableCallback.process(resultSet);
|
||||
} catch (SQLException ex) {
|
||||
throw new EamDbException("Error running query", ex);
|
||||
} finally {
|
||||
EamDbUtil.closeStatement(preparedStatement);
|
||||
EamDbUtil.closeResultSet(resultSet);
|
||||
EamDbUtil.closeConnection(conn);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public EamOrganization newOrganization(EamOrganization eamOrg) throws EamDbException {
|
||||
if (eamOrg == null) {
|
||||
|
@ -780,4 +780,13 @@ public interface EamDb {
|
||||
*/
|
||||
void processInstanceTableWhere(CorrelationAttributeInstance.Type type, String whereClause, InstanceTableCallback instanceTableCallback) throws EamDbException;
|
||||
|
||||
/**
|
||||
* Process a SELECT query
|
||||
*
|
||||
* @param selectClause query string to execute
|
||||
* @param instanceTableCallback callback to process the instance
|
||||
*
|
||||
* @throws EamDbException
|
||||
*/
|
||||
public void processSelectClause(String selectClause, InstanceTableCallback instanceTableCallback) throws EamDbException;
|
||||
}
|
||||
|
@ -804,6 +804,24 @@ final class SqliteEamDb extends AbstractSqlEamDb {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Process a SELECT query
|
||||
*
|
||||
* @param selectClause query string to execute
|
||||
* @param instanceTableCallback callback to process the instance
|
||||
*
|
||||
* @throws EamDbException
|
||||
*/
|
||||
@Override
|
||||
public void processSelectClause(String selectClause, InstanceTableCallback instanceTableCallback) throws EamDbException {
|
||||
try {
|
||||
acquireSharedLock();
|
||||
super.processSelectClause(selectClause, instanceTableCallback);
|
||||
} finally {
|
||||
releaseSharedLock();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether a reference set with the given name/version is in the
|
||||
* central repo. Used to check for name collisions when creating reference
|
||||
|
@ -21,18 +21,31 @@ package org.sleuthkit.autopsy.filequery;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.logging.Level;
|
||||
import java.util.stream.Collectors;
|
||||
import org.openide.util.NbBundle;
|
||||
|
||||
import org.sleuthkit.autopsy.centralrepository.datamodel.CorrelationAttributeInstance;
|
||||
import org.sleuthkit.autopsy.centralrepository.datamodel.CorrelationAttributeNormalizationException;
|
||||
import org.sleuthkit.autopsy.centralrepository.datamodel.EamDb;
|
||||
import org.sleuthkit.autopsy.centralrepository.datamodel.EamDbException;
|
||||
import org.sleuthkit.autopsy.centralrepository.datamodel.EamDbUtil;
|
||||
import org.sleuthkit.autopsy.centralrepository.datamodel.InstanceTableCallback;
|
||||
import org.sleuthkit.autopsy.commonpropertiessearch.AbstractCommonAttributeInstance;
|
||||
import org.sleuthkit.autopsy.commonpropertiessearch.CentralRepoCommonAttributeInstance;
|
||||
import org.sleuthkit.autopsy.commonpropertiessearch.CommonAttributeValue;
|
||||
import org.sleuthkit.autopsy.commonpropertiessearch.CommonAttributeValueList;
|
||||
import org.sleuthkit.autopsy.coreutils.Logger;
|
||||
import org.sleuthkit.autopsy.filequery.FileSearchData.FileSize;
|
||||
import org.sleuthkit.autopsy.filequery.FileSearchData.FileType;
|
||||
@ -44,6 +57,7 @@ import org.sleuthkit.datamodel.BlackboardAttribute;
|
||||
import org.sleuthkit.datamodel.CaseDbAccessManager;
|
||||
import org.sleuthkit.datamodel.Content;
|
||||
import org.sleuthkit.datamodel.ContentTag;
|
||||
import org.sleuthkit.datamodel.HashUtility;
|
||||
import org.sleuthkit.datamodel.SleuthkitCase;
|
||||
import org.sleuthkit.datamodel.TskCoreException;
|
||||
|
||||
@ -773,6 +787,8 @@ class FileSearch {
|
||||
*/
|
||||
static class FrequencyAttribute extends AttributeType {
|
||||
|
||||
static final int BATCH_SIZE = 50; // Number of hashes to look up at one time
|
||||
|
||||
@Override
|
||||
GroupKey getGroupKey(ResultFile file) {
|
||||
return new FrequencyGroupKey(file);
|
||||
@ -786,21 +802,91 @@ class FileSearch {
|
||||
throw new FileSearchException("Central Repository is not enabled - can not add frequency data"); // NON-NLS
|
||||
}
|
||||
|
||||
// We'll make this more efficient later - for now, add the frequency of each file individually
|
||||
// Set frequency in batches
|
||||
Set<String> hashesToLookUp = new HashSet<>();
|
||||
List<ResultFile> currentFiles = new ArrayList<>();
|
||||
for (ResultFile file : files) {
|
||||
if (file.getFrequency() == Frequency.UNKNOWN) {
|
||||
if (file.getFrequency() == Frequency.UNKNOWN
|
||||
&& file.getAbstractFile().getMd5Hash() != null
|
||||
&& !file.getAbstractFile().getMd5Hash().isEmpty()) {
|
||||
hashesToLookUp.add(file.getAbstractFile().getMd5Hash());
|
||||
currentFiles.add(file);
|
||||
}
|
||||
|
||||
if (hashesToLookUp.size() >= BATCH_SIZE) {
|
||||
computeFrequency(hashesToLookUp, currentFiles, centralRepoDb);
|
||||
|
||||
hashesToLookUp.clear();
|
||||
currentFiles.clear();
|
||||
}
|
||||
}
|
||||
computeFrequency(hashesToLookUp, currentFiles, centralRepoDb);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the CR frequency of all the given hashes and updates the list of files.
|
||||
*
|
||||
* @param hashesToLookUp Hashes to find the frequency of
|
||||
* @param currentFiles List of files to update with frequencies
|
||||
*/
|
||||
private static void computeFrequency(Set<String> hashesToLookUp, List<ResultFile> currentFiles, EamDb centralRepoDb) {
|
||||
|
||||
if (hashesToLookUp.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
String hashes = String.join("','", hashesToLookUp);
|
||||
hashes = "'" + hashes + "'";
|
||||
try {
|
||||
if (file.getAbstractFile().getMd5Hash() != null && !file.getAbstractFile().getMd5Hash().isEmpty()) {
|
||||
CorrelationAttributeInstance.Type attributeType = centralRepoDb.getCorrelationTypeById(CorrelationAttributeInstance.FILES_TYPE_ID);
|
||||
long count = centralRepoDb.getCountUniqueCaseDataSourceTuplesHavingTypeValue(attributeType, file.getAbstractFile().getMd5Hash());
|
||||
String tableName = EamDbUtil.correlationTypeToInstanceTableName(attributeType);
|
||||
|
||||
String selectClause = " value, COUNT(value) FROM "
|
||||
+ "(SELECT DISTINCT case_id, data_source_id, value FROM " + tableName
|
||||
+ " WHERE value IN ("
|
||||
+ hashes
|
||||
+ ")) AS foo GROUP BY value";
|
||||
|
||||
FrequencyCallback callback = new FrequencyCallback(currentFiles);
|
||||
centralRepoDb.processSelectClause(selectClause, callback);
|
||||
|
||||
} catch (EamDbException ex) {
|
||||
logger.log(Level.WARNING, "Error getting frequency counts from Central Repository", ex); // NON-NLS
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback to use with findInterCaseValuesByCount which generates a list of
|
||||
* values for common property search
|
||||
*/
|
||||
private static class FrequencyCallback implements InstanceTableCallback {
|
||||
|
||||
private final List<ResultFile> files;
|
||||
|
||||
private FrequencyCallback(List<ResultFile> files) {
|
||||
this.files = files;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void process(ResultSet resultSet) {
|
||||
try {
|
||||
|
||||
while (resultSet.next()) {
|
||||
String hash = resultSet.getString(1);
|
||||
int count = resultSet.getInt(2);
|
||||
for (Iterator<ResultFile> iterator = files.iterator(); iterator.hasNext();) {
|
||||
ResultFile file = iterator.next();
|
||||
if (file.getAbstractFile().getMd5Hash().equalsIgnoreCase(hash)) {
|
||||
file.setFrequency(Frequency.fromCount(count));
|
||||
}
|
||||
} catch (EamDbException | CorrelationAttributeNormalizationException ex) {
|
||||
throw new FileSearchException("Error looking up central repository frequency for file with ID "
|
||||
+ file.getAbstractFile().getId(), ex); // NON-NLS
|
||||
iterator.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (SQLException ex) {
|
||||
logger.log(Level.WARNING, "Error getting frequency counts from Central Repository", ex); // NON-NLS
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -506,22 +506,16 @@ class FileSearchFiltering {
|
||||
throw new FileSearchException("Can not run on empty list"); // NON-NLS
|
||||
}
|
||||
|
||||
// We can try to make this more efficient later - for now, check the frequency of each file individually
|
||||
// Set the frequency for each file
|
||||
FileSearch.FrequencyAttribute freqAttr = new FileSearch.FrequencyAttribute();
|
||||
freqAttr.addAttributeToResultFiles(currentResults, caseDb, centralRepoDb);
|
||||
|
||||
// If the frequency matches the filter, add the file to the results
|
||||
List<ResultFile> frequencyResults = new ArrayList<>();
|
||||
for (ResultFile file : currentResults) {
|
||||
try {
|
||||
if (file.getAbstractFile().getMd5Hash() != null && ! file.getAbstractFile().getMd5Hash().isEmpty()) {
|
||||
CorrelationAttributeInstance.Type attributeType = centralRepoDb.getCorrelationTypeById(CorrelationAttributeInstance.FILES_TYPE_ID);
|
||||
long count = centralRepoDb.getCountUniqueCaseDataSourceTuplesHavingTypeValue(attributeType, file.getAbstractFile().getMd5Hash());
|
||||
file.setFrequency(Frequency.fromCount(count));
|
||||
}
|
||||
|
||||
if (frequencies.contains(file.getFrequency())) {
|
||||
frequencyResults.add(file);
|
||||
}
|
||||
} catch (EamDbException | CorrelationAttributeNormalizationException ex) {
|
||||
throw new FileSearchException("Error querying central repository", ex); // NON-NLS
|
||||
}
|
||||
}
|
||||
return frequencyResults;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user