mirror of https://github.com/overcuriousity/autopsy-flatpak.git (synced 2025-07-12 07:56:16 +00:00)

keyword search: fixes duplicates during ingest for Local Files - when a local file set is added, ensures that only the currently ingested sources are searched

This commit is contained in:
parent 2167f41bbe
commit 3415c49b04
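In outline, the module stops remembering the image ids seen during the current ingest and remembers data source ids instead, so logical file sets (which have no image) are filtered correctly and previously indexed sources are not searched again. A minimal sketch of that bookkeeping, assuming only the SleuthKit calls that appear in the hunks below; the class and method names in the sketch itself are illustrative, not part of the commit:

import java.util.HashSet;
import java.util.Set;
import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.SleuthkitCase;
import org.sleuthkit.datamodel.TskCoreException;

// Illustrative sketch of the per-ingest bookkeeping this commit introduces;
// the real code lives in KeywordSearchIngestModule (see the hunks below).
class CurrentIngestSources {

    private final Set<Long> curDataSourceIds = new HashSet<Long>();

    // Called for every file processed during ingest: remember which data source it came from.
    void track(SleuthkitCase caseHandle, AbstractFile file) {
        try {
            curDataSourceIds.add(caseHandle.getFileDataSource(file));
        } catch (TskCoreException ex) {
            // the real module logs this and keeps indexing the file without the id
        }
    }

    // The searcher ORs these ids into a KeywordQueryFilter of type DATA_SOURCE.
    Set<Long> getIds() {
        return curDataSourceIds;
    }
}

The diff below wires this idea into Ingester, KeywordQueryFilter and KeywordSearchIngestModule.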
Ingester.java
@@ -41,6 +41,7 @@ import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.util.ContentStream;
 import org.apache.solr.common.SolrInputDocument;
 import org.openide.util.Exceptions;
+import org.sleuthkit.autopsy.casemodule.Case;
 import org.sleuthkit.autopsy.coreutils.Logger;
 import org.sleuthkit.autopsy.datamodel.ContentUtils;
 import org.sleuthkit.autopsy.keywordsearch.Server.SolrServerNoPortException;
@@ -52,10 +53,10 @@ import org.sleuthkit.datamodel.DerivedFile;
 import org.sleuthkit.datamodel.Directory;
 import org.sleuthkit.datamodel.File;
 import org.sleuthkit.datamodel.FsContent;
-import org.sleuthkit.datamodel.Image;
 import org.sleuthkit.datamodel.LayoutFile;
 import org.sleuthkit.datamodel.LocalFile;
 import org.sleuthkit.datamodel.ReadContentInputStream;
+import org.sleuthkit.datamodel.SleuthkitCase;
 import org.sleuthkit.datamodel.TskCoreException;
 
 /**
@@ -184,6 +185,12 @@ public class Ingester {
 
     private class GetContentFieldsV extends ContentVisitor.Default<Map<String, String>> {
 
+        private SleuthkitCase curCase = null;
+
+        GetContentFieldsV() {
+            curCase = Case.getCurrentCase().getSleuthkitCase();
+        }
+
         @Override
         protected Map<String, String> defaultVisit(Content cntnt) {
             return new HashMap<String, String>();
@@ -217,11 +224,7 @@
 
         @Override
         public Map<String, String> visit(LocalFile lf) {
-            final Map<String, String> params = new HashMap<String, String>();
-            params.put(Server.Schema.ID.toString(), Long.toString(lf.getId()));
-            params.put(Server.Schema.FILE_NAME.toString(), lf.getName());
-            params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
-            return params;
+            return getCommonFields(lf);
         }
 
         private Map<String, String> getCommonFsContentFields(Map<String, String> params, FsContent fsContent) {
@@ -235,15 +238,13 @@
         private Map<String, String> getCommonFields(AbstractFile af) {
             Map<String, String> params = new HashMap<String, String>();
             params.put(Server.Schema.ID.toString(), Long.toString(af.getId()));
-            long imageId = -1;
+            long dataSourceId = -1;
             try {
-                Image image = af.getImage();
-                if (image != null) {
-                    imageId = image.getId();
-                }
-                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(imageId));
+                dataSourceId = curCase.getFileDataSource(af);
+                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(dataSourceId));
             } catch (TskCoreException ex) {
-                logger.log(Level.SEVERE, "Could not get image id to properly index the file " + af.getId());
+                logger.log(Level.SEVERE, "Could not get data source id to properly index the file " + af.getId());
                 params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
             }
+
             params.put(Server.Schema.FILE_NAME.toString(), af.getName());
KeywordQueryFilter.java
@@ -18,6 +18,10 @@
  */
 package org.sleuthkit.autopsy.keywordsearch;
 
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+
 /**
  *
  * Filter to restrict query only specific files, chunks, images
@@ -27,23 +31,23 @@ public class KeywordQueryFilter {
 
     public static enum FilterType {
 
-        FILE, CHUNK, IMAGE
+        FILE, CHUNK, DATA_SOURCE
     };
-    private long[] idFilters;
+    private Set<Long> idFilters;
     private FilterType filterType;
 
     public KeywordQueryFilter(FilterType filterType, long id) {
         this.filterType = filterType;
-        this.idFilters = new long[1];
-        this.idFilters[0] = id;
+        this.idFilters = new HashSet<Long>();
+        this.idFilters.add(id);
     }
 
-    public KeywordQueryFilter(FilterType filterType, long[] ids) {
+    public KeywordQueryFilter(FilterType filterType, Set<Long> ids) {
         this.filterType = filterType;
         this.idFilters = ids;
     }
 
-    public long[] getIdFilters() {
+    public Set<Long> getIdFilters() {
         return idFilters;
     }
 
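Both constructors above now feed the same Set<Long>. A short usage sketch, with the filter shown in isolation; the wrapper class and method names are made up for illustration:

import java.util.Set;
import org.sleuthkit.autopsy.keywordsearch.KeywordQueryFilter;

// Illustrative only; KeywordQueryFilter and FilterType are from the diff, the rest is made up.
class FilterUsageSketch {

    // A filter for a single data source uses the (FilterType, long) constructor.
    static KeywordQueryFilter forOneSource(long dataSourceId) {
        return new KeywordQueryFilter(KeywordQueryFilter.FilterType.DATA_SOURCE, dataSourceId);
    }

    // A filter for all sources of the current ingest uses the new (FilterType, Set<Long>) constructor.
    static KeywordQueryFilter forCurrentIngest(Set<Long> curDataSourceIds) {
        return new KeywordQueryFilter(KeywordQueryFilter.FilterType.DATA_SOURCE, curDataSourceIds);
    }
}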
@@ -55,12 +59,14 @@ public class KeywordQueryFilter {
     public String toString() {
         StringBuilder sb = new StringBuilder();
         String id = null;
-        for (int i = 0; i < idFilters.length; ++i) {
+
+        Iterator<Long> it = idFilters.iterator();
+        for (int i = 0; it.hasNext(); ++i) {
             if (i > 0) {
                 sb.append(" "); //OR
             }
-            long idVal = idFilters[i];
-            if (filterType == FilterType.IMAGE) {
+            long idVal = it.next();
+            if (filterType == FilterType.DATA_SOURCE) {
                 id = Server.Schema.IMAGE_ID.toString();
             } else {
                 id = Server.Schema.ID.toString();
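The rewritten toString() above walks the set and emits one clause per id, with a space between clauses standing in for OR. A hedged sketch of the result for two made-up data source ids, assuming Server.Schema.IMAGE_ID serializes to the Solr field name "image_id" (that string is defined in Server, and the clause formatting lives in the part of toString() this hunk does not show):

import java.util.HashSet;
import java.util.Set;
import org.sleuthkit.autopsy.keywordsearch.KeywordQueryFilter;

// Illustrative only; ids and the expected output string are assumptions, not from the diff.
class FilterStringSketch {

    static String example() {
        // Two made-up data source ids from the current ingest.
        Set<Long> ids = new HashSet<Long>();
        ids.add(7L);
        ids.add(9L);
        KeywordQueryFilter filter =
                new KeywordQueryFilter(KeywordQueryFilter.FilterType.DATA_SOURCE, ids);
        // Expected to come out roughly as "image_id:7 image_id:9" (set order not guaranteed),
        // assuming the IMAGE_ID schema field renders as "image_id".
        return filter.toString();
    }
}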
KeywordSearchIngestModule.java
@@ -44,6 +44,7 @@ import org.netbeans.api.progress.aggregate.AggregateProgressFactory;
 import org.netbeans.api.progress.aggregate.AggregateProgressHandle;
 import org.netbeans.api.progress.aggregate.ProgressContributor;
 import org.openide.util.Cancellable;
 import org.openide.util.Exceptions;
 import org.sleuthkit.autopsy.casemodule.Case;
 import org.sleuthkit.autopsy.coreutils.EscapeUtil;
 import org.sleuthkit.autopsy.coreutils.StopWatch;
@@ -60,6 +61,7 @@ import org.sleuthkit.datamodel.BlackboardArtifact;
 import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
 import org.sleuthkit.datamodel.BlackboardAttribute;
 import org.sleuthkit.datamodel.AbstractFile;
 import org.sleuthkit.datamodel.Content;
 import org.sleuthkit.datamodel.Image;
 import org.sleuthkit.datamodel.ReadContentInputStream;
 import org.sleuthkit.datamodel.SleuthkitCase;
@@ -115,7 +117,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAbstractFile {
     private Map<Keyword, List<Long>> currentResults;
     //only search images from current ingest, not images previously ingested/indexed
     //accessed read-only by searcher thread
-    private Set<Long> curImageIds;
+    private Set<Long> curDataSourceIds;
     private static final ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock(true); //use fairness policy
     private static final Lock searcherLock = rwLock.writeLock();
     private volatile int messageID = 0;
@@ -128,6 +130,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAbstractFile {
     private boolean initialized = false;
     private KeywordSearchConfigurationPanel panel;
     private Tika tikaFormatDetector;
+
 
     private enum IngestStatus {
 
@@ -160,12 +163,10 @@ public final class KeywordSearchIngestModule extends IngestModuleAbstractFile {
             return ProcessResult.OK;
         }
         try {
-            //add image id of the file to the set, keeping track of images being ingested
-            final Image fileImage = abstractFile.getImage();
-            if (fileImage != null) {
-                //not all Content objects have an image associated (e.g. LocalFiles)
-                curImageIds.add(fileImage.getId());
-            }
+            //add data source id of the file to the set, keeping track of images being ingested
+            final long fileSourceId = caseHandle.getFileDataSource(abstractFile);
+            curDataSourceIds.add(fileSourceId);
+
         } catch (TskCoreException ex) {
             logger.log(Level.SEVERE, "Error getting image id of file processed by keyword search: " + abstractFile.getName(), ex);
         }
@@ -288,7 +289,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAbstractFile {
     private void cleanup() {
         ingestStatus.clear();
         currentResults.clear();
-        curImageIds.clear();
+        curDataSourceIds.clear();
         currentSearcher = null;
         //finalSearcher = null; //do not collect, might be finalizing
 
@@ -399,7 +400,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAbstractFile {
         //keeps track of all results per run not to repeat reporting the same hits
         currentResults = new HashMap<Keyword, List<Long>>();
 
-        curImageIds = new HashSet<Long>();
+        curDataSourceIds = new HashSet<Long>();
 
         indexer = new Indexer();
 
@@ -930,15 +931,10 @@ public final class KeywordSearchIngestModule extends IngestModuleAbstractFile {
                     del = new TermComponentQuery(keywordQuery);
                 }
 
-                //limit search to currently ingested images
-                final long imageIds[] = new long[curImageIds.size()];
-                final Iterator<Long> it = curImageIds.iterator();
-                for (int imageI = 0; it.hasNext(); ++imageI) {
-                    imageIds[imageI] = it.next();
-                }
+                //limit search to currently ingested data sources
                 //set up a filter with 1 or more image ids OR'ed
-                final KeywordQueryFilter imageFilter = new KeywordQueryFilter(KeywordQueryFilter.FilterType.IMAGE, imageIds);
-                del.addFilter(imageFilter);
+                final KeywordQueryFilter dataSourceFilter = new KeywordQueryFilter(KeywordQueryFilter.FilterType.DATA_SOURCE, curDataSourceIds);
+                del.addFilter(dataSourceFilter);
 
                 Map<String, List<ContentHit>> queryResult = null;
 