keyword search: fixes duplicate hits during ingest for Local Files - when a local file set is added, ensures that only the currently ingested data sources are searched
parent 2167f41bbe
commit 3415c49b04
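In essence, the module now records the object ID of every data source touched during the current ingest run and ORs those IDs into a single Solr filter clause, so previously indexed data sources (including earlier Local Files sets) no longer produce duplicate hits. Below is a minimal, self-contained sketch of that idea only; the class name is hypothetical and it assumes the schema field behind Server.Schema.IMAGE_ID is literally named image_id. It is an illustration, not the Autopsy code changed in the hunks that follow.

import java.util.LinkedHashSet;
import java.util.Set;

public class DataSourceFilterSketch {

    // Builds a clause such as "image_id:5 image_id:9"; the space-separated terms are
    // OR'ed under Solr's default operator, mirroring the "//OR" comment in the patch.
    static String buildFilter(Set<Long> dataSourceIds) {
        StringBuilder sb = new StringBuilder();
        for (long id : dataSourceIds) {
            if (sb.length() > 0) {
                sb.append(" "); // OR
            }
            sb.append("image_id:").append(id);
        }
        return sb.toString();
    }

    public static void main(String[] args) {
        // Made-up IDs standing in for the data sources added during the current ingest run.
        Set<Long> curDataSourceIds = new LinkedHashSet<Long>();
        curDataSourceIds.add(5L);
        curDataSourceIds.add(9L);
        System.out.println(buildFilter(curDataSourceIds)); // image_id:5 image_id:9
    }
}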
@@ -41,6 +41,7 @@ import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.util.ContentStream;
 import org.apache.solr.common.SolrInputDocument;
 import org.openide.util.Exceptions;
+import org.sleuthkit.autopsy.casemodule.Case;
 import org.sleuthkit.autopsy.coreutils.Logger;
 import org.sleuthkit.autopsy.datamodel.ContentUtils;
 import org.sleuthkit.autopsy.keywordsearch.Server.SolrServerNoPortException;
@@ -52,10 +53,10 @@ import org.sleuthkit.datamodel.DerivedFile;
 import org.sleuthkit.datamodel.Directory;
 import org.sleuthkit.datamodel.File;
 import org.sleuthkit.datamodel.FsContent;
-import org.sleuthkit.datamodel.Image;
 import org.sleuthkit.datamodel.LayoutFile;
 import org.sleuthkit.datamodel.LocalFile;
 import org.sleuthkit.datamodel.ReadContentInputStream;
+import org.sleuthkit.datamodel.SleuthkitCase;
 import org.sleuthkit.datamodel.TskCoreException;
 
 /**
@@ -184,6 +185,12 @@ public class Ingester {
 
     private class GetContentFieldsV extends ContentVisitor.Default<Map<String, String>> {
 
+        private SleuthkitCase curCase = null;
+
+        GetContentFieldsV() {
+            curCase = Case.getCurrentCase().getSleuthkitCase();
+        }
+
         @Override
         protected Map<String, String> defaultVisit(Content cntnt) {
             return new HashMap<String, String>();
@@ -217,11 +224,7 @@ public class Ingester {
 
         @Override
         public Map<String, String> visit(LocalFile lf) {
-            final Map<String, String> params = new HashMap<String, String>();
-            params.put(Server.Schema.ID.toString(), Long.toString(lf.getId()));
-            params.put(Server.Schema.FILE_NAME.toString(), lf.getName());
-            params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
-            return params;
+            return getCommonFields(lf);
         }
 
         private Map<String, String> getCommonFsContentFields(Map<String, String> params, FsContent fsContent) {
@@ -235,15 +238,13 @@ public class Ingester {
         private Map<String, String> getCommonFields(AbstractFile af) {
             Map<String, String> params = new HashMap<String, String>();
             params.put(Server.Schema.ID.toString(), Long.toString(af.getId()));
-            long imageId = -1;
+            long dataSourceId = -1;
             try {
-                Image image = af.getImage();
-                if (image != null) {
-                    imageId = image.getId();
-                }
-                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(imageId));
+                dataSourceId = curCase.getFileDataSource(af);
+                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(dataSourceId));
             } catch (TskCoreException ex) {
-                logger.log(Level.SEVERE, "Could not get image id to properly index the file " + af.getId());
+                logger.log(Level.SEVERE, "Could not get data source id to properly index the file " + af.getId());
+                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
             }
 
             params.put(Server.Schema.FILE_NAME.toString(), af.getName());
@@ -18,6 +18,10 @@
  */
 package org.sleuthkit.autopsy.keywordsearch;
 
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+
 /**
  *
  * Filter to restrict query only specific files, chunks, images
@@ -27,23 +31,23 @@ public class KeywordQueryFilter {
 
     public static enum FilterType {
 
-        FILE, CHUNK, IMAGE
+        FILE, CHUNK, DATA_SOURCE
     };
-    private long[] idFilters;
+    private Set<Long> idFilters;
     private FilterType filterType;
 
     public KeywordQueryFilter(FilterType filterType, long id) {
         this.filterType = filterType;
-        this.idFilters = new long[1];
-        this.idFilters[0] = id;
+        this.idFilters = new HashSet<Long>();
+        this.idFilters.add(id);
     }
 
-    public KeywordQueryFilter(FilterType filterType, long[] ids) {
+    public KeywordQueryFilter(FilterType filterType, Set<Long> ids) {
         this.filterType = filterType;
         this.idFilters = ids;
     }
 
-    public long[] getIdFilters() {
+    public Set<Long> getIdFilters() {
         return idFilters;
     }
 
@@ -55,12 +59,14 @@ public class KeywordQueryFilter {
     public String toString() {
         StringBuilder sb = new StringBuilder();
         String id = null;
-        for (int i = 0; i < idFilters.length; ++i) {
+        Iterator<Long> it = idFilters.iterator();
+        for (int i = 0; it.hasNext(); ++i) {
             if (i > 0) {
                 sb.append(" "); //OR
             }
-            long idVal = idFilters[i];
-            if (filterType == FilterType.IMAGE) {
+            long idVal = it.next();
+            if (filterType == FilterType.DATA_SOURCE) {
                 id = Server.Schema.IMAGE_ID.toString();
             } else {
                 id = Server.Schema.ID.toString();
@@ -44,6 +44,7 @@ import org.netbeans.api.progress.aggregate.AggregateProgressFactory;
 import org.netbeans.api.progress.aggregate.AggregateProgressHandle;
 import org.netbeans.api.progress.aggregate.ProgressContributor;
 import org.openide.util.Cancellable;
+import org.openide.util.Exceptions;
 import org.sleuthkit.autopsy.casemodule.Case;
 import org.sleuthkit.autopsy.coreutils.EscapeUtil;
 import org.sleuthkit.autopsy.coreutils.StopWatch;
@@ -60,6 +61,7 @@ import org.sleuthkit.datamodel.BlackboardArtifact;
 import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
 import org.sleuthkit.datamodel.BlackboardAttribute;
 import org.sleuthkit.datamodel.AbstractFile;
+import org.sleuthkit.datamodel.Content;
 import org.sleuthkit.datamodel.Image;
 import org.sleuthkit.datamodel.ReadContentInputStream;
 import org.sleuthkit.datamodel.SleuthkitCase;
@@ -115,7 +117,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAbstractFile {
     private Map<Keyword, List<Long>> currentResults;
     //only search images from current ingest, not images previously ingested/indexed
     //accessed read-only by searcher thread
-    private Set<Long> curImageIds;
+    private Set<Long> curDataSourceIds;
     private static final ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock(true); //use fairness policy
     private static final Lock searcherLock = rwLock.writeLock();
     private volatile int messageID = 0;
@@ -128,6 +130,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAbstractFile {
     private boolean initialized = false;
     private KeywordSearchConfigurationPanel panel;
     private Tika tikaFormatDetector;
 
+
     private enum IngestStatus {
 
@@ -160,12 +163,10 @@ public final class KeywordSearchIngestModule extends IngestModuleAbstractFile {
             return ProcessResult.OK;
         }
         try {
-            //add image id of the file to the set, keeping track of images being ingested
-            final Image fileImage = abstractFile.getImage();
-            if (fileImage != null) {
-                //not all Content objects have an image associated (e.g. LocalFiles)
-                curImageIds.add(fileImage.getId());
-            }
+            //add data source id of the file to the set, keeping track of images being ingested
+            final long fileSourceId = caseHandle.getFileDataSource(abstractFile);
+            curDataSourceIds.add(fileSourceId);
         } catch (TskCoreException ex) {
             logger.log(Level.SEVERE, "Error getting image id of file processed by keyword search: " + abstractFile.getName(), ex);
         }
@@ -288,7 +289,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAbstractFile {
     private void cleanup() {
         ingestStatus.clear();
         currentResults.clear();
-        curImageIds.clear();
+        curDataSourceIds.clear();
         currentSearcher = null;
         //finalSearcher = null; //do not collect, might be finalizing
 
@@ -399,7 +400,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAbstractFile {
         //keeps track of all results per run not to repeat reporting the same hits
         currentResults = new HashMap<Keyword, List<Long>>();
 
-        curImageIds = new HashSet<Long>();
+        curDataSourceIds = new HashSet<Long>();
 
         indexer = new Indexer();
 
@@ -930,15 +931,10 @@ public final class KeywordSearchIngestModule extends IngestModuleAbstractFile {
                 del = new TermComponentQuery(keywordQuery);
             }
 
-            //limit search to currently ingested images
-            final long imageIds[] = new long[curImageIds.size()];
-            final Iterator<Long> it = curImageIds.iterator();
-            for (int imageI = 0; it.hasNext(); ++imageI) {
-                imageIds[imageI] = it.next();
-            }
+            //limit search to currently ingested data sources
             //set up a filter with 1 or more image ids OR'ed
-            final KeywordQueryFilter imageFilter = new KeywordQueryFilter(KeywordQueryFilter.FilterType.IMAGE, imageIds);
-            del.addFilter(imageFilter);
+            final KeywordQueryFilter dataSourceFilter = new KeywordQueryFilter(KeywordQueryFilter.FilterType.DATA_SOURCE, curDataSourceIds);
+            del.addFilter(dataSourceFilter);
 
             Map<String, List<ContentHit>> queryResult = null;
 
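For reference, a sketch of how the reworked filter might be exercised after this commit. The driver class and the data source IDs are hypothetical, and it assumes it is compiled in the same org.sleuthkit.autopsy.keywordsearch package as the patched KeywordQueryFilter; only the constructor, FilterType.DATA_SOURCE, and toString() shown in the hunks above are relied on.

package org.sleuthkit.autopsy.keywordsearch;

import java.util.HashSet;
import java.util.Set;

// Hypothetical driver class, not part of the commit.
public class DataSourceFilterDemo {

    public static void main(String[] args) {
        // Made-up object IDs standing in for the data sources of the current ingest run.
        Set<Long> curDataSourceIds = new HashSet<Long>();
        curDataSourceIds.add(5L);
        curDataSourceIds.add(9L);

        // With this commit the filter takes the Set directly instead of a long[] copy.
        KeywordQueryFilter dataSourceFilter =
                new KeywordQueryFilter(KeywordQueryFilter.FilterType.DATA_SOURCE, curDataSourceIds);

        // toString() emits one IMAGE_ID clause per data source, OR'ed together, which the
        // ingest module's searcher appends to each keyword query as a Solr filter.
        System.out.println(dataSourceFilter.toString());
    }
}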
|
Loading…
x
Reference in New Issue
Block a user