Mirror of https://github.com/overcuriousity/autopsy-flatpak.git (synced 2025-07-17 10:17:41 +00:00)

Commit: c42f687bfb
Parent: b904c37dd2
Message: more cleanup
@@ -21,7 +21,6 @@ package org.sleuthkit.autopsy.keywordsearch;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
-import java.util.logging.Level;
 import org.apache.commons.io.IOUtils;
 import org.openide.util.Exceptions;
 import org.sleuthkit.autopsy.casemodule.Case;
@@ -34,8 +33,8 @@ import org.sleuthkit.datamodel.Content;
 import org.sleuthkit.datamodel.SleuthkitCase;
 import org.sleuthkit.datamodel.TskCoreException;
 
-public class ArtifactExtractor extends TextExtractor<Void, BlackboardArtifact> {
+public class ArtifactTextExtractor extends TextExtractor<Void, BlackboardArtifact> {
 
-    static final private Logger logger = Logger.getLogger(ArtifactExtractor.class.getName());
+    static final private Logger logger = Logger.getLogger(ArtifactTextExtractor.class.getName());
 
     static Content getDataSource(BlackboardArtifact artifact) throws TskCoreException {
         Content dataSource;
@@ -71,10 +70,6 @@ public class ArtifactExtractor extends TextExtractor<Void, BlackboardArtifact> {
         return false;
     }
 
-    @Override
-    void logWarning(String msg, Exception ex) {
-        logger.log(Level.WARNING, msg, ex); //NON-NLS
-    }
     @Override
     Void newAppendixProvider() {
         return null;

@@ -33,7 +33,6 @@ import org.sleuthkit.autopsy.datamodel.ContentUtils;
 import org.sleuthkit.autopsy.ingest.IngestJobContext;
 import org.sleuthkit.datamodel.AbstractFile;
 import org.sleuthkit.datamodel.BlackboardArtifact;
-import org.sleuthkit.datamodel.Content;
 import org.sleuthkit.datamodel.DerivedFile;
 import org.sleuthkit.datamodel.Directory;
 import org.sleuthkit.datamodel.File;
@@ -47,16 +46,17 @@ import org.sleuthkit.datamodel.TskCoreException;
 /**
  * Handles indexing files on a Solr core.
  */
+//JMTODO: Should this class really be a singleton?
 class Ingester {
 
     private static final Logger logger = Logger.getLogger(Ingester.class.getName());
     private volatile boolean uncommitedIngests = false;
     private final Server solrServer = KeywordSearch.getServer();
-    private static final GetContentFieldsV getContentFieldsV = new GetContentFieldsV();
+    private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR = new SolrFieldsVisitor();
     private static Ingester instance;
 
-    //for ingesting chunk as SolrInputDocument (non-content-streaming, by-pass tika)
-    //TODO use a streaming way to add content to /update handler
+    private static final int MAX_EXTR_TEXT_CHARS = 512 * 1024; //chars
+    private static final int SINGLE_READ_CHARS = 1024;
     private static final int MAX_DOC_CHUNK_SIZE = 1024 * 1024;
 
     private Ingester() {
@@ -69,6 +69,7 @@ class Ingester {
         return instance;
     }
 
+    //JMTODO: this is probably useless
     @Override
     @SuppressWarnings("FinalizeDeclaration")
     protected void finalize() throws Throwable {
@@ -81,14 +82,11 @@ class Ingester {
     }
 
     /**
-     * Sends a file to Solr to have its content extracted and added to the
-     * index. commit() should be called once you're done ingesting files. If the
-     * file is a directory or ingestContent is set to false, the file name is
-     * indexed only.
+     * Sends the metadata (name, MAC times, image id, etc) for the given file to
+     * Solr to be added to the index. commit() should be called once you're done
+     * indexing.
      *
-     * @param file          File to ingest
-     * @param ingestContent if true, index the file and the content, otherwise
-     *                      index metadata only
+     * @param file File to index.
      *
      * @throws IngesterException if there was an error processing a specific
      *                           file, but the Solr server is probably fine.
@@ -97,25 +95,35 @@ class Ingester {
         indexChunk(null, file.getName(), getContentFields(file), 0);
     }
 
+    /**
+     * Sends the metadata (artifact id, image id, etc) for the given artifact to
+     * Solr to be added to the index. commit() should be called once you're done
+     * indexing.
+     *
+     * @param artifact The artifact to index.
+     *
+     * @throws IngesterException if there was an error processing a specific
+     *                           artifact, but the Solr server is probably fine.
+     */
     void indexMetaDataOnly(BlackboardArtifact artifact) throws IngesterException {
-        indexChunk(null, artifact.getDisplayName() + "_" + artifact.getArtifactID(), getContentFields(artifact), 0);
+        indexChunk(null, new ArtifactTextExtractor().getName(artifact), getContentFields(artifact), 0);
     }
 
     /**
      * Creates a field map from FsContent, that is later sent to Solr
      *
-     * @param fsc FsContent to get fields from
+     * @param item SleuthkitVisitableItem to get fields from
      *
     * @return the map
     */
-    Map<String, String> getContentFields(SleuthkitVisitableItem fsc) {
-        return fsc.accept(getContentFieldsV);
+    Map<String, String> getContentFields(SleuthkitVisitableItem item) {
+        return item.accept(SOLR_FIELDS_VISITOR);
     }
 
     /**
-     * Visitor used to create param list to send to SOLR index.
+     * Visitor used to create fields to send to SOLR index.
     */
-    static private class GetContentFieldsV extends SleuthkitItemVisitor.Default<Map<String, String>> {
+    static private class SolrFieldsVisitor extends SleuthkitItemVisitor.Default<Map<String, String>> {
 
         @Override
         protected Map<String, String> defaultVisit(SleuthkitVisitableItem svi) {
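
The getContentFields()/SolrFieldsVisitor pair above uses the SleuthkitItemVisitor double-dispatch pattern: the item's accept() call routes to the visit() overload for its concrete type, and that overload builds the Solr field map. Below is a minimal standalone sketch of that dispatch with simplified stand-in types; it is not the real Sleuthkit API, just an illustration of the mechanism.

    import java.util.HashMap;
    import java.util.Map;

    // Simplified stand-ins for SleuthkitVisitableItem / SleuthkitItemVisitor.
    interface VisitableItem {
        Map<String, String> accept(FieldsVisitor visitor);
    }

    class FileItem implements VisitableItem {
        final long id;

        FileItem(long id) {
            this.id = id;
        }

        @Override
        public Map<String, String> accept(FieldsVisitor visitor) {
            // Double dispatch: the concrete item type picks the matching visit() overload.
            return visitor.visit(this);
        }
    }

    class FieldsVisitor {
        Map<String, String> visit(FileItem file) {
            Map<String, String> fields = new HashMap<>();
            fields.put("ID", Long.toString(file.id));
            return fields;
        }
    }

    public class VisitorSketch {
        public static void main(String[] args) {
            VisitableItem item = new FileItem(42);
            // Mirrors item.accept(SOLR_FIELDS_VISITOR) in Ingester.getContentFields().
            System.out.println(item.accept(new FieldsVisitor())); // prints {ID=42}
        }
    }

In the real class, defaultVisit() covers item types that are not indexed, and the AbstractFile overloads shown in the next hunk all funnel into the renamed getFileFields() helper.
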
@@ -124,17 +132,17 @@ class Ingester {
 
         @Override
         public Map<String, String> visit(File f) {
-            return getCommonFileContentFields(f);
+            return getFileFields(f);
         }
 
         @Override
         public Map<String, String> visit(DerivedFile df) {
-            return getCommonFileContentFields(df);
+            return getFileFields(df);
         }
 
         @Override
         public Map<String, String> visit(Directory d) {
-            return getCommonFileContentFields(d);
+            return getFileFields(d);
         }
 
         @Override
@@ -145,15 +153,15 @@ class Ingester {
 
         @Override
         public Map<String, String> visit(LocalFile lf) {
-            return getCommonFileContentFields(lf);
+            return getFileFields(lf);
         }
 
         @Override
         public Map<String, String> visit(SlackFile f) {
-            return getCommonFileContentFields(f);
+            return getFileFields(f);
         }
 
-        private Map<String, String> getCommonFileContentFields(AbstractFile file) {
+        private Map<String, String> getFileFields(AbstractFile file) {
             Map<String, String> params = getCommonFields(file);
             params.put(Server.Schema.CTIME.toString(), ContentUtils.getStringTimeISO8601(file.getCtime(), file));
             params.put(Server.Schema.ATIME.toString(), ContentUtils.getStringTimeISO8601(file.getAtime(), file));
@@ -166,10 +174,9 @@ class Ingester {
             Map<String, String> params = new HashMap<>();
             params.put(Server.Schema.ID.toString(), Long.toString(af.getId()));
             try {
-                long dataSourceId = af.getDataSource().getId();
-                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(dataSourceId));
+                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(af.getDataSource().getId()));
             } catch (TskCoreException ex) {
-                logger.log(Level.SEVERE, "Could not get data source id to properly index the file {0}", af.getId()); //NON-NLS
+                logger.log(Level.SEVERE, "Could not get data source id to properly index the file " + af.getId(), ex); //NON-NLS
                 params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
             }
             params.put(Server.Schema.FILE_NAME.toString(), af.getName());
@@ -181,29 +188,26 @@ class Ingester {
             Map<String, String> params = new HashMap<>();
             params.put(Server.Schema.ID.toString(), Long.toString(artifact.getArtifactID()));
             try {
-                Content dataSource = ArtifactExtractor.getDataSource(artifact);
-                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(dataSource.getId()));
+                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(ArtifactTextExtractor.getDataSource(artifact).getId()));
             } catch (TskCoreException ex) {
-                logger.log(Level.SEVERE, "Could not get data source id to properly index the artifact {0}", artifact.getArtifactID()); //NON-NLS
+                logger.log(Level.SEVERE, "Could not get data source id to properly index the artifact " + artifact.getArtifactID(), ex); //NON-NLS
                 params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
             }
 
             return params;
         }
     }
 
-    private static final int MAX_EXTR_TEXT_CHARS = 512 * 1024; //chars
-    private static final int SINGLE_READ_CHARS = 1024;
-    private static final int EXTRA_CHARS = 128; //for whitespace
 
     public <A, T extends SleuthkitVisitableItem> boolean indexText(TextExtractor<A, T> extractor, T source, IngestJobContext context) throws Ingester.IngesterException {
+        final long sourceID = extractor.getID(source);
+        final String sourceName = extractor.getName(source);
+
         int numChunks = 0; //unknown until chunking is done
 
         if (extractor.noExtractionOptionsAreEnabled()) {
             return true;
         }
-        final long sourceID = extractor.getID(source);
-        final String sourceName = extractor.getName(source);
         Map<String, String> fields = getContentFields(source);
 
         A appendix = extractor.newAppendixProvider();
@@ -212,64 +216,64 @@ class Ingester {
 
             //we read max 1024 chars at time, this seems to max what this Reader would return
             char[] textChunkBuf = new char[MAX_EXTR_TEXT_CHARS];
-            long readSize;
             boolean eof = false;
             while (!eof) {
-                int totalRead = 0;
+                int chunkSizeInChars = 0;
                 if (context != null && context.fileIngestIsCancelled()) {
                     return true;
                 }
-                if ((readSize = reader.read(textChunkBuf, 0, SINGLE_READ_CHARS)) == -1) {
-                    eof = true;
-                } else {
-                    totalRead += readSize;
+                long charsRead = 0;
+                //consume bytes to fill entire chunk (but leave EXTRA_CHARS to end the word)
+                while ((chunkSizeInChars < MAX_EXTR_TEXT_CHARS - SINGLE_READ_CHARS - EXTRA_CHARS)
+                        && (charsRead = reader.read(textChunkBuf, chunkSizeInChars, SINGLE_READ_CHARS)) != -1) {
+                    chunkSizeInChars += charsRead;
                 }
 
-                //consume more bytes to fill entire chunk (leave EXTRA_CHARS to end the word)
-                while ((totalRead < MAX_EXTR_TEXT_CHARS - SINGLE_READ_CHARS - EXTRA_CHARS)
-                        && (readSize = reader.read(textChunkBuf, totalRead, SINGLE_READ_CHARS)) != -1) {
-                    totalRead += readSize;
-                }
-                if (readSize == -1) {
+                if (charsRead == -1) {
                     //this is the last chunk
                     eof = true;
                 } else {
+                    chunkSizeInChars += charsRead;
+
                     //try to read char-by-char until whitespace to not break words
-                    while ((totalRead < MAX_EXTR_TEXT_CHARS - 1)
-                            && !Character.isWhitespace(textChunkBuf[totalRead - 1])
-                            && (readSize = reader.read(textChunkBuf, totalRead, 1)) != -1) {
-                        totalRead += readSize;
+                    while ((chunkSizeInChars < MAX_EXTR_TEXT_CHARS - 1)
+                            && (Character.isWhitespace(textChunkBuf[chunkSizeInChars - 1]) == false)
+                            && (charsRead = reader.read(textChunkBuf, chunkSizeInChars, 1)) != -1) {
+                        chunkSizeInChars += charsRead;
                     }
-                    if (readSize == -1) {
+                    if (charsRead == -1) {
                         //this is the last chunk
                         eof = true;
                     }
                 }
 
-                StringBuilder sb = new StringBuilder(totalRead + 1000)
-                        .append(textChunkBuf, 0, totalRead);
+                StringBuilder sb;
 
                 if (eof) {
+                    //1000 char buffer is to allow for appendix data with out needing to resize the string builder.
+                    sb = new StringBuilder(chunkSizeInChars + 1000)
+                            .append(textChunkBuf, 0, chunkSizeInChars);
                     extractor.appendDataToFinalChunk(sb, appendix);
+                } else {
+                    sb = new StringBuilder(chunkSizeInChars)
+                            .append(textChunkBuf, 0, chunkSizeInChars);
+
                 }
 
                 sanitizeToUTF8(sb);
 
-                final String chunkString = sb.toString();
                 String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
                 fields.put(Server.Schema.ID.toString(), chunkId);
                 try {
-                    try {
-                        indexChunk(chunkString, sourceName, fields, chunkString.length());
-                    } catch (Exception ex) {
-                        throw new IngesterException(String.format("Error ingesting (indexing) file chunk: %s", chunkId), ex);
-                    }
+                    indexChunk(sb.toString(), sourceName, fields, sb.length());
                     numChunks++;
                 } catch (Ingester.IngesterException ingEx) {
                     extractor.logWarning("Ingester had a problem with extracted string from file '" //NON-NLS
                             + sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS
 
                     throw ingEx; //need to rethrow to signal error and move on
+                } catch (Exception ex) {
+                    throw new IngesterException(String.format("Error ingesting (indexing) file chunk: %s", chunkId), ex);
                 }
             }
         } catch (IOException ex) {
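
To make the rewritten loop in indexText() easier to follow: each chunk buffer is filled in SINGLE_READ_CHARS slices until only EXTRA_CHARS of headroom remain, then the reader is advanced one character at a time until whitespace so that a word is not split across two chunks. Below is a simplified, self-contained sketch of that strategy, with scaled-down constants so it is easy to run; it is not a verbatim copy of the Ingester code.

    import java.io.IOException;
    import java.io.Reader;
    import java.io.StringReader;
    import java.util.ArrayList;
    import java.util.List;

    public class ChunkingSketch {

        // Scaled-down stand-ins for the Ingester constants shown in this diff
        // (MAX_EXTR_TEXT_CHARS = 512 * 1024, SINGLE_READ_CHARS = 1024, EXTRA_CHARS = 128).
        static final int MAX_CHUNK_CHARS = 64;
        static final int SINGLE_READ_CHARS = 16;
        static final int EXTRA_CHARS = 8;

        static List<String> chunk(Reader reader) throws IOException {
            List<String> chunks = new ArrayList<>();
            char[] buf = new char[MAX_CHUNK_CHARS];
            boolean eof = false;
            while (!eof) {
                int size = 0;
                int read = 0;
                // Fill the chunk in SINGLE_READ_CHARS slices, leaving EXTRA_CHARS of headroom.
                while (size < MAX_CHUNK_CHARS - SINGLE_READ_CHARS - EXTRA_CHARS
                        && (read = reader.read(buf, size, SINGLE_READ_CHARS)) != -1) {
                    size += read;
                }
                if (read == -1) {
                    eof = true; // reader exhausted; this is the last chunk
                } else {
                    // Extend one character at a time until whitespace, so words
                    // are not split across two chunks.
                    while (size < MAX_CHUNK_CHARS - 1
                            && !Character.isWhitespace(buf[size - 1])
                            && (read = reader.read(buf, size, 1)) != -1) {
                        size += read;
                    }
                    if (read == -1) {
                        eof = true;
                    }
                }
                if (size > 0) {
                    chunks.add(new String(buf, 0, size));
                }
            }
            return chunks;
        }

        public static void main(String[] args) throws IOException {
            String text = "the quick brown fox jumps over the lazy dog ".repeat(4);
            for (String c : chunk(new StringReader(text))) {
                System.out.println("[" + c + "]");
            }
        }
    }

In the real method each chunk is then handed to indexChunk() under its own chunk id, as the hunk above shows.
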
@@ -325,15 +329,18 @@ class Ingester {
      */
     void indexChunk(String chunk, String sourceName, Map<String, String> fields, int size) throws IngesterException {
         if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
+            //JMTODO: actually if the we couldn't get the image id it is set to -1,
+            // but does this really mean we don't want to index it?
+
             //skip the file, image id unknown
             String msg = NbBundle.getMessage(Ingester.class,
-                    "Ingester.ingest.exception.unknownImgId.msg", sourceName);
+                    "Ingester.ingest.exception.unknownImgId.msg", sourceName); //JMTODO: does this need to ne internationalized?
             logger.log(Level.SEVERE, msg);
             throw new IngesterException(msg);
         }
 
         SolrInputDocument updateDoc = new SolrInputDocument();
 
         for (String key : fields.keySet()) {
             updateDoc.addField(key, fields.get(key));
         }
@@ -343,38 +350,16 @@ class Ingester {
         updateDoc.addField(Server.Schema.CONTENT.toString(), (size > 0) ? chunk : "");
 
         try {
-            //TODO consider timeout thread, or vary socket timeout based on size of indexed content
+            //TODO: consider timeout thread, or vary socket timeout based on size of indexed content
             solrServer.addDocument(updateDoc);
             uncommitedIngests = true;
         } catch (KeywordSearchModuleException ex) {
+            //JMTODO: does this need to ne internationalized?
             throw new IngesterException(
                     NbBundle.getMessage(Ingester.class, "Ingester.ingest.exception.err.msg", sourceName), ex);
         }
     }
 
-    /**
-     * return timeout that should be used to index the content
-     *
-     * @param size size of the content
-     *
-     * @return time in seconds to use a timeout
-     */
-    static int getTimeout(long size) {
-        if (size < 1024 * 1024L) //1MB
-        {
-            return 60;
-        } else if (size < 10 * 1024 * 1024L) //10MB
-        {
-            return 1200;
-        } else if (size < 100 * 1024 * 1024L) //100MB
-        {
-            return 3600;
-        } else {
-            return 3 * 3600;
-        }
-
-    }
 
     /**
      * Tells Solr to commit (necessary before ingested files will appear in
      * searches)

@@ -41,7 +41,7 @@ public class SolrSearchService implements KeywordSearchService {
     private static final String SERVER_REFUSED_CONNECTION = "server refused connection"; //NON-NLS
     private static final int IS_REACHABLE_TIMEOUT_MS = 1000;
 
-    ArtifactExtractor extractor = new ArtifactExtractor();
+    ArtifactTextExtractor extractor = new ArtifactTextExtractor();
 
     @Override
     public void indexArtifact(BlackboardArtifact artifact) throws TskCoreException {
@@ -54,17 +54,15 @@ public class SolrSearchService implements KeywordSearchService {
         if (artifact.getArtifactID() > 0) {
             return;
         }
+        final Ingester ingester = Ingester.getDefault();
 
         try {
-            Ingester.getDefault().indexMetaDataOnly(artifact);
-        } catch (Ingester.IngesterException ex) {
-            throw new TskCoreException(ex.getCause().getMessage(), ex);
-        }
-
-        try {
-            Ingester.getDefault().indexText(extractor, artifact, null);
+            ingester.indexMetaDataOnly(artifact);
+            ingester.indexText(extractor, artifact, null);
         } catch (Ingester.IngesterException ex) {
             throw new TskCoreException(ex.getCause().getMessage(), ex);
+        } finally {
+            ingester.commit();
         }
     }
 
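
The indexArtifact() change above folds the two try blocks into one and adds a finally block so commit() always runs, whether or not indexing threw. Purely as an illustration of that control flow, here is a runnable sketch with stand-in classes; the real Ingester and Solr types are not used.

    public class IndexFlowSketch {

        static class FakeIngesterException extends Exception {
            FakeIngesterException(String msg) {
                super(msg);
            }
        }

        // Stand-in for Ingester: metadata indexing succeeds, text indexing fails.
        static class FakeIngester {
            void indexMetaDataOnly(long artifactId) {
                System.out.println("indexed metadata for artifact " + artifactId);
            }

            void indexText(long artifactId) throws FakeIngesterException {
                throw new FakeIngesterException("text extraction failed for " + artifactId);
            }

            void commit() {
                System.out.println("commit() ran even though indexing threw");
            }
        }

        static void indexArtifact(long artifactId) throws Exception {
            final FakeIngester ingester = new FakeIngester();
            try {
                ingester.indexMetaDataOnly(artifactId);
                ingester.indexText(artifactId);
            } catch (FakeIngesterException ex) {
                throw new Exception(ex.getMessage(), ex);
            } finally {
                ingester.commit(); // always commit, mirroring the new finally block
            }
        }

        public static void main(String[] args) {
            try {
                indexArtifact(1L);
            } catch (Exception ex) {
                System.out.println("rethrown: " + ex.getMessage());
            }
        }
    }

Running it prints the metadata line, then the commit line from the finally block, then the rethrown exception message, which is the behavior the new code path gives the real classes.
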
@@ -74,6 +74,7 @@ class TikaTextExtractor extends FileTextExtractor<Metadata> {
     public void appendDataToFinalChunk(StringBuilder sb, Metadata meta) {
 
         //TODO: How do we account for this in chunking algorithm...
+        //JM: what if we always append it as a separate chunk?
         sb.append("\n\n------------------------------METADATA------------------------------\n\n"); //NON-NLS
         Stream.of(meta.names()).sorted().forEach(key -> {
             sb.append(key).append(": ").append(meta.get(key)).append("\n");
@@ -85,7 +86,7 @@ class TikaTextExtractor extends FileTextExtractor<Metadata> {
         //Parse the file in a task
         final Future<Reader> future = tikaParseExecutor.submit(() -> new Tika().parse(stream, meta));
         try {
-            return future.get(Ingester.getTimeout(sourceFile.getSize()), TimeUnit.SECONDS);
+            return future.get(getTimeout(sourceFile.getSize()), TimeUnit.SECONDS);
         } catch (TimeoutException te) {
             final String msg = NbBundle.getMessage(this.getClass(), "AbstractFileTikaTextExtract.index.tikaParseTimeout.text", sourceFile.getId(), sourceFile.getName());
             logWarning(msg, te);
@@ -129,5 +130,26 @@ class TikaTextExtractor extends FileTextExtractor<Metadata> {
     boolean noExtractionOptionsAreEnabled() {
         return false;
     }
+    /**
+     * return timeout that should be used to index the content
+     *
+     * @param size size of the content
+     *
+     * @return time in seconds to use a timeout
+     */
+    static int getTimeout(long size) {
+        if (size < 1024 * 1024L) //1MB
+        {
+            return 60;
+        } else if (size < 10 * 1024 * 1024L) //10MB
+        {
+            return 1200;
+        } else if (size < 100 * 1024 * 1024L) //100MB
+        {
+            return 3600;
+        } else {
+            return 3 * 3600;
+        }
+
+    }
 }
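
With this change getTimeout() moves out of Ingester and into TikaTextExtractor, and the Tika parse call above now uses the local method. The tiers, taken straight from the branches in the hunk, are: under 1 MB -> 60 s, under 10 MB -> 1,200 s, under 100 MB -> 3,600 s, otherwise 10,800 s (3 hours). A quick standalone check of those boundaries, re-implementing the same branches independently of the Autopsy classes:

    public class TimeoutSketch {

        // Same branching as the getTimeout(long) added to TikaTextExtractor above.
        static int getTimeout(long size) {
            if (size < 1024 * 1024L) {             // under 1MB
                return 60;
            } else if (size < 10 * 1024 * 1024L) {  // under 10MB
                return 1200;
            } else if (size < 100 * 1024 * 1024L) { // under 100MB
                return 3600;
            } else {
                return 3 * 3600;
            }
        }

        public static void main(String[] args) {
            long[] sizes = {512 * 1024L, 5 * 1024 * 1024L, 50 * 1024 * 1024L, 500 * 1024 * 1024L};
            for (long size : sizes) {
                System.out.println(size + " bytes -> " + getTimeout(size) + " s");
            }
        }
    }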