mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-12 16:06:15 +00:00
This commit is contained in:
commit
a16a2ee15a
@ -22,10 +22,12 @@ import java.io.IOException;
|
|||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
import java.util.concurrent.Executors;
|
import java.util.concurrent.Executors;
|
||||||
import java.util.concurrent.Future;
|
import java.util.concurrent.Future;
|
||||||
@ -38,6 +40,8 @@ import org.sleuthkit.datamodel.AbstractFile;
|
|||||||
import org.sleuthkit.datamodel.ReadContentInputStream;
|
import org.sleuthkit.datamodel.ReadContentInputStream;
|
||||||
import org.apache.tika.Tika;
|
import org.apache.tika.Tika;
|
||||||
import org.apache.tika.metadata.Metadata;
|
import org.apache.tika.metadata.Metadata;
|
||||||
|
import org.apache.tika.mime.MediaType;
|
||||||
|
import org.apache.tika.parser.ParseContext;
|
||||||
import org.sleuthkit.autopsy.coreutils.StringExtract;
|
import org.sleuthkit.autopsy.coreutils.StringExtract;
|
||||||
import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;
|
import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;
|
||||||
|
|
||||||
@ -66,11 +70,18 @@ public class AbstractFileTikaTextExtract implements AbstractFileExtract {
|
|||||||
private int numChunks = 0;
|
private int numChunks = 0;
|
||||||
//private static final String UTF16BOM = "\uFEFF"; disabled prepending of BOM
|
//private static final String UTF16BOM = "\uFEFF"; disabled prepending of BOM
|
||||||
private final ExecutorService tikaParseExecutor = Executors.newSingleThreadExecutor();
|
private final ExecutorService tikaParseExecutor = Executors.newSingleThreadExecutor();
|
||||||
|
private final List<String> TIKA_SUPPORTED_TYPES = new ArrayList<String>();
|
||||||
|
|
||||||
AbstractFileTikaTextExtract() {
|
AbstractFileTikaTextExtract() {
|
||||||
this.module = KeywordSearchIngestModule.getDefault();
|
this.module = KeywordSearchIngestModule.getDefault();
|
||||||
ingester = Server.getIngester();
|
ingester = Server.getIngester();
|
||||||
|
|
||||||
|
Set<MediaType> mediaTypes = new Tika().getParser().getSupportedTypes(new ParseContext());
|
||||||
|
for (MediaType mt : mediaTypes) {
|
||||||
|
TIKA_SUPPORTED_TYPES.add(mt.getType() + "/" + mt.getSubtype());
|
||||||
|
}
|
||||||
|
logger.log(Level.INFO, "Tika supported media types: " + TIKA_SUPPORTED_TYPES);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -272,12 +283,11 @@ public class AbstractFileTikaTextExtract implements AbstractFileExtract {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//TODO might need to add more mime-types to ignore
|
//TODO might need to add more mime-types to ignore
|
||||||
|
|
||||||
//default to true, which includes
|
//then accept all formats supported by Tika
|
||||||
//text, docs, pdf and others
|
return TIKA_SUPPORTED_TYPES.contains(detectedFormat);
|
||||||
|
|
||||||
return true;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -106,7 +106,7 @@
|
|||||||
</zip>
|
</zip>
|
||||||
|
|
||||||
<delete dir="${zip-tmp}"/>
|
<delete dir="${zip-tmp}"/>
|
||||||
<delete file="${nbdist.dir}/${app.name}.zip"/>
|
<!-- <delete file="${nbdist.dir}/${app.name}.zip"/> -->
|
||||||
|
|
||||||
<echo message=" "/>
|
<echo message=" "/>
|
||||||
<echo message="cleaning and finalizing release" />
|
<echo message="cleaning and finalizing release" />
|
||||||
|
Loading…
x
Reference in New Issue
Block a user