Mirror of https://github.com/overcuriousity/autopsy-flatpak.git (synced 2025-07-19 11:07:43 +00:00)

Commit c4e6425e9a
Merge remote-tracking branch 'upstream/develop' into jenkinsTests
AbstractFileHtmlExtract.java

@@ -1,7 +1,7 @@
 /*
  * Autopsy Forensic Browser
  *
- * Copyright 2012 Basis Technology Corp.
+ * Copyright 2012-2013 Basis Technology Corp.
  * Contact: carrier <at> sleuthkit <dot> org
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -59,10 +59,8 @@ import org.sleuthkit.datamodel.ReadContentInputStream;
             "text/javascript" //"application/xml",
             //"application/xml-dtd",
             );
-    private final TikaLanguageIdentifier tikaLanguageIdentifier;
 
     AbstractFileHtmlExtract() {
-        tikaLanguageIdentifier = new TikaLanguageIdentifier();
         this.module = KeywordSearchIngestModule.getDefault();
         ingester = Server.getIngester();
     }
@@ -166,11 +164,6 @@ import org.sleuthkit.datamodel.ReadContentInputStream;
             totalRead = 0;
             extracted = sb.toString();
 
-
-            //attempt to identify language of extracted text and post it to the blackboard
-            tikaLanguageIdentifier.addLanguageToBlackBoard(extracted, sourceFile);
-
-
             //converts BOM automatically to charSet encoding
             byte[] encodedBytes = extracted.getBytes(outCharset);
             AbstractFileChunk chunk = new AbstractFileChunk(this, this.numChunks + 1);
AbstractFileTikaTextExtract.java

@@ -1,7 +1,7 @@
 /*
  * Autopsy Forensic Browser
  *
- * Copyright 2012 Basis Technology Corp.
+ * Copyright 2012-2013 Basis Technology Corp.
  * Contact: carrier <at> sleuthkit <dot> org
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -39,16 +39,11 @@ import org.sleuthkit.autopsy.ingest.IngestModuleAbstractFile;
 import org.sleuthkit.datamodel.AbstractFile;
 import org.sleuthkit.datamodel.ReadContentInputStream;
 import org.apache.tika.Tika;
-import org.apache.tika.language.LanguageIdentifier;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.ParseContext;
-import org.sleuthkit.autopsy.casemodule.Case;
 import org.sleuthkit.autopsy.coreutils.StringExtract;
 import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;
-import org.sleuthkit.datamodel.BlackboardArtifact;
-import org.sleuthkit.datamodel.BlackboardAttribute;
-import org.sleuthkit.datamodel.TskCoreException;
 
 /**
  * Extractor of text from TIKA supported AbstractFile content. Extracted text is
@@ -75,11 +70,9 @@ class AbstractFileTikaTextExtract implements AbstractFileExtract {
     private int numChunks = 0;
     //private static final String UTF16BOM = "\uFEFF"; disabled prepending of BOM
     private final ExecutorService tikaParseExecutor = Executors.newSingleThreadExecutor();
-    private final List<String> TIKA_SUPPORTED_TYPES = new ArrayList<String>();
-    private final TikaLanguageIdentifier tikaLanguageIdentifier;
+    private final List<String> TIKA_SUPPORTED_TYPES = new ArrayList<>();
 
     AbstractFileTikaTextExtract() {
-        tikaLanguageIdentifier = new TikaLanguageIdentifier();
         this.module = KeywordSearchIngestModule.getDefault();
         ingester = Server.getIngester();
 
@@ -87,7 +80,7 @@ class AbstractFileTikaTextExtract implements AbstractFileExtract {
         for (MediaType mt : mediaTypes) {
             TIKA_SUPPORTED_TYPES.add(mt.getType() + "/" + mt.getSubtype());
         }
-        logger.log(Level.INFO, "Tika supported media types: " + TIKA_SUPPORTED_TYPES);
+        logger.log(Level.INFO, "Tika supported media types: {0}", TIKA_SUPPORTED_TYPES);
 
     }
 
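
Note: the hunk above swaps string concatenation in the log call for java.util.logging's parameterized form, so the supported-types list is only formatted into the message when an INFO record is actually published. A minimal standalone sketch of that pattern, assuming nothing about Autopsy's own logger (class name and sample values are illustrative):

    import java.util.Arrays;
    import java.util.List;
    import java.util.logging.Level;
    import java.util.logging.Logger;

    class LoggingSketch {
        private static final Logger logger = Logger.getLogger(LoggingSketch.class.getName());

        static void report(List<String> supportedTypes) {
            // Concatenation builds the message string even when INFO is disabled:
            // logger.log(Level.INFO, "Tika supported media types: " + supportedTypes);

            // Parameterized form: {0} is filled in by the logging framework only if
            // the record passes the level/filter checks and is actually published.
            logger.log(Level.INFO, "Tika supported media types: {0}", supportedTypes);
        }

        public static void main(String[] args) {
            report(Arrays.asList("text/html", "application/pdf"));
        }
    }
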
@@ -138,13 +131,11 @@ class AbstractFileTikaTextExtract implements AbstractFileExtract {
         try {
             future.get(Ingester.getTimeout(sourceFile.getSize()), TimeUnit.SECONDS);
         } catch (TimeoutException te) {
-            tika = null;
             final String msg = "Exception: Tika parse timeout for content: " + sourceFile.getId() + ", " + sourceFile.getName();
             KeywordSearch.getTikaLogger().log(Level.WARNING, msg, te);
             logger.log(Level.WARNING, msg);
             throw new IngesterException(msg);
         } catch (Exception ex) {
-            tika = null;
             final String msg = "Exception: Unexpected exception from Tika parse task execution for file: " + sourceFile.getId() + ", " + sourceFile.getName();
             KeywordSearch.getTikaLogger().log(Level.WARNING, msg, ex);
             logger.log(Level.WARNING, msg);
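
Note: the try/catch above is the tail of a pattern where the Tika parse is submitted to a single-thread executor and the caller waits on the Future with a per-file timeout. A self-contained sketch of that pattern outside Autopsy, with a made-up task and a fixed timeout in place of Ingester.getTimeout(...):

    import java.util.concurrent.Callable;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;
    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.TimeoutException;

    class TimedParseSketch {
        public static void main(String[] args) throws Exception {
            ExecutorService executor = Executors.newSingleThreadExecutor();
            Future<String> future = executor.submit(new Callable<String>() {
                @Override
                public String call() throws Exception {
                    Thread.sleep(2000);       // stand-in for a slow Tika parse
                    return "extracted text";
                }
            });
            try {
                // Wait at most one second; the real code derives the limit from the file size.
                System.out.println(future.get(1, TimeUnit.SECONDS));
            } catch (TimeoutException te) {
                future.cancel(true);          // interrupt the worker; real code logs and rethrows
                System.err.println("parse timed out");
            } finally {
                executor.shutdownNow();
            }
        }
    }
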
@@ -221,9 +212,6 @@ class AbstractFileTikaTextExtract implements AbstractFileExtract {
 
             extracted = sb.toString();
 
-            //attempt to identify language of extracted text and post it to the blackboard
-            tikaLanguageIdentifier.addLanguageToBlackBoard(extracted, sourceFile);
-
             //converts BOM automatically to charSet encoding
             byte[] encodedBytes = extracted.getBytes(OUTPUT_CHARSET);
             AbstractFileChunk chunk = new AbstractFileChunk(this, this.numChunks + 1);
KeywordSearchListsAbstract.java

@@ -91,7 +91,8 @@ public abstract class KeywordSearchListsAbstract {
         ips.add(new Keyword("(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])", false, BlackboardAttribute.ATTRIBUTE_TYPE.TSK_IP_ADDRESS));
         //email
         List<Keyword> emails = new ArrayList<Keyword>();
-        emails.add(new Keyword("[A-Z0-9._%-]+@[A-Z0-9.-]+\\.[A-Z]{2,4}", false, BlackboardAttribute.ATTRIBUTE_TYPE.TSK_EMAIL));
+        emails.add(new Keyword("(?=.{8})[a-z0-9%+_-]+(?:\\.[a-z0-9%+_-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z]{2,4}(?<!\\.txt|\\.exe|\\.dll|\\.jpg|\\.xml)", false, BlackboardAttribute.ATTRIBUTE_TYPE.TSK_EMAIL));
+        //emails.add(new Keyword("[A-Z0-9._%-]+@[A-Z0-9.-]+\\.[A-Z]{2,4}", false, BlackboardAttribute.ATTRIBUTE_TYPE.TSK_EMAIL));
         //URL
         List<Keyword> urls = new ArrayList<Keyword>();
         //urls.add(new Keyword("http://|https://|^www\\.", false, BlackboardAttribute.ATTRIBUTE_TYPE.TSK_URL));
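
Note: the replacement email expression above adds a leading lookahead that requires at least eight characters remaining at the match position and a trailing lookbehind intended to reject hits ending in common file extensions such as .txt or .exe. A small standalone check of the same pattern (the test strings and the case-insensitive flag are choices made for this sketch, not taken from Autopsy):

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    class EmailRegexSketch {
        // Same expression as in the hunk above, written as a Java string literal.
        private static final Pattern EMAIL = Pattern.compile(
                "(?=.{8})[a-z0-9%+_-]+(?:\\.[a-z0-9%+_-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z]{2,4}(?<!\\.txt|\\.exe|\\.dll|\\.jpg|\\.xml)",
                Pattern.CASE_INSENSITIVE);

        public static void main(String[] args) {
            String[] samples = {
                "contact carrier@sleuthkit.org for details", // matches: long enough, ends in .org
                "a@b.co"                                     // too short for the (?=.{8}) lookahead
            };
            for (String s : samples) {
                Matcher m = EMAIL.matcher(s);
                System.out.println(m.find() ? "match: " + m.group() : "no match: " + s);
            }
        }
    }
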
TikaLanguageIdentifier.java (deleted)

@@ -1,61 +0,0 @@
-/*
- * Autopsy Forensic Browser
- *
- * Copyright 2013 Basis Technology Corp.
- * Contact: carrier <at> sleuthkit <dot> org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.sleuthkit.autopsy.keywordsearch;
-
-import java.util.logging.Level;
-import org.sleuthkit.autopsy.coreutils.Logger;
-import org.sleuthkit.datamodel.AbstractFile;
-import org.sleuthkit.datamodel.BlackboardArtifact;
-import org.sleuthkit.datamodel.BlackboardAttribute;
-import org.sleuthkit.datamodel.TskCoreException;
-
-/**
- * TextLanguageIdentifier implementation based on a wrapped Tike
- * LanguageIdentifier
- */
-class TikaLanguageIdentifier implements TextLanguageIdentifier {
-
-    private static final Logger logger = Logger.getLogger(TikaLanguageIdentifier.class.getName());
-    private static final int MIN_STRING_LENGTH = 1000;
-
-    @Override
-    public void addLanguageToBlackBoard(String extracted, AbstractFile sourceFile) {
-        if (extracted.length() > MIN_STRING_LENGTH) {
-            org.apache.tika.language.LanguageIdentifier li = new org.apache.tika.language.LanguageIdentifier(extracted);
-
-            //logger.log(Level.INFO, sourceFile.getName() + " detected language: " + li.getLanguage()
-            //        + " with " + ((li.isReasonablyCertain()) ? "HIGH" : "LOW") + " confidence");
-
-            BlackboardArtifact genInfo;
-            try {
-                genInfo = sourceFile.getGenInfoArtifact();
-
-                BlackboardAttribute textLang = new BlackboardAttribute(
-                        BlackboardAttribute.ATTRIBUTE_TYPE.TSK_TEXT_LANGUAGE.getTypeID(),
-                        KeywordSearchIngestModule.MODULE_NAME, li.getLanguage());
-
-                genInfo.addAttribute(textLang);
-
-            } catch (TskCoreException ex) {
-                logger.log(Level.WARNING, "failed to add TSK_TEXT_LANGUAGE attribute to TSK_GEN_INFO artifact for file: " + sourceFile.getName(), ex);
-            }
-
-        }
-    }
-}
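
Note: the class removed above wrapped Tika 1.x's org.apache.tika.language.LanguageIdentifier and posted the result as a TSK_TEXT_LANGUAGE attribute on the file's TSK_GEN_INFO artifact. A stripped-down sketch of just the detection step, outside Autopsy (the threshold mirrors the removed MIN_STRING_LENGTH; the helper name is made up):

    import org.apache.tika.language.LanguageIdentifier;

    class LanguageDetectSketch {
        private static final int MIN_STRING_LENGTH = 1000; // same threshold as the removed class

        // Returns an ISO 639 code such as "en", or null when there is too little text.
        static String detect(String extracted) {
            if (extracted == null || extracted.length() <= MIN_STRING_LENGTH) {
                return null;
            }
            LanguageIdentifier li = new LanguageIdentifier(extracted);
            // li.isReasonablyCertain() is also available for filtering low-confidence
            // guesses; the removed class posted the result regardless.
            return li.getLanguage();
        }
    }
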
RegressionTest.java

@@ -182,7 +182,7 @@ public class RegressionTest extends TestCase {
 
     public void testConfigureHash() {
         logger.info("Hash Configure");
-        JDialog hashMainDialog = JDialogOperator.waitJDialog("Hash Database Configuration", false, false);
+        JDialog hashMainDialog = JDialogOperator.waitJDialog("Hash Set Configuration", false, false);
         JDialogOperator hashMainDialogOperator = new JDialogOperator(hashMainDialog);
         List<String> databases = new ArrayList<String>();
         databases.add(System.getProperty("nsrl_path"));
@@ -190,7 +190,7 @@ public class RegressionTest extends TestCase {
         for (String database : databases) {
             JButtonOperator importButtonOperator = new JButtonOperator(hashMainDialogOperator, "Import");
             importButtonOperator.pushNoBlock();
-            JDialog addDatabaseDialog = JDialogOperator.waitJDialog("Add Hash Database", false, false);
+            JDialog addDatabaseDialog = JDialogOperator.waitJDialog("Import Hash Database", false, false);
             JDialogOperator addDatabaseDialogOperator = new JDialogOperator(addDatabaseDialog);
             JButtonOperator browseButtonOperator = new JButtonOperator(addDatabaseDialogOperator, "Browse", 0);
             browseButtonOperator.pushNoBlock();
@@ -339,4 +339,4 @@ public class RegressionTest extends TestCase {
         KeywordSearchListsXML curr = KeywordSearchListsXML.getCurrent();
         curr.setUseForIngest("URLs", true);
     }
 }