Merge branch 'develop' of https://github.com/sleuthkit/autopsy into 3788-intercase-correlation

This commit is contained in:
Brian Sweeney 2018-05-14 14:22:33 -06:00
commit 83d91d3e33
107 changed files with 376590 additions and 129 deletions

1
.gitignore vendored
View File

@ -12,6 +12,7 @@
/Core/build/ /Core/build/
/Core/dist/ /Core/dist/
/Core/nbproject/* /Core/nbproject/*
/Core/test/qa-functional/data/*
!/Core/nbproject/project.xml !/Core/nbproject/project.xml
!/Core/nbproject/project.properties !/Core/nbproject/project.properties

View File

@ -40,6 +40,11 @@
<fileset dir="${thirdparty.dir}/Volatility"/> <fileset dir="${thirdparty.dir}/Volatility"/>
</copy> </copy>
<!--Copy Tesseract OCR to release-->
<copy todir="${basedir}/release/Tesseract-OCR" >
<fileset dir="${thirdparty.dir}/Tesseract-OCR"/>
</copy>
<!--Copy other jars--> <!--Copy other jars-->
<copy file="${thirdparty.dir}/rejistry/Rejistry-1.0-SNAPSHOT.jar" todir="${ext.dir}" /> <copy file="${thirdparty.dir}/rejistry/Rejistry-1.0-SNAPSHOT.jar" todir="${ext.dir}" />
<copy file="${thirdparty.dir}/sevenzip/sevenzipjbinding.jar" todir="${ext.dir}" /> <copy file="${thirdparty.dir}/sevenzip/sevenzipjbinding.jar" todir="${ext.dir}" />
@ -84,14 +89,15 @@
<target name="getTestDataFiles"> <target name="getTestDataFiles">
<mkdir dir="${basedir}/test/qa-functional/data"/> <mkdir dir="${basedir}/test/qa-functional/data"/>
<get src="https://drive.google.com/uc?id=1dLYGctuvRQMmnzfXPppTM_9gB49eLc_g" dest="${test-input}/embedded.vhd" skipexisting="true"/> <get src="https://drive.google.com/uc?id=1dLYGctuvRQMmnzfXPppTM_9gB49eLc_g" dest="${test-input}/embedded.vhd" skipexisting="true"/>
<get src="https://drive.google.com/uc?id=1JACMDyH4y54ypGzFWl82ZzMQf3qbrioP" dest="${test-input}/encryption_detection_bitlocker_test.vhd" skipexisting="true"/> <get src="https://drive.google.com/uc?id=1JACMDyH4y54ypGzFWl82ZzMQf3qbrioP" dest="${test-input}/encryption_detection_bitlocker_test.vhd" skipexisting="true"/>
<get src="https://drive.google.com/uc?id=0BxdBkzm5VKGNT0dGY0dqcHVsU3M" dest="${test-input}/filter_test1.img" skipexisting="true"/> <get src="https://drive.google.com/uc?id=0BxdBkzm5VKGNT0dGY0dqcHVsU3M" dest="${test-input}/filter_test1.img" skipexisting="true"/>
<get src="https://drive.google.com/uc?id=1bghoSm7z7nhmGIxlllyY1MMlbLntxm7n" dest="${test-input}/local_files_test.zip" skipexisting="true"/> <get src="https://drive.google.com/uc?id=1bghoSm7z7nhmGIxlllyY1MMlbLntxm7n" dest="${test-input}/local_files_test.zip" skipexisting="true"/>
<get src="https://drive.google.com/uc?id=1BrSiUQ1fzxFS9vIaK4mYKX6qIVp9kRWT" dest="${test-input}/password_detection_test.img" skipexisting="true"/> <get src="https://drive.google.com/uc?id=1BrSiUQ1fzxFS9vIaK4mYKX6qIVp9kRWT" dest="${test-input}/password_detection_test.img" skipexisting="true"/>
<get src="https://drive.google.com/uc?id=1HD8s4rculgHV1qZT5g80Kg7j4m1qccrN" dest="${test-input}/veracrypt_detection_test.vhd" skipexisting="true"/>
</target> </target>
<target name="get-deps" depends="init-ivy,getTSKJars,get-thirdparty-dependencies,get-InternalPythonModules, download-binlist, getTestDataFiles"> <target name="get-deps" depends="init-ivy,getTSKJars,get-thirdparty-dependencies,get-InternalPythonModules, download-binlist,getTestDataFiles">
<mkdir dir="${ext.dir}"/> <mkdir dir="${ext.dir}"/>
<copy file="${thirdparty.dir}/LICENSE-2.0.txt" todir="${ext.dir}" /> <copy file="${thirdparty.dir}/LICENSE-2.0.txt" todir="${ext.dir}" />

View File

@ -218,7 +218,7 @@ class SevenZipExtractor {
*/ */
private ArchiveFormat get7ZipOptions(AbstractFile archiveFile) { private ArchiveFormat get7ZipOptions(AbstractFile archiveFile) {
// try to get the file type from the BB // try to get the file type from the BB
String detectedFormat = null; String detectedFormat;
detectedFormat = archiveFile.getMIMEType(); detectedFormat = archiveFile.getMIMEType();
if (detectedFormat == null) { if (detectedFormat == null) {
@ -434,11 +434,11 @@ class SevenZipExtractor {
result = item.extractSlow(unpackStream, password); result = item.extractSlow(unpackStream, password);
} }
if (result != ExtractOperationResult.OK) { if (result != ExtractOperationResult.OK) {
logger.log(Level.WARNING, "Extraction of : " + localAbsPath + " encountered error " + result); //NON-NLS logger.log(Level.WARNING, "Extraction of : {0} encountered error {1}", new Object[]{localAbsPath, result}); //NON-NLS
return null; return null;
} }
} catch (Exception e) { } catch (SevenZipException e) {
//could be something unexpected with this file, move on //could be something unexpected with this file, move on
logger.log(Level.WARNING, "Could not extract file from archive: " + localAbsPath, e); //NON-NLS logger.log(Level.WARNING, "Could not extract file from archive: " + localAbsPath, e); //NON-NLS
} finally { } finally {
@ -492,7 +492,7 @@ class SevenZipExtractor {
final ProgressHandle progress = ProgressHandle.createHandle(Bundle.EmbeddedFileExtractorIngestModule_ArchiveExtractor_moduleName()); final ProgressHandle progress = ProgressHandle.createHandle(Bundle.EmbeddedFileExtractorIngestModule_ArchiveExtractor_moduleName());
//recursion depth check for zip bomb //recursion depth check for zip bomb
final long archiveId = archiveFile.getId(); final long archiveId = archiveFile.getId();
SevenZipExtractor.ArchiveDepthCountTree.Archive parentAr = null; SevenZipExtractor.ArchiveDepthCountTree.Archive parentAr;
try { try {
blackboard = Case.getCurrentCaseThrows().getServices().getBlackboard(); blackboard = Case.getCurrentCaseThrows().getServices().getBlackboard();
} catch (NoCurrentCaseException ex) { } catch (NoCurrentCaseException ex) {
@ -717,7 +717,7 @@ class SevenZipExtractor {
String encryptionType = fullEncryption ? ENCRYPTION_FULL : ENCRYPTION_FILE_LEVEL; String encryptionType = fullEncryption ? ENCRYPTION_FULL : ENCRYPTION_FILE_LEVEL;
try { try {
BlackboardArtifact artifact = archiveFile.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED); BlackboardArtifact artifact = archiveFile.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED);
artifact.addAttribute(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_NAME, EmbeddedFileExtractorModuleFactory.getModuleName(), encryptionType)); artifact.addAttribute(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_COMMENT, EmbeddedFileExtractorModuleFactory.getModuleName(), encryptionType));
try { try {
// index the artifact for keyword search // index the artifact for keyword search

View File

@ -62,7 +62,7 @@ public class EmbeddedFileTest extends NbTestCase {
@Override @Override
public void setUp() { public void setUp() {
CaseUtils.createCase(CASE_DIRECTORY_PATH, CASE_NAME); CaseUtils.createCase(CASE_NAME);
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor(); ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH); IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
@ -92,7 +92,6 @@ public class EmbeddedFileTest extends NbTestCase {
@Override @Override
public void tearDown() { public void tearDown() {
CaseUtils.closeCase(); CaseUtils.closeCase();
CaseUtils.deleteCaseDir(CASE_DIRECTORY_PATH);
} }
public void testEncryption() { public void testEncryption() {

View File

@ -72,8 +72,7 @@ public class IngestFileFiltersTest extends NbTestCase {
} }
public void testBasicDir() { public void testBasicDir() {
Path casePath = Paths.get(System.getProperty("java.io.tmpdir"), "testBasicDir"); CaseUtils.createCase("testBasicDir");
CaseUtils.createCase(casePath, "testBasicDir");
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor(); ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH); IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
@ -115,8 +114,7 @@ public class IngestFileFiltersTest extends NbTestCase {
} }
public void testExtAndDirWithOneRule() { public void testExtAndDirWithOneRule() {
Path casePath = Paths.get(System.getProperty("java.io.tmpdir"), "testExtAndDirWithOneRule"); CaseUtils.createCase("testExtAndDirWithOneRule");
CaseUtils.createCase(casePath, "testExtAndDirWithOneRule");
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor(); ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH); IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
@ -151,8 +149,7 @@ public class IngestFileFiltersTest extends NbTestCase {
} }
public void testExtAndDirWithTwoRules() { public void testExtAndDirWithTwoRules() {
Path casePath = Paths.get(System.getProperty("java.io.tmpdir"), "testExtAndDirWithTwoRules"); CaseUtils.createCase("testExtAndDirWithTwoRules");
CaseUtils.createCase(casePath, "testExtAndDirWithTwoRules");
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor(); ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH); IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
@ -196,8 +193,7 @@ public class IngestFileFiltersTest extends NbTestCase {
} }
public void testFullFileNameRule() { public void testFullFileNameRule() {
Path casePath = Paths.get(System.getProperty("java.io.tmpdir"), "testFullFileNameRule"); CaseUtils.createCase("testFullFileNameRule");
CaseUtils.createCase(casePath, "testFullFileNameRule");
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor(); ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH); IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
@ -232,8 +228,7 @@ public class IngestFileFiltersTest extends NbTestCase {
} }
public void testCarvingWithExtRuleAndUnallocSpace() { public void testCarvingWithExtRuleAndUnallocSpace() {
Path casePath = Paths.get(System.getProperty("java.io.tmpdir"), "testCarvingWithExtRuleAndUnallocSpace"); CaseUtils.createCase("testCarvingWithExtRuleAndUnallocSpace");
CaseUtils.createCase(casePath, "testCarvingWithExtRuleAndUnallocSpace");
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor(); ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH); IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
@ -281,8 +276,7 @@ public class IngestFileFiltersTest extends NbTestCase {
} }
public void testCarvingNoUnallocatedSpace() { public void testCarvingNoUnallocatedSpace() {
Path casePath = Paths.get(System.getProperty("java.io.tmpdir"), "testCarvingNoUnallocatedSpace"); CaseUtils.createCase("testCarvingNoUnallocatedSpace");
CaseUtils.createCase(casePath, "testCarvingNoUnallocatedSpace");
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor(); ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH); IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
@ -315,8 +309,7 @@ public class IngestFileFiltersTest extends NbTestCase {
} }
public void testEmbeddedModule() { public void testEmbeddedModule() {
Path casePath = Paths.get(System.getProperty("java.io.tmpdir"), "testEmbeddedModule"); CaseUtils.createCase("testEmbeddedModule");
CaseUtils.createCase(casePath, "testEmbeddedModule");
LocalFilesDSProcessor dataSourceProcessor = new LocalFilesDSProcessor(); LocalFilesDSProcessor dataSourceProcessor = new LocalFilesDSProcessor();
IngestUtils.addDataSource(dataSourceProcessor, ZIPFILE_PATH); IngestUtils.addDataSource(dataSourceProcessor, ZIPFILE_PATH);

View File

@ -44,6 +44,7 @@ import org.sleuthkit.datamodel.BlackboardAttribute;
import org.sleuthkit.datamodel.Image; import org.sleuthkit.datamodel.Image;
import org.sleuthkit.datamodel.TskCoreException; import org.sleuthkit.datamodel.TskCoreException;
import org.sleuthkit.datamodel.TskData; import org.sleuthkit.datamodel.TskData;
import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.Volume; import org.sleuthkit.datamodel.Volume;
import org.sleuthkit.datamodel.VolumeSystem; import org.sleuthkit.datamodel.VolumeSystem;
@ -58,6 +59,12 @@ public class EncryptionDetectionTest extends NbTestCase {
private final Path BITLOCKER_IMAGE_PATH = Paths.get(this.getDataDir().toString(), "encryption_detection_bitlocker_test.vhd"); private final Path BITLOCKER_IMAGE_PATH = Paths.get(this.getDataDir().toString(), "encryption_detection_bitlocker_test.vhd");
private final Path PASSWORD_IMAGE_PATH = Paths.get(this.getDataDir().toString(), "password_detection_test.img"); private final Path PASSWORD_IMAGE_PATH = Paths.get(this.getDataDir().toString(), "password_detection_test.img");
private static final String PASSWORD_DETECTION_CASE_NAME = "PasswordDetectionTest";
private static final String VERACRYPT_DETECTION_CASE_NAME = "VeraCryptDetectionTest";
private final Path PASSWORD_DETECTION_IMAGE_PATH = Paths.get(this.getDataDir().toString(), "password_detection_test.img");
private final Path VERACRYPT_DETECTION_IMAGE_PATH = Paths.get(this.getDataDir().toString(), "veracrypt_detection_test.vhd");
public static Test suite() { public static Test suite() {
NbModuleSuite.Configuration conf = NbModuleSuite.createConfiguration(EncryptionDetectionTest.class). NbModuleSuite.Configuration conf = NbModuleSuite.createConfiguration(EncryptionDetectionTest.class).
clusters(".*"). clusters(".*").
@ -79,7 +86,7 @@ public class EncryptionDetectionTest extends NbTestCase {
*/ */
public void testBitlockerEncryption() { public void testBitlockerEncryption() {
try { try {
CaseUtils.createCase(BITLOCKER_CASE_DIRECTORY_PATH, BITLOCKER_CASE_NAME); CaseUtils.createCase(BITLOCKER_CASE_NAME);
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor(); ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
IngestUtils.addDataSource(dataSourceProcessor, BITLOCKER_IMAGE_PATH); IngestUtils.addDataSource(dataSourceProcessor, BITLOCKER_IMAGE_PATH);
Case openCase = Case.getCurrentCaseThrows(); Case openCase = Case.getCurrentCaseThrows();
@ -150,21 +157,22 @@ public class EncryptionDetectionTest extends NbTestCase {
*/ */
public void testPasswordProtection() { public void testPasswordProtection() {
try { try {
CaseUtils.createCase(PASSWORD_CASE_DIRECTORY_PATH, PASSWORD_CASE_NAME); CaseUtils.createCase(PASSWORD_DETECTION_CASE_NAME);
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor(); ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
IngestUtils.addDataSource(dataSourceProcessor, PASSWORD_IMAGE_PATH); List<String> errorMessages = IngestUtils.addDataSource(dataSourceProcessor, PASSWORD_DETECTION_IMAGE_PATH);
String joinedErrors = String.join(System.lineSeparator(), errorMessages);
assertEquals(joinedErrors, 0, errorMessages.size());
Case openCase = Case.getCurrentCaseThrows(); Case openCase = Case.getCurrentCaseThrows();
/* /*
* Create ingest job settings. * Create ingest job settings.
*/ */
IngestModuleFactory ingestModuleFactory = new EncryptionDetectionModuleFactory();
IngestModuleIngestJobSettings settings = ingestModuleFactory.getDefaultIngestJobSettings(); ArrayList<IngestModuleTemplate> templates = new ArrayList<>();
IngestModuleTemplate template = new IngestModuleTemplate(ingestModuleFactory, settings); templates.add(IngestUtils.getIngestModuleTemplate(new EncryptionDetectionModuleFactory()));
template.setEnabled(true); IngestJobSettings ingestJobSettings = new IngestJobSettings(PASSWORD_DETECTION_CASE_NAME, IngestType.FILES_ONLY, templates);
List<IngestModuleTemplate> templates = new ArrayList<>();
templates.add(template);
IngestJobSettings ingestJobSettings = new IngestJobSettings(EncryptionDetectionTest.class.getCanonicalName(), IngestType.FILES_ONLY, templates);
IngestUtils.runIngestJob(openCase.getDataSources(), ingestJobSettings); IngestUtils.runIngestJob(openCase.getDataSources(), ingestJobSettings);
/* /*
@ -201,8 +209,8 @@ public class EncryptionDetectionTest extends NbTestCase {
*/ */
if (file.isFile() && !file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.SLACK)) { if (file.isFile() && !file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.SLACK)) {
/* /*
* Determine which assertions to use for the file based on * Determine which assertions to use for the file based
* its name. * on its name.
*/ */
boolean fileProtected = file.getName().split("\\.")[0].endsWith("-protected"); boolean fileProtected = file.getName().split("\\.")[0].endsWith("-protected");
List<BlackboardArtifact> artifactsList = file.getAllArtifacts(); List<BlackboardArtifact> artifactsList = file.getAllArtifacts();
@ -234,4 +242,67 @@ public class EncryptionDetectionTest extends NbTestCase {
Assert.fail(ex); Assert.fail(ex);
} }
} }
/**
* Test the Encryption Detection module's detection of VeraCrypt encrypted
* container files and partitions.
*
* Test passes if the following are true.
*
* 1. Adding the data source reports exactly one error message (expected,
* because the encrypted partition has no file system).
* 2. Only 1 data source exists in the case, to ensure a stale case did not
* get used.
* 3. One volume has a TSK_ENCRYPTION_SUSPECTED artifact associated with it.
* 4. A single file named veracryptContainerFile exists.
* 5. The file named veracryptContainerFile has a TSK_ENCRYPTION_SUSPECTED
* artifact associated with it.
*/
public void testVeraCryptSupport() {
try {
CaseUtils.createCase(VERACRYPT_DETECTION_CASE_NAME);
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
List<String> errorMessages = IngestUtils.addDataSource(dataSourceProcessor, VERACRYPT_DETECTION_IMAGE_PATH);
// Build the assertion failure message up front: either explain that the
// expected error was missing, or list every error that was reported.
String joinedErrors;
if (errorMessages.isEmpty()) {
joinedErrors = "Encrypted partition did not cause error, it was expected to";
} else {
joinedErrors = String.join(System.lineSeparator(), errorMessages);
}
//there will be 1 expected error regarding the encrypted partition not having a file system
assertEquals(joinedErrors, 1, errorMessages.size());
Case openCase = Case.getCurrentCaseThrows();
ArrayList<IngestModuleTemplate> templates = new ArrayList<>();
templates.add(IngestUtils.getIngestModuleTemplate(new EncryptionDetectionModuleFactory()));
//image includes an encrypted container file with size greater than 5 mb so default settings detect it
IngestJobSettings ingestJobSettings = new IngestJobSettings(VERACRYPT_DETECTION_CASE_NAME, IngestType.ALL_MODULES, templates);
assertEquals("Expected only one data source to exist in the Case", 1, openCase.getDataSources().size());
IngestUtils.runIngestJob(openCase.getDataSources(), ingestJobSettings);
//check that one of the partitions has an encrypted volume
int numberOfEncryptedVolumes = 0;
for (Content datasource : openCase.getDataSources()) { //data source
for (Content volumeSystem : datasource.getChildren()) { //volume system
for (Content volume : volumeSystem.getChildren()) { //volumes
numberOfEncryptedVolumes += volume.getArtifacts(BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_SUSPECTED).size();
}
}
}
assertEquals("One volume should exist with an encryption suspsected artifact", 1, numberOfEncryptedVolumes);
//ensure the encrypted container file was also detected correctly
FileManager fileManager = openCase.getServices().getFileManager();
List<AbstractFile> results = fileManager.findFiles("veracryptContainerFile");
assertEquals("Expected 1 file named veracryptContainerFile to exist in test image", 1, results.size());
// Sum the artifacts over every match so the total is checked against exactly one.
int numberOfEncryptedContainers = 0;
for (AbstractFile file : results) {
numberOfEncryptedContainers += file.getArtifacts(BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_SUSPECTED).size();
}
assertEquals("Encrypted Container file should have one encyption suspected artifact", 1, numberOfEncryptedContainers);
} catch (NoCurrentCaseException | TskCoreException ex) {
Exceptions.printStackTrace(ex);
Assert.fail(ex);
}
}
} }

View File

@ -18,8 +18,10 @@
*/ */
package org.sleuthkit.autopsy.testutils; package org.sleuthkit.autopsy.testutils;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.Paths;
import static junit.framework.Assert.assertFalse; import static junit.framework.Assert.assertFalse;
import static junit.framework.Assert.assertTrue; import static junit.framework.Assert.assertTrue;
import org.apache.commons.io.FileUtils; import org.apache.commons.io.FileUtils;
@ -30,44 +32,43 @@ import org.sleuthkit.autopsy.casemodule.CaseActionException;
import org.sleuthkit.autopsy.casemodule.CaseDetails; import org.sleuthkit.autopsy.casemodule.CaseDetails;
/** /**
* Common case utility methods. * Class with common methods for testing related to the creation and elimination
* of cases.
*/ */
public final class CaseUtils { public final class CaseUtils {
/** /**
* CaseUtils constructor. Since this class is not meant to allow for * Create a case directory and case for the given case name.
* instantiation, this constructor is 'private'.
*/
private CaseUtils() {
}
/**
* Create a new case. If the case already exists at the specified path, the
* existing case will be removed prior to creation of the new case.
* *
* @param caseDirectoryPath The path to the case data. * @param caseName the name for the case and case directory to have
* @param caseDisplayName The display name for the case.
*/ */
public static void createCase(Path caseDirectoryPath, String caseDisplayName) { public static void createCase(String caseName) {
//Make sure the test is starting with a clean state. So delete the test directory, if it exists. //Make sure the case is starting with a clean state. So delete the case directory, if it exists.
deleteCaseDir(caseDirectoryPath); Path caseDirectoryPath = Paths.get(System.getProperty("java.io.tmpdir"), caseName);
assertFalse("Unable to delete existing test directory", caseDirectoryPath.toFile().exists()); File caseDir = new File(caseDirectoryPath.toString());
try {
deleteCaseDir(caseDir);
} catch (IOException ex) {
Exceptions.printStackTrace(ex);
Assert.fail(ex);
}
assertFalse("Unable to delete existing test directory", caseDir.exists());
// Create the test directory // Create the test directory
caseDirectoryPath.toFile().mkdirs(); caseDir.mkdirs();
assertTrue("Unable to create test directory", caseDirectoryPath.toFile().exists()); assertTrue("Unable to create test directory", caseDir.exists());
try { try {
Case.createAsCurrentCase(Case.CaseType.SINGLE_USER_CASE, caseDirectoryPath.toString(), new CaseDetails(caseDisplayName)); Case.createAsCurrentCase(Case.CaseType.SINGLE_USER_CASE, caseDirectoryPath.toString(), new CaseDetails(caseName));
} catch (CaseActionException ex) { } catch (CaseActionException ex) {
Exceptions.printStackTrace(ex); Exceptions.printStackTrace(ex);
Assert.fail(ex); Assert.fail(ex);
} }
assertTrue(caseDirectoryPath.toFile().exists());
assertTrue(caseDir.exists());
} }
/** /**
* Close the currently opened case. * Close the current case, fails test if case was unable to be closed.
*/ */
public static void closeCase() { public static void closeCase() {
try { try {
@ -85,20 +86,25 @@ public final class CaseUtils {
} }
/** /**
* Delete a case at the specified path. * Delete the case directory if it exists; throws an exception if unable to
* delete the case directory, so the caller can decide how to handle the failure.
* *
* @param caseDirectoryPath The path to the case to be removed. * @param caseDirectory the case directory to delete
*
* @throws IOException thrown if there was a problem deleting the case
* directory
*/ */
public static void deleteCaseDir(Path caseDirectoryPath) { public static void deleteCaseDir(File caseDirectory) throws IOException {
if (!caseDirectoryPath.toFile().exists()) { if (!caseDirectory.exists()) {
return; return;
} }
try { FileUtils.deleteDirectory(caseDirectory);
FileUtils.deleteDirectory(caseDirectoryPath.toFile()); }
} catch (IOException ex) {
//We just want to make sure the case directory doesn't exist when the test starts. It shouldn't cause failure if the case directory couldn't be deleted after a test finished. /**
System.out.println("INFO: Unable to delete case directory: " + caseDirectoryPath.toString()); * Private constructor to prevent utility class instantiation.
} */
private CaseUtils() {
} }
} }

View File

@ -19,6 +19,7 @@
package org.sleuthkit.autopsy.testutils; package org.sleuthkit.autopsy.testutils;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List; import java.util.List;
import static junit.framework.Assert.assertEquals; import static junit.framework.Assert.assertEquals;
import org.openide.util.Exceptions; import org.openide.util.Exceptions;
@ -26,13 +27,14 @@ import org.python.icu.impl.Assert;
import org.sleuthkit.autopsy.datasourceprocessors.AutoIngestDataSourceProcessor; import org.sleuthkit.autopsy.datasourceprocessors.AutoIngestDataSourceProcessor;
import org.sleuthkit.autopsy.ingest.IngestJobSettings; import org.sleuthkit.autopsy.ingest.IngestJobSettings;
import org.sleuthkit.autopsy.ingest.IngestModuleError; import org.sleuthkit.autopsy.ingest.IngestModuleError;
import org.sleuthkit.autopsy.ingest.IngestModuleFactoryAdapter; import org.sleuthkit.autopsy.ingest.IngestModuleFactory;
import org.sleuthkit.autopsy.ingest.IngestModuleIngestJobSettings; import org.sleuthkit.autopsy.ingest.IngestModuleIngestJobSettings;
import org.sleuthkit.autopsy.ingest.IngestModuleTemplate; import org.sleuthkit.autopsy.ingest.IngestModuleTemplate;
import org.sleuthkit.datamodel.Content; import org.sleuthkit.datamodel.Content;
/** /**
* Common image utility methods. * Class with common methods for testing related to adding and ingesting
* datasources.
*/ */
public final class IngestUtils { public final class IngestUtils {
@ -44,39 +46,48 @@ public final class IngestUtils {
} }
/** /**
* Add a data source for the data source processor. * Add the specified datasource to the current case and process it.
* Causes failure if it was unable to add and process the datasource.
* *
* @param dataSourceProcessor The data source processor. * @param dataSourceProcessor the datasource processor to use to process the
* @param dataSourcePath The path to the data source to be added. * datasource
* @param dataSourcePath the path to the datasource which is being
* added
*
* @return errorMessages a list of all error messages as strings which
* were encountered while processing the data source
*/ */
public static void addDataSource(AutoIngestDataSourceProcessor dataSourceProcessor, Path dataSourcePath) { public static List<String> addDataSource(AutoIngestDataSourceProcessor dataSourceProcessor, Path dataSourcePath) {
List<String> errorMessages = new ArrayList<>();
try { try {
if (!dataSourcePath.toFile().exists()) {
Assert.fail("Data source not found: " + dataSourcePath.toString());
}
DataSourceProcessorRunner.ProcessorCallback callBack = DataSourceProcessorRunner.runDataSourceProcessor(dataSourceProcessor, dataSourcePath); DataSourceProcessorRunner.ProcessorCallback callBack = DataSourceProcessorRunner.runDataSourceProcessor(dataSourceProcessor, dataSourcePath);
/* errorMessages = callBack.getErrorMessages();
* Ignore the callback error messages. Sometimes it's perfectly
* valid for it to not be able to detect a file system, which is one
* of the errors that can be returned.
*/
} catch (AutoIngestDataSourceProcessor.AutoIngestDataSourceProcessorException | InterruptedException ex) { } catch (AutoIngestDataSourceProcessor.AutoIngestDataSourceProcessorException | InterruptedException ex) {
Exceptions.printStackTrace(ex); Exceptions.printStackTrace(ex);
Assert.fail(ex); Assert.fail(ex);
} }
return errorMessages;
} }
/** /**
* Run an ingest job. * Run ingest on the specified datasources with the specified ingest job
* settings. Causes failure if there are any errors or other problems while
* running ingest.
* *
* @param dataSourceList The list of data sources to process. * @param datasources - the datasources to run ingest on
* @param ingestJobSettings The ingest job settings to use for ingest. * @param ingestJobSettings - the ingest job settings to use for ingest
*/ */
public static void runIngestJob(List<Content> dataSourceList, IngestJobSettings ingestJobSettings) { public static void runIngestJob(List<Content> datasources, IngestJobSettings ingestJobSettings) {
try { try {
List<IngestModuleError> ingestModuleErrorsList = IngestJobRunner.runIngestJob(dataSourceList, ingestJobSettings); List<IngestModuleError> errs = IngestJobRunner.runIngestJob(datasources, ingestJobSettings);
for (IngestModuleError err : ingestModuleErrorsList) { StringBuilder joinedErrors = new StringBuilder("");
System.out.println(String.format("Error: %s: %s.", err.getModuleDisplayName(), err.toString())); errs.forEach((err) -> {
} joinedErrors.append(String.format("Error: %s: %s.", err.getModuleDisplayName(), err.toString())).append(System.lineSeparator());
String errorMessage = String.format("The ingest job runner produced %d error messages.", ingestModuleErrorsList.size()); });
assertEquals(errorMessage, 0, ingestModuleErrorsList.size()); assertEquals(joinedErrors.toString(), 0, errs.size());
} catch (InterruptedException ex) { } catch (InterruptedException ex) {
Exceptions.printStackTrace(ex); Exceptions.printStackTrace(ex);
Assert.fail(ex); Assert.fail(ex);
@ -84,17 +95,18 @@ public final class IngestUtils {
} }
/** /**
* Build a new ingest module template based on the given factory. * Get the ingest module template for the specified factory's default
* ingest job settings.
* *
* @param factory The ingest module factory. * @param factory the factory to get the ingest job settings from
* *
* @return The ingest module template. * @return template - the IngestModuleTemplate created with the factory and
* its default settings.
*/ */
public static IngestModuleTemplate getIngestModuleTemplate(IngestModuleFactoryAdapter factory) { public static IngestModuleTemplate getIngestModuleTemplate(IngestModuleFactory factory) {
IngestModuleIngestJobSettings settings = factory.getDefaultIngestJobSettings(); IngestModuleIngestJobSettings settings = factory.getDefaultIngestJobSettings();
IngestModuleTemplate template = new IngestModuleTemplate(factory, settings); IngestModuleTemplate template = new IngestModuleTemplate(factory, settings);
template.setEnabled(true); template.setEnabled(true);
return template; return template;
} }
} }

View File

@ -218,6 +218,7 @@ KeywordSearchJobSettingsPanel.languagesLabel.text=Scripts enabled for string ext
KeywordSearchGlobalLanguageSettingsPanel.enableUTF8Checkbox.text=Enable UTF8 text extraction KeywordSearchGlobalLanguageSettingsPanel.enableUTF8Checkbox.text=Enable UTF8 text extraction
KeywordSearchGlobalLanguageSettingsPanel.ingestSettingsLabel.text=Ingest settings for string extraction from unknown file types (changes effective on next ingest): KeywordSearchGlobalLanguageSettingsPanel.ingestSettingsLabel.text=Ingest settings for string extraction from unknown file types (changes effective on next ingest):
KeywordSearchGlobalLanguageSettingsPanel.enableUTF16Checkbox.text=Enable UTF16LE and UTF16BE string extraction KeywordSearchGlobalLanguageSettingsPanel.enableUTF16Checkbox.text=Enable UTF16LE and UTF16BE string extraction
KeywordSearchGlobalLanguageSettingsPanel.enableOcrCheckbox.text=Enable Optical Character Recognition (OCR)
KeywordSearchGlobalLanguageSettingsPanel.languagesLabel.text=Enabled scripts (languages): KeywordSearchGlobalLanguageSettingsPanel.languagesLabel.text=Enabled scripts (languages):
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.toolTipText=20 mins. (fastest ingest time) KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.toolTipText=20 mins. (fastest ingest time)
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.text=20 minutes (slowest feedback, fastest ingest) KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.text=20 minutes (slowest feedback, fastest ingest)
@ -309,3 +310,4 @@ ExtractedContentPanel.pageOfLabel.text=of
ExtractedContentPanel.pageTotalLabel.text=- ExtractedContentPanel.pageTotalLabel.text=-
ExtractedContentPanel.pageButtonsLabel.text=Page ExtractedContentPanel.pageButtonsLabel.text=Page
ExtractedContentPanel.pagesLabel.text=Page: ExtractedContentPanel.pagesLabel.text=Page:

View File

@ -16,21 +16,26 @@
<Layout> <Layout>
<DimensionLayout dim="0"> <DimensionLayout dim="0">
<Group type="103" groupAlignment="0" attributes="0"> <Group type="103" groupAlignment="0" attributes="0">
<Group type="102" alignment="0" attributes="0"> <Group type="102" attributes="0">
<EmptySpace max="-2" attributes="0"/>
<Group type="103" groupAlignment="0" attributes="0"> <Group type="103" groupAlignment="0" attributes="0">
<Component id="ingestSettingsLabel" alignment="0" min="-2" max="-2" attributes="0"/> <Group type="102" alignment="0" attributes="0">
<Group type="102" attributes="0"> <EmptySpace max="-2" attributes="0"/>
<EmptySpace min="10" pref="10" max="-2" attributes="0"/> <Group type="103" groupAlignment="0" attributes="0">
<Component id="ingestSettingsLabel" alignment="0" min="-2" max="-2" attributes="0"/>
<Group type="103" alignment="0" groupAlignment="1" attributes="0">
<Component id="languagesLabel" alignment="0" min="-2" max="-2" attributes="0"/>
<Component id="langPanel" min="-2" max="-2" attributes="0"/>
</Group>
</Group>
</Group>
<Group type="102" alignment="0" attributes="0">
<EmptySpace min="-2" pref="26" max="-2" attributes="0"/>
<Group type="103" groupAlignment="0" max="-2" attributes="0"> <Group type="103" groupAlignment="0" max="-2" attributes="0">
<Component id="enableUTF16Checkbox" min="-2" max="-2" attributes="0"/> <Component id="enableUTF16Checkbox" min="-2" max="-2" attributes="0"/>
<Component id="enableUTF8Checkbox" alignment="0" min="-2" max="-2" attributes="0"/> <Component id="enableUTF8Checkbox" alignment="0" min="-2" max="-2" attributes="0"/>
<Component id="enableOcrCheckbox" alignment="0" min="-2" max="-2" attributes="0"/>
</Group> </Group>
</Group> </Group>
<Group type="103" alignment="0" groupAlignment="1" attributes="0">
<Component id="languagesLabel" alignment="0" min="-2" max="-2" attributes="0"/>
<Component id="langPanel" min="-2" max="-2" attributes="0"/>
</Group>
</Group> </Group>
<EmptySpace pref="255" max="32767" attributes="0"/> <EmptySpace pref="255" max="32767" attributes="0"/>
</Group> </Group>
@ -42,13 +47,15 @@
<EmptySpace max="-2" attributes="0"/> <EmptySpace max="-2" attributes="0"/>
<Component id="ingestSettingsLabel" min="-2" max="-2" attributes="0"/> <Component id="ingestSettingsLabel" min="-2" max="-2" attributes="0"/>
<EmptySpace type="unrelated" max="-2" attributes="0"/> <EmptySpace type="unrelated" max="-2" attributes="0"/>
<Component id="enableOcrCheckbox" min="-2" max="-2" attributes="0"/>
<EmptySpace max="32767" attributes="0"/>
<Component id="enableUTF16Checkbox" min="-2" max="-2" attributes="0"/> <Component id="enableUTF16Checkbox" min="-2" max="-2" attributes="0"/>
<EmptySpace max="-2" attributes="0"/> <EmptySpace max="-2" attributes="0"/>
<Component id="enableUTF8Checkbox" min="-2" max="-2" attributes="0"/> <Component id="enableUTF8Checkbox" min="-2" max="-2" attributes="0"/>
<EmptySpace type="separate" max="-2" attributes="0"/>
<Component id="languagesLabel" min="-2" max="-2" attributes="0"/>
<EmptySpace type="unrelated" max="-2" attributes="0"/> <EmptySpace type="unrelated" max="-2" attributes="0"/>
<Component id="langPanel" pref="397" max="32767" attributes="0"/> <Component id="languagesLabel" min="-2" max="-2" attributes="0"/>
<EmptySpace max="-2" attributes="0"/>
<Component id="langPanel" min="-2" pref="380" max="-2" attributes="0"/>
<EmptySpace max="-2" attributes="0"/> <EmptySpace max="-2" attributes="0"/>
</Group> </Group>
</Group> </Group>
@ -86,7 +93,7 @@
</DimensionLayout> </DimensionLayout>
<DimensionLayout dim="1"> <DimensionLayout dim="1">
<Group type="103" groupAlignment="0" attributes="0"> <Group type="103" groupAlignment="0" attributes="0">
<EmptySpace min="0" pref="395" max="32767" attributes="0"/> <EmptySpace min="0" pref="378" max="32767" attributes="0"/>
</Group> </Group>
</DimensionLayout> </DimensionLayout>
</Layout> </Layout>
@ -120,5 +127,15 @@
</Property> </Property>
</Properties> </Properties>
</Component> </Component>
<Component class="javax.swing.JCheckBox" name="enableOcrCheckbox">
<Properties>
<Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor">
<ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalLanguageSettingsPanel.enableOcrCheckbox.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, &quot;{key}&quot;)"/>
</Property>
</Properties>
<Events>
<EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="enableOcrCheckboxActionPerformed"/>
</Events>
</Component>
</SubComponents> </SubComponents>
</Form> </Form>

View File

@ -111,6 +111,9 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
= Boolean.parseBoolean(KeywordSearchSettings.getStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF8.toString())); = Boolean.parseBoolean(KeywordSearchSettings.getStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF8.toString()));
enableUTF8Checkbox.setSelected(utf8); enableUTF8Checkbox.setSelected(utf8);
boolean ocr = KeywordSearchSettings.getOcrOption();
enableOcrCheckbox.setSelected(ocr);
final List<SCRIPT> serviceScripts = KeywordSearchSettings.getStringExtractScripts(); final List<SCRIPT> serviceScripts = KeywordSearchSettings.getStringExtractScripts();
final int components = checkPanel.getComponentCount(); final int components = checkPanel.getComponentCount();
@ -141,6 +144,7 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
activateScriptsCheckboxes(extractEnabled && ingestNotRunning); activateScriptsCheckboxes(extractEnabled && ingestNotRunning);
enableUTF16Checkbox.setEnabled(ingestNotRunning); enableUTF16Checkbox.setEnabled(ingestNotRunning);
enableUTF8Checkbox.setEnabled(ingestNotRunning); enableUTF8Checkbox.setEnabled(ingestNotRunning);
enableOcrCheckbox.setEnabled(ingestNotRunning);
} }
/** /**
@ -158,6 +162,7 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
enableUTF8Checkbox = new javax.swing.JCheckBox(); enableUTF8Checkbox = new javax.swing.JCheckBox();
enableUTF16Checkbox = new javax.swing.JCheckBox(); enableUTF16Checkbox = new javax.swing.JCheckBox();
ingestSettingsLabel = new javax.swing.JLabel(); ingestSettingsLabel = new javax.swing.JLabel();
enableOcrCheckbox = new javax.swing.JCheckBox();
org.openide.awt.Mnemonics.setLocalizedText(languagesLabel, org.openide.util.NbBundle.getMessage(KeywordSearchGlobalLanguageSettingsPanel.class, "KeywordSearchGlobalLanguageSettingsPanel.languagesLabel.text")); // NOI18N org.openide.awt.Mnemonics.setLocalizedText(languagesLabel, org.openide.util.NbBundle.getMessage(KeywordSearchGlobalLanguageSettingsPanel.class, "KeywordSearchGlobalLanguageSettingsPanel.languagesLabel.text")); // NOI18N
@ -173,7 +178,7 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
); );
checkPanelLayout.setVerticalGroup( checkPanelLayout.setVerticalGroup(
checkPanelLayout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) checkPanelLayout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addGap(0, 395, Short.MAX_VALUE) .addGap(0, 378, Short.MAX_VALUE)
); );
langPanel.setViewportView(checkPanel); langPanel.setViewportView(checkPanel);
@ -194,22 +199,32 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
org.openide.awt.Mnemonics.setLocalizedText(ingestSettingsLabel, org.openide.util.NbBundle.getMessage(KeywordSearchGlobalLanguageSettingsPanel.class, "KeywordSearchGlobalLanguageSettingsPanel.ingestSettingsLabel.text")); // NOI18N org.openide.awt.Mnemonics.setLocalizedText(ingestSettingsLabel, org.openide.util.NbBundle.getMessage(KeywordSearchGlobalLanguageSettingsPanel.class, "KeywordSearchGlobalLanguageSettingsPanel.ingestSettingsLabel.text")); // NOI18N
org.openide.awt.Mnemonics.setLocalizedText(enableOcrCheckbox, org.openide.util.NbBundle.getMessage(KeywordSearchGlobalLanguageSettingsPanel.class, "KeywordSearchGlobalLanguageSettingsPanel.enableOcrCheckbox.text")); // NOI18N
enableOcrCheckbox.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
enableOcrCheckboxActionPerformed(evt);
}
});
javax.swing.GroupLayout layout = new javax.swing.GroupLayout(this); javax.swing.GroupLayout layout = new javax.swing.GroupLayout(this);
this.setLayout(layout); this.setLayout(layout);
layout.setHorizontalGroup( layout.setHorizontalGroup(
layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addGroup(layout.createSequentialGroup() .addGroup(layout.createSequentialGroup()
.addContainerGap()
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING) .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addComponent(ingestSettingsLabel)
.addGroup(layout.createSequentialGroup() .addGroup(layout.createSequentialGroup()
.addGap(10, 10, 10) .addContainerGap()
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addComponent(ingestSettingsLabel)
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.TRAILING)
.addComponent(languagesLabel, javax.swing.GroupLayout.Alignment.LEADING)
.addComponent(langPanel, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE))))
.addGroup(layout.createSequentialGroup()
.addGap(26, 26, 26)
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING, false) .addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING, false)
.addComponent(enableUTF16Checkbox) .addComponent(enableUTF16Checkbox)
.addComponent(enableUTF8Checkbox))) .addComponent(enableUTF8Checkbox)
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.TRAILING) .addComponent(enableOcrCheckbox))))
.addComponent(languagesLabel, javax.swing.GroupLayout.Alignment.LEADING)
.addComponent(langPanel, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)))
.addContainerGap(255, Short.MAX_VALUE)) .addContainerGap(255, Short.MAX_VALUE))
); );
layout.setVerticalGroup( layout.setVerticalGroup(
@ -218,13 +233,15 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
.addContainerGap() .addContainerGap()
.addComponent(ingestSettingsLabel) .addComponent(ingestSettingsLabel)
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED) .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
.addComponent(enableOcrCheckbox)
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
.addComponent(enableUTF16Checkbox) .addComponent(enableUTF16Checkbox)
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED) .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
.addComponent(enableUTF8Checkbox) .addComponent(enableUTF8Checkbox)
.addGap(18, 18, 18)
.addComponent(languagesLabel)
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED) .addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
.addComponent(langPanel, javax.swing.GroupLayout.DEFAULT_SIZE, 397, Short.MAX_VALUE) .addComponent(languagesLabel)
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
.addComponent(langPanel, javax.swing.GroupLayout.PREFERRED_SIZE, 380, javax.swing.GroupLayout.PREFERRED_SIZE)
.addContainerGap()) .addContainerGap())
); );
}// </editor-fold>//GEN-END:initComponents }// </editor-fold>//GEN-END:initComponents
@ -246,8 +263,13 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null); firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null);
}//GEN-LAST:event_enableUTF16CheckboxActionPerformed }//GEN-LAST:event_enableUTF16CheckboxActionPerformed
private void enableOcrCheckboxActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_enableOcrCheckboxActionPerformed
// The checkbox state itself is not read here; this only fires a
// PROP_CHANGED property change on the panel, as the UTF16 checkbox handler does.
// NOTE(review): presumably this is what tells the options controller that
// there are unsaved changes - confirm against OptionsPanelController.
firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null);
}//GEN-LAST:event_enableOcrCheckboxActionPerformed
// Variables declaration - do not modify//GEN-BEGIN:variables // Variables declaration - do not modify//GEN-BEGIN:variables
private javax.swing.JPanel checkPanel; private javax.swing.JPanel checkPanel;
private javax.swing.JCheckBox enableOcrCheckbox;
private javax.swing.JCheckBox enableUTF16Checkbox; private javax.swing.JCheckBox enableUTF16Checkbox;
private javax.swing.JCheckBox enableUTF8Checkbox; private javax.swing.JCheckBox enableUTF8Checkbox;
private javax.swing.JLabel ingestSettingsLabel; private javax.swing.JLabel ingestSettingsLabel;
@ -261,6 +283,7 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
Boolean.toString(enableUTF8Checkbox.isSelected())); Boolean.toString(enableUTF8Checkbox.isSelected()));
KeywordSearchSettings.setStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF16.toString(), KeywordSearchSettings.setStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF16.toString(),
Boolean.toString(enableUTF16Checkbox.isSelected())); Boolean.toString(enableUTF16Checkbox.isSelected()));
KeywordSearchSettings.setOcrOption(enableOcrCheckbox.isSelected());
if (toUpdate != null) { if (toUpdate != null) {
KeywordSearchSettings.setStringExtractScripts(toUpdate); KeywordSearchSettings.setStringExtractScripts(toUpdate);

View File

@ -39,6 +39,8 @@ class KeywordSearchSettings {
static final String PROPERTIES_SCRIPTS = NbBundle.getMessage(KeywordSearchSettings.class, "KeywordSearchSettings.propertiesScripts.text", MODULE_NAME); static final String PROPERTIES_SCRIPTS = NbBundle.getMessage(KeywordSearchSettings.class, "KeywordSearchSettings.propertiesScripts.text", MODULE_NAME);
static final String SHOW_SNIPPETS = "showSnippets"; //NON-NLS static final String SHOW_SNIPPETS = "showSnippets"; //NON-NLS
static final boolean DEFAULT_SHOW_SNIPPETS = true; static final boolean DEFAULT_SHOW_SNIPPETS = true;
static final String OCR_ENABLED = "ocrEnabled"; //NON-NLS
static final boolean OCR_ENABLED_DEFAULT = false; // NON-NLS
private static boolean skipKnown = true; private static boolean skipKnown = true;
private static final Logger logger = Logger.getLogger(KeywordSearchSettings.class.getName()); private static final Logger logger = Logger.getLogger(KeywordSearchSettings.class.getName());
private static UpdateFrequency UpdateFreq = UpdateFrequency.DEFAULT; private static UpdateFrequency UpdateFreq = UpdateFrequency.DEFAULT;
@ -128,6 +130,26 @@ class KeywordSearchSettings {
ModuleSettings.setConfigSetting(PROPERTIES_OPTIONS, key, val); ModuleSettings.setConfigSetting(PROPERTIES_OPTIONS, key, val);
} }
/**
 * Stores the OCR on/off choice under the OCR_ENABLED key of the
 * PROPERTIES_OPTIONS module settings.
 *
 * @param enabled True to store the string "true", false to store "false".
 */
static void setOcrOption(boolean enabled) {
    // Boolean.toString yields exactly "true" or "false", the two values
    // getOcrOption() compares against.
    final String storedValue = Boolean.toString(enabled);
    ModuleSettings.setConfigSetting(PROPERTIES_OPTIONS, OCR_ENABLED, storedValue);
}
/**
 * Reads the OCR on/off choice from the OCR_ENABLED key of the
 * PROPERTIES_OPTIONS module settings.
 *
 * @return True if the stored value is exactly "true"; OCR_ENABLED_DEFAULT
 *         if the setting does not exist or its value cannot be read; false
 *         for any other stored value.
 */
static boolean getOcrOption() {
    if (!ModuleSettings.settingExists(PROPERTIES_OPTIONS, OCR_ENABLED)) {
        return OCR_ENABLED_DEFAULT;
    }
    // The lookup can still come back null after the existence check (for
    // example if the settings file cannot be read, or the key is removed
    // in between). Fall back to the default instead of dereferencing null.
    final String storedValue = ModuleSettings.getConfigSetting(PROPERTIES_OPTIONS, OCR_ENABLED);
    if (storedValue == null) {
        return OCR_ENABLED_DEFAULT;
    }
    return storedValue.equals("true"); //NON-NLS
}
static void setShowSnippets(boolean showSnippets) { static void setShowSnippets(boolean showSnippets) {
ModuleSettings.setConfigSetting(PROPERTIES_OPTIONS, SHOW_SNIPPETS, (showSnippets ? "true" : "false")); //NON-NLS ModuleSettings.setConfigSetting(PROPERTIES_OPTIONS, SHOW_SNIPPETS, (showSnippets ? "true" : "false")); //NON-NLS
} }
@ -219,6 +241,11 @@ class KeywordSearchSettings {
logger.log(Level.INFO, "No configuration for UTF16 found, generating defaults..."); //NON-NLS logger.log(Level.INFO, "No configuration for UTF16 found, generating defaults..."); //NON-NLS
KeywordSearchSettings.setStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF16.toString(), Boolean.TRUE.toString()); KeywordSearchSettings.setStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF16.toString(), Boolean.TRUE.toString());
} }
//setting OCR default (disabled by default)
if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_OPTIONS, OCR_ENABLED)) {
logger.log(Level.INFO, "No configuration for OCR found, generating defaults..."); //NON-NLS
KeywordSearchSettings.setOcrOption(OCR_ENABLED_DEFAULT);
}
//setting default Latin-1 Script //setting default Latin-1 Script
if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_SCRIPTS, SCRIPT.LATIN_1.name())) { if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_SCRIPTS, SCRIPT.LATIN_1.name())) {
logger.log(Level.INFO, "No configuration for Scripts found, generating defaults..."); //NON-NLS logger.log(Level.INFO, "No configuration for Scripts found, generating defaults..."); //NON-NLS

View File

@ -19,9 +19,11 @@
package org.sleuthkit.autopsy.keywordsearch; package org.sleuthkit.autopsy.keywordsearch;
import com.google.common.io.CharSource; import com.google.common.io.CharSource;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.PushbackReader; import java.io.PushbackReader;
import java.io.Reader; import java.io.Reader;
import java.nio.file.Paths;
import java.util.List; import java.util.List;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors; import java.util.concurrent.Executors;
@ -38,8 +40,12 @@ import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser; import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParsingReader; import org.apache.tika.parser.ParsingReader;
import org.apache.tika.parser.microsoft.OfficeParserConfig; import org.apache.tika.parser.microsoft.OfficeParserConfig;
import org.apache.tika.parser.ocr.TesseractOCRConfig;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.openide.util.NbBundle; import org.openide.util.NbBundle;
import org.openide.modules.InstalledFileLocator;
import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.coreutils.PlatformUtil;
import org.sleuthkit.datamodel.Content; import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.ReadContentInputStream; import org.sleuthkit.datamodel.ReadContentInputStream;
@ -54,6 +60,10 @@ class TikaTextExtractor extends ContentTextExtractor {
private final AutoDetectParser parser = new AutoDetectParser(); private final AutoDetectParser parser = new AutoDetectParser();
private static final String TESSERACT_DIR_NAME = "Tesseract-OCR"; //NON-NLS
private static final String TESSERACT_EXECUTABLE = "tesseract.exe"; //NON-NLS
private static final File TESSERACT_PATH = locateTesseractExecutable();
private static final List<String> TIKA_SUPPORTED_TYPES private static final List<String> TIKA_SUPPORTED_TYPES
= new Tika().getParser().getSupportedTypes(new ParseContext()) = new Tika().getParser().getSupportedTypes(new ParseContext())
.stream() .stream()
@ -80,6 +90,30 @@ class TikaTextExtractor extends ContentTextExtractor {
officeParserConfig.setUseSAXDocxExtractor(true); officeParserConfig.setUseSAXDocxExtractor(true);
parseContext.set(OfficeParserConfig.class, officeParserConfig); parseContext.set(OfficeParserConfig.class, officeParserConfig);
// configure OCR if it is enabled in KWS settings and installed on the machine
if (TESSERACT_PATH != null && KeywordSearchSettings.getOcrOption() && PlatformUtil.isWindowsOS() == true) {
// configure PDFParser.
PDFParserConfig pdfConfig = new PDFParserConfig();
// Extracting the inline images and letting Tesseract run on each inline image.
// https://wiki.apache.org/tika/PDFParser%20%28Apache%20PDFBox%29
// https://tika.apache.org/1.7/api/org/apache/tika/parser/pdf/PDFParserConfig.html
pdfConfig.setExtractInlineImages(true);
// Multiple pages within a PDF file might refer to the same underlying image.
pdfConfig.setExtractUniqueInlineImagesOnly(true);
parseContext.set(PDFParserConfig.class, pdfConfig);
// Configure Tesseract parser to perform OCR
TesseractOCRConfig ocrConfig = new TesseractOCRConfig();
String tesseractFolder = TESSERACT_PATH.getParent();
ocrConfig.setTesseractPath(tesseractFolder);
// Tesseract expects language data packs to be in a subdirectory of tesseractFolder, in a folder called "tessdata".
// If they are stored somewhere else, use ocrConfig.setTessdataPath(String tessdataPath) to point to them
ocrConfig.setLanguage("eng");
parseContext.set(TesseractOCRConfig.class, ocrConfig);
}
//Parse the file in a task, a convenient way to have a timeout... //Parse the file in a task, a convenient way to have a timeout...
final Future<Reader> future = tikaParseExecutor.submit(() -> new ParsingReader(parser, stream, metadata, parseContext)); final Future<Reader> future = tikaParseExecutor.submit(() -> new ParsingReader(parser, stream, metadata, parseContext));
try { try {
@ -112,6 +146,29 @@ class TikaTextExtractor extends ContentTextExtractor {
} }
} }
/**
 * Looks up the Tesseract executable (TESSERACT_DIR_NAME/TESSERACT_EXECUTABLE)
 * through the InstalledFileLocator.
 *
 * @return The executable as a File, or null if the platform is not Windows,
 *         the file cannot be located, or the located file is not
 *         executable.
 */
private static File locateTesseractExecutable() {
    if (PlatformUtil.isWindowsOS()) {
        final String relativeExePath = Paths.get(TESSERACT_DIR_NAME, TESSERACT_EXECUTABLE).toString();
        final File candidate = InstalledFileLocator.getDefault().locate(
                relativeExePath, TikaTextExtractor.class.getPackage().getName(), false);

        // Only hand back a file that both exists and can actually be run.
        if (candidate != null && candidate.canExecute()) {
            return candidate;
        }
    }
    return null;
}
/** /**
* Gets a CharSource that wraps a formatted representation of the given * Gets a CharSource that wraps a formatted representation of the given
* Metadata. * Metadata.

BIN
thirdparty/Tesseract-OCR/ambiguous_words.exe vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/classifier_tester.exe vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/cntraining.exe vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/combine_tessdata.exe vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/dawg2wordlist.exe vendored Executable file

Binary file not shown.

42
thirdparty/Tesseract-OCR/doc/AUTHORS vendored Executable file
View File

@ -0,0 +1,42 @@
Ray Smith (lead developer) <theraysmith@gmail.com>
Ahmad Abdulkader
Rika Antonova
Nicholas Beato
Jeff Breidenbach
Samuel Charron
Phil Cheatle
Simon Crouch
David Eger
Sheelagh Huddleston
Dan Johnson
Rajesh Katikam
Thomas Kielbus
Dar-Shyang Lee
Zongyi (Joe) Liu
Robert Moss
Chris Newton
Michael Reimer
Marius Renn
Raquel Romano
Christy Russon
Shobhit Saxena
Mark Seaman
Faisal Shafait
Hiroshi Takenaka
Ranjith Unnikrishnan
Joern Wanke
Ping Ping Xiu
Andrew Ziem
Oscar Zuniga
Community Contributors:
Zdenko Podobný (Maintainer)
Jim Regan (Maintainer)
James R Barlow
Amit Dovev
Martin Ettl
Tom Morris
Tobias Müller
Egor Pugin
Sundar M. Vaidya
Stefan Weil

21
thirdparty/Tesseract-OCR/doc/COPYING vendored Executable file
View File

@ -0,0 +1,21 @@
This package contains the Tesseract Open Source OCR Engine.
Originally developed at Hewlett Packard Laboratories Bristol and
at Hewlett Packard Co, Greeley Colorado, all the code
in this distribution is now licensed under the Apache License:
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
Other Dependencies and Licenses:
================================
Tesseract uses Leptonica library (http://leptonica.com/) which essentially
uses a BSD 2-clause license. (http://leptonica.com/about-the-license.html)

43
thirdparty/Tesseract-OCR/doc/README vendored Executable file
View File

@ -0,0 +1,43 @@
How to run UNLV tests.
The scripts in this directory make it possible to duplicate the tests
published in the Fourth Annual Test of OCR Accuracy.
See http://www.isri.unlv.edu/downloads/AT-1995.pdf
but first you have to get the tools and data from UNLV:
Step 1: to download the images go to
http://www.isri.unlv.edu/ISRI/OCRtk
and get 3b.tgz, Bb.tgz, Mb.tgz and Nb.tgz.
Step 2: extract the files. It doesn't really matter where
in your filesystem you put them, but they must go under a common
root so you have directories 3, B, M and N in, for example,
/users/me/ISRI-OCRtk.
Step 3: Reorg the files
The lack of tif extensions on the images is inconvenient, so there
is a script to reorganize the data to match the rest of the test
scripts.
cd to /users/me/ISRI-OCRtk or wherever 3, B, M and N ended up and run
/blah/blah/tesseract-ocr/testing/reorgdata.sh 3B
This makes directories doe3.3B, bus.3B, mag.3B and news.3B.
You can now get rid of 3, B, M, and N unless you want to get some of the
other scanning resolutions out of them.
Step 4: Download the ISRI toolkit from:
http://www.isri.unlv.edu/downloads/ftk-1.0.tgz
Step 5: If they work for you, use the binaries directly from the bin
directory and put them in tesseract-ocr/testing/unlv
otherwise build the tools for yourself and put them there.
Step 6: cd back to your main tesseract-ocr dir and Build tesseract.
Step 7: run testing/runalltests.sh with the root data dir and testname:
testing/runalltests.sh /users/me/ISRI-OCRtk tess2.0
and go to the gym, have lunch etc.
Step 8: There should be a file
testing/reports/tess2.0.summary that contains the final summarized accuracy
report and comparison with the 1995 results.

BIN
thirdparty/Tesseract-OCR/doc/eurotext.tif vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/doc/phototest.tif vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/iconv.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/icudata51.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/icui18n51.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/icuuc51.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/java/ScrollView.jar vendored Executable file

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libbz2-1.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libcairo-2.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libexpat-1.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libffi-6.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libfontconfig-1.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libfreetype-6.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libgcc_s_sjlj-1.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libgif-4.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libglib-2.0-0.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libgobject-2.0-0.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libgomp-1.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libharfbuzz-0.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libintl-8.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libjbig-2.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libjpeg-8.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/liblept-5.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/liblzma-5.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libopenjp2.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libpango-1.0-0.dll vendored Executable file

Binary file not shown.

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libpangoft2-1.0-0.dll vendored Executable file

Binary file not shown.

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libpixman-1-0.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libpng16-16.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libstdc++-6.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libtesseract-3.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libtiff-5.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libwebp-5.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/libwinpthread-1.dll vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/mftraining.exe vendored Executable file

Binary file not shown.

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/shapeclustering.exe vendored Executable file

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/tar.exe vendored Executable file

Binary file not shown.

View File

@ -0,0 +1,7 @@
tessedit_ambigs_training 1
load_freq_dawg 0
load_punc_dawg 0
load_system_dawg 0
load_number_dawg 0
ambigs_debug_level 3
load_fixed_length_dawgs 0

View File

@ -0,0 +1 @@
tessedit_zero_rejection T

View File

@ -0,0 +1,5 @@
load_bigram_dawg True
tessedit_enable_bigram_correction True
tessedit_bigram_debug 3
save_raw_choices True
save_alt_choices True

View File

@ -0,0 +1,14 @@
disable_character_fragments T
file_type .bl
textord_fast_pitch_test T
tessedit_single_match 0
tessedit_zero_rejection T
tessedit_minimal_rejection F
tessedit_write_rep_codes F
il1_adaption_test 1
edges_children_fix F
edges_childarea 0.65
edges_boxarea 0.9
tessedit_resegment_from_boxes T
tessedit_train_from_boxes T
textord_no_rejects T

View File

@ -0,0 +1,15 @@
file_type .bl
#tessedit_use_nn F
textord_fast_pitch_test T
tessedit_single_match 0
tessedit_zero_rejection T
tessedit_minimal_rejection F
tessedit_write_rep_codes F
il1_adaption_test 1
edges_children_fix F
edges_childarea 0.65
edges_boxarea 0.9
tessedit_resegment_from_boxes T
tessedit_train_from_boxes T
#textord_repeat_extraction F
textord_no_rejects T

View File

@ -0,0 +1 @@
tessedit_char_whitelist 0123456789-.

View File

@ -0,0 +1,3 @@
tessedit_create_hocr 1
tessedit_pageseg_mode 1
hocr_font_info 0

View File

@ -0,0 +1,2 @@
interactive_display_mode T
tessedit_display_outwords T

View File

@ -0,0 +1,4 @@
textord_skewsmooth_offset 8
textord_skewsmooth_offset2 8
textord_merge_desc 0.5
textord_no_rejects 1

View File

@ -0,0 +1,2 @@
tessedit_resegment_from_line_boxes 1
tessedit_make_boxes_from_boxes 1

View File

@ -0,0 +1 @@
debug_file tesseract.log

View File

@ -0,0 +1 @@
tessedit_create_boxfile 1

View File

@ -0,0 +1,2 @@
tessedit_create_pdf 1
tessedit_pageseg_mode 1

View File

@ -0,0 +1 @@
debug_file /dev/null

View File

@ -0,0 +1,2 @@
tessedit_resegment_from_boxes 1
tessedit_make_boxes_from_boxes 1

View File

@ -0,0 +1,12 @@
textord_show_blobs 0
textord_debug_tabfind 3
textord_tabfind_show_partitions 1
textord_tabfind_show_initial_partitions 1
textord_tabfind_show_columns 1
textord_tabfind_show_blocks 1
textord_tabfind_show_initialtabs 1
textord_tabfind_show_finaltabs 1
textord_tabfind_show_strokewidths 1
textord_tabfind_show_vlines 0
textord_tabfind_show_images 1
tessedit_dump_pageseg_images 0

View File

@ -0,0 +1,2 @@
tessedit_create_tsv 1
tessedit_pageseg_mode 1

View File

@ -0,0 +1,3 @@
# This config file should be used with other config files which create renderers.
# usage example: tesseract eurotext.tif eurotext txt hocr pdf
tessedit_create_txt 1

View File

@ -0,0 +1,2 @@
tessedit_write_unlv 1
tessedit_pageseg_mode 6

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,12 @@
0oO
lI1
cC
kK
pP
sS
uU
vV
wW
xX
yY
zZ

View File

@ -0,0 +1,7 @@
LeadPunc="({[`'
TrailPunc=}:;-]!?`,.)"'
NumLeadPunc=#({[@$
NumTrailPunc=}):;].,%
Operators=*+-/.:,()[]
Digits=0123456789
Alphas=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ

BIN
thirdparty/Tesseract-OCR/tessdata/eng.cube.nn vendored Executable file

Binary file not shown.

View File

@ -0,0 +1,14 @@
RecoWgt=1.0
SizeWgt=0.2435
OODWgt=0.0214
NumWgt=0.036
CharBigramsWgt=0.1567
MaxSegPerChar=8
BeamWidth=10
ConvGridSize=48
WordUnigramsWgt=0.01
MaxWordAspectRatio=20.0000
MinSpaceHeightRatio=0.5000
MaxSpaceHeightRatio=0.6000
HistWindWid=2
MinConCompSize=0

194633
thirdparty/Tesseract-OCR/tessdata/eng.cube.size vendored Executable file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,2 @@
1-\d\d\d-GOOG-411
www.\n\\\*.com

View File

@ -0,0 +1,5 @@
the
quick
brown
fox
jumped

Binary file not shown.

BIN
thirdparty/Tesseract-OCR/tessdata/pdf.ttf vendored Executable file

Binary file not shown.

View File

@ -0,0 +1,2 @@
# No content needed as all defaults are correct.

View File

@ -0,0 +1,2 @@
chop_enable 0
wordrec_enable_assoc 0

View File

@ -0,0 +1,7 @@
#################################################
# Adaptive Matcher Using PreAdapted Templates
#################################################
classify_enable_adaptive_debugger 1
matcher_debug_flags 6
matcher_debug_level 1

View File

@ -0,0 +1,13 @@
#################################################
# Adaptive Matcher Using PreAdapted Templates
#################################################
classify_enable_adaptive_debugger 1
matcher_debug_flags 6
matcher_debug_level 1
wordrec_display_splits 0
wordrec_display_all_words 1
wordrec_display_all_blobs 1
wordrec_display_segmentations 2
classify_debug_level 1

View File

@ -0,0 +1 @@

Some files were not shown because too many files have changed in this diff Show More