mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-15 09:17:42 +00:00
Merge branch 'develop' of https://github.com/sleuthkit/autopsy into 3788-intercase-correlation
This commit is contained in:
commit
83d91d3e33
1
.gitignore
vendored
1
.gitignore
vendored
@ -12,6 +12,7 @@
|
|||||||
/Core/build/
|
/Core/build/
|
||||||
/Core/dist/
|
/Core/dist/
|
||||||
/Core/nbproject/*
|
/Core/nbproject/*
|
||||||
|
/Core/test/qa-functional/data/*
|
||||||
!/Core/nbproject/project.xml
|
!/Core/nbproject/project.xml
|
||||||
!/Core/nbproject/project.properties
|
!/Core/nbproject/project.properties
|
||||||
|
|
||||||
|
@ -40,6 +40,11 @@
|
|||||||
<fileset dir="${thirdparty.dir}/Volatility"/>
|
<fileset dir="${thirdparty.dir}/Volatility"/>
|
||||||
</copy>
|
</copy>
|
||||||
|
|
||||||
|
<!--Copy Tesseract OCR to release-->
|
||||||
|
<copy todir="${basedir}/release/Tesseract-OCR" >
|
||||||
|
<fileset dir="${thirdparty.dir}/Tesseract-OCR"/>
|
||||||
|
</copy>
|
||||||
|
|
||||||
<!--Copy other jars-->
|
<!--Copy other jars-->
|
||||||
<copy file="${thirdparty.dir}/rejistry/Rejistry-1.0-SNAPSHOT.jar" todir="${ext.dir}" />
|
<copy file="${thirdparty.dir}/rejistry/Rejistry-1.0-SNAPSHOT.jar" todir="${ext.dir}" />
|
||||||
<copy file="${thirdparty.dir}/sevenzip/sevenzipjbinding.jar" todir="${ext.dir}" />
|
<copy file="${thirdparty.dir}/sevenzip/sevenzipjbinding.jar" todir="${ext.dir}" />
|
||||||
@ -89,6 +94,7 @@
|
|||||||
<get src="https://drive.google.com/uc?id=0BxdBkzm5VKGNT0dGY0dqcHVsU3M" dest="${test-input}/filter_test1.img" skipexisting="true"/>
|
<get src="https://drive.google.com/uc?id=0BxdBkzm5VKGNT0dGY0dqcHVsU3M" dest="${test-input}/filter_test1.img" skipexisting="true"/>
|
||||||
<get src="https://drive.google.com/uc?id=1bghoSm7z7nhmGIxlllyY1MMlbLntxm7n" dest="${test-input}/local_files_test.zip" skipexisting="true"/>
|
<get src="https://drive.google.com/uc?id=1bghoSm7z7nhmGIxlllyY1MMlbLntxm7n" dest="${test-input}/local_files_test.zip" skipexisting="true"/>
|
||||||
<get src="https://drive.google.com/uc?id=1BrSiUQ1fzxFS9vIaK4mYKX6qIVp9kRWT" dest="${test-input}/password_detection_test.img" skipexisting="true"/>
|
<get src="https://drive.google.com/uc?id=1BrSiUQ1fzxFS9vIaK4mYKX6qIVp9kRWT" dest="${test-input}/password_detection_test.img" skipexisting="true"/>
|
||||||
|
<get src="https://drive.google.com/uc?id=1HD8s4rculgHV1qZT5g80Kg7j4m1qccrN" dest="${test-input}/veracrypt_detection_test.vhd" skipexisting="true"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="get-deps" depends="init-ivy,getTSKJars,get-thirdparty-dependencies,get-InternalPythonModules, download-binlist,getTestDataFiles">
|
<target name="get-deps" depends="init-ivy,getTSKJars,get-thirdparty-dependencies,get-InternalPythonModules, download-binlist,getTestDataFiles">
|
||||||
|
@ -218,7 +218,7 @@ class SevenZipExtractor {
|
|||||||
*/
|
*/
|
||||||
private ArchiveFormat get7ZipOptions(AbstractFile archiveFile) {
|
private ArchiveFormat get7ZipOptions(AbstractFile archiveFile) {
|
||||||
// try to get the file type from the BB
|
// try to get the file type from the BB
|
||||||
String detectedFormat = null;
|
String detectedFormat;
|
||||||
detectedFormat = archiveFile.getMIMEType();
|
detectedFormat = archiveFile.getMIMEType();
|
||||||
|
|
||||||
if (detectedFormat == null) {
|
if (detectedFormat == null) {
|
||||||
@ -434,11 +434,11 @@ class SevenZipExtractor {
|
|||||||
result = item.extractSlow(unpackStream, password);
|
result = item.extractSlow(unpackStream, password);
|
||||||
}
|
}
|
||||||
if (result != ExtractOperationResult.OK) {
|
if (result != ExtractOperationResult.OK) {
|
||||||
logger.log(Level.WARNING, "Extraction of : " + localAbsPath + " encountered error " + result); //NON-NLS
|
logger.log(Level.WARNING, "Extraction of : {0} encountered error {1}", new Object[]{localAbsPath, result}); //NON-NLS
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
} catch (Exception e) {
|
} catch (SevenZipException e) {
|
||||||
//could be something unexpected with this file, move on
|
//could be something unexpected with this file, move on
|
||||||
logger.log(Level.WARNING, "Could not extract file from archive: " + localAbsPath, e); //NON-NLS
|
logger.log(Level.WARNING, "Could not extract file from archive: " + localAbsPath, e); //NON-NLS
|
||||||
} finally {
|
} finally {
|
||||||
@ -492,7 +492,7 @@ class SevenZipExtractor {
|
|||||||
final ProgressHandle progress = ProgressHandle.createHandle(Bundle.EmbeddedFileExtractorIngestModule_ArchiveExtractor_moduleName());
|
final ProgressHandle progress = ProgressHandle.createHandle(Bundle.EmbeddedFileExtractorIngestModule_ArchiveExtractor_moduleName());
|
||||||
//recursion depth check for zip bomb
|
//recursion depth check for zip bomb
|
||||||
final long archiveId = archiveFile.getId();
|
final long archiveId = archiveFile.getId();
|
||||||
SevenZipExtractor.ArchiveDepthCountTree.Archive parentAr = null;
|
SevenZipExtractor.ArchiveDepthCountTree.Archive parentAr;
|
||||||
try {
|
try {
|
||||||
blackboard = Case.getCurrentCaseThrows().getServices().getBlackboard();
|
blackboard = Case.getCurrentCaseThrows().getServices().getBlackboard();
|
||||||
} catch (NoCurrentCaseException ex) {
|
} catch (NoCurrentCaseException ex) {
|
||||||
@ -717,7 +717,7 @@ class SevenZipExtractor {
|
|||||||
String encryptionType = fullEncryption ? ENCRYPTION_FULL : ENCRYPTION_FILE_LEVEL;
|
String encryptionType = fullEncryption ? ENCRYPTION_FULL : ENCRYPTION_FILE_LEVEL;
|
||||||
try {
|
try {
|
||||||
BlackboardArtifact artifact = archiveFile.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED);
|
BlackboardArtifact artifact = archiveFile.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED);
|
||||||
artifact.addAttribute(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_NAME, EmbeddedFileExtractorModuleFactory.getModuleName(), encryptionType));
|
artifact.addAttribute(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_COMMENT, EmbeddedFileExtractorModuleFactory.getModuleName(), encryptionType));
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// index the artifact for keyword search
|
// index the artifact for keyword search
|
||||||
|
@ -62,7 +62,7 @@ public class EmbeddedFileTest extends NbTestCase {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setUp() {
|
public void setUp() {
|
||||||
CaseUtils.createCase(CASE_DIRECTORY_PATH, CASE_NAME);
|
CaseUtils.createCase(CASE_NAME);
|
||||||
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
|
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
|
||||||
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
|
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
|
||||||
|
|
||||||
@ -92,7 +92,6 @@ public class EmbeddedFileTest extends NbTestCase {
|
|||||||
@Override
|
@Override
|
||||||
public void tearDown() {
|
public void tearDown() {
|
||||||
CaseUtils.closeCase();
|
CaseUtils.closeCase();
|
||||||
CaseUtils.deleteCaseDir(CASE_DIRECTORY_PATH);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testEncryption() {
|
public void testEncryption() {
|
||||||
|
@ -72,8 +72,7 @@ public class IngestFileFiltersTest extends NbTestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void testBasicDir() {
|
public void testBasicDir() {
|
||||||
Path casePath = Paths.get(System.getProperty("java.io.tmpdir"), "testBasicDir");
|
CaseUtils.createCase("testBasicDir");
|
||||||
CaseUtils.createCase(casePath, "testBasicDir");
|
|
||||||
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
|
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
|
||||||
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
|
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
|
||||||
|
|
||||||
@ -115,8 +114,7 @@ public class IngestFileFiltersTest extends NbTestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void testExtAndDirWithOneRule() {
|
public void testExtAndDirWithOneRule() {
|
||||||
Path casePath = Paths.get(System.getProperty("java.io.tmpdir"), "testExtAndDirWithOneRule");
|
CaseUtils.createCase("testExtAndDirWithOneRule");
|
||||||
CaseUtils.createCase(casePath, "testExtAndDirWithOneRule");
|
|
||||||
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
|
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
|
||||||
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
|
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
|
||||||
|
|
||||||
@ -151,8 +149,7 @@ public class IngestFileFiltersTest extends NbTestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void testExtAndDirWithTwoRules() {
|
public void testExtAndDirWithTwoRules() {
|
||||||
Path casePath = Paths.get(System.getProperty("java.io.tmpdir"), "testExtAndDirWithTwoRules");
|
CaseUtils.createCase("testExtAndDirWithTwoRules");
|
||||||
CaseUtils.createCase(casePath, "testExtAndDirWithTwoRules");
|
|
||||||
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
|
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
|
||||||
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
|
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
|
||||||
|
|
||||||
@ -196,8 +193,7 @@ public class IngestFileFiltersTest extends NbTestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void testFullFileNameRule() {
|
public void testFullFileNameRule() {
|
||||||
Path casePath = Paths.get(System.getProperty("java.io.tmpdir"), "testFullFileNameRule");
|
CaseUtils.createCase("testFullFileNameRule");
|
||||||
CaseUtils.createCase(casePath, "testFullFileNameRule");
|
|
||||||
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
|
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
|
||||||
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
|
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
|
||||||
|
|
||||||
@ -232,8 +228,7 @@ public class IngestFileFiltersTest extends NbTestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void testCarvingWithExtRuleAndUnallocSpace() {
|
public void testCarvingWithExtRuleAndUnallocSpace() {
|
||||||
Path casePath = Paths.get(System.getProperty("java.io.tmpdir"), "testCarvingWithExtRuleAndUnallocSpace");
|
CaseUtils.createCase("testCarvingWithExtRuleAndUnallocSpace");
|
||||||
CaseUtils.createCase(casePath, "testCarvingWithExtRuleAndUnallocSpace");
|
|
||||||
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
|
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
|
||||||
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
|
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
|
||||||
|
|
||||||
@ -281,8 +276,7 @@ public class IngestFileFiltersTest extends NbTestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void testCarvingNoUnallocatedSpace() {
|
public void testCarvingNoUnallocatedSpace() {
|
||||||
Path casePath = Paths.get(System.getProperty("java.io.tmpdir"), "testCarvingNoUnallocatedSpace");
|
CaseUtils.createCase("testCarvingNoUnallocatedSpace");
|
||||||
CaseUtils.createCase(casePath, "testCarvingNoUnallocatedSpace");
|
|
||||||
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
|
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
|
||||||
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
|
IngestUtils.addDataSource(dataSourceProcessor, IMAGE_PATH);
|
||||||
|
|
||||||
@ -315,8 +309,7 @@ public class IngestFileFiltersTest extends NbTestCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void testEmbeddedModule() {
|
public void testEmbeddedModule() {
|
||||||
Path casePath = Paths.get(System.getProperty("java.io.tmpdir"), "testEmbeddedModule");
|
CaseUtils.createCase("testEmbeddedModule");
|
||||||
CaseUtils.createCase(casePath, "testEmbeddedModule");
|
|
||||||
LocalFilesDSProcessor dataSourceProcessor = new LocalFilesDSProcessor();
|
LocalFilesDSProcessor dataSourceProcessor = new LocalFilesDSProcessor();
|
||||||
IngestUtils.addDataSource(dataSourceProcessor, ZIPFILE_PATH);
|
IngestUtils.addDataSource(dataSourceProcessor, ZIPFILE_PATH);
|
||||||
|
|
||||||
|
@ -44,6 +44,7 @@ import org.sleuthkit.datamodel.BlackboardAttribute;
|
|||||||
import org.sleuthkit.datamodel.Image;
|
import org.sleuthkit.datamodel.Image;
|
||||||
import org.sleuthkit.datamodel.TskCoreException;
|
import org.sleuthkit.datamodel.TskCoreException;
|
||||||
import org.sleuthkit.datamodel.TskData;
|
import org.sleuthkit.datamodel.TskData;
|
||||||
|
import org.sleuthkit.datamodel.Content;
|
||||||
import org.sleuthkit.datamodel.Volume;
|
import org.sleuthkit.datamodel.Volume;
|
||||||
import org.sleuthkit.datamodel.VolumeSystem;
|
import org.sleuthkit.datamodel.VolumeSystem;
|
||||||
|
|
||||||
@ -58,6 +59,12 @@ public class EncryptionDetectionTest extends NbTestCase {
|
|||||||
private final Path BITLOCKER_IMAGE_PATH = Paths.get(this.getDataDir().toString(), "encryption_detection_bitlocker_test.vhd");
|
private final Path BITLOCKER_IMAGE_PATH = Paths.get(this.getDataDir().toString(), "encryption_detection_bitlocker_test.vhd");
|
||||||
private final Path PASSWORD_IMAGE_PATH = Paths.get(this.getDataDir().toString(), "password_detection_test.img");
|
private final Path PASSWORD_IMAGE_PATH = Paths.get(this.getDataDir().toString(), "password_detection_test.img");
|
||||||
|
|
||||||
|
private static final String PASSWORD_DETECTION_CASE_NAME = "PasswordDetectionTest";
|
||||||
|
private static final String VERACRYPT_DETECTION_CASE_NAME = "VeraCryptDetectionTest";
|
||||||
|
|
||||||
|
private final Path PASSWORD_DETECTION_IMAGE_PATH = Paths.get(this.getDataDir().toString(), "password_detection_test.img");
|
||||||
|
private final Path VERACRYPT_DETECTION_IMAGE_PATH = Paths.get(this.getDataDir().toString(), "veracrypt_detection_test.vhd");
|
||||||
|
|
||||||
public static Test suite() {
|
public static Test suite() {
|
||||||
NbModuleSuite.Configuration conf = NbModuleSuite.createConfiguration(EncryptionDetectionTest.class).
|
NbModuleSuite.Configuration conf = NbModuleSuite.createConfiguration(EncryptionDetectionTest.class).
|
||||||
clusters(".*").
|
clusters(".*").
|
||||||
@ -79,7 +86,7 @@ public class EncryptionDetectionTest extends NbTestCase {
|
|||||||
*/
|
*/
|
||||||
public void testBitlockerEncryption() {
|
public void testBitlockerEncryption() {
|
||||||
try {
|
try {
|
||||||
CaseUtils.createCase(BITLOCKER_CASE_DIRECTORY_PATH, BITLOCKER_CASE_NAME);
|
CaseUtils.createCase(BITLOCKER_CASE_NAME);
|
||||||
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
|
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
|
||||||
IngestUtils.addDataSource(dataSourceProcessor, BITLOCKER_IMAGE_PATH);
|
IngestUtils.addDataSource(dataSourceProcessor, BITLOCKER_IMAGE_PATH);
|
||||||
Case openCase = Case.getCurrentCaseThrows();
|
Case openCase = Case.getCurrentCaseThrows();
|
||||||
@ -150,21 +157,22 @@ public class EncryptionDetectionTest extends NbTestCase {
|
|||||||
*/
|
*/
|
||||||
public void testPasswordProtection() {
|
public void testPasswordProtection() {
|
||||||
try {
|
try {
|
||||||
CaseUtils.createCase(PASSWORD_CASE_DIRECTORY_PATH, PASSWORD_CASE_NAME);
|
CaseUtils.createCase(PASSWORD_DETECTION_CASE_NAME);
|
||||||
|
|
||||||
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
|
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
|
||||||
IngestUtils.addDataSource(dataSourceProcessor, PASSWORD_IMAGE_PATH);
|
List<String> errorMessages = IngestUtils.addDataSource(dataSourceProcessor, PASSWORD_DETECTION_IMAGE_PATH);
|
||||||
|
String joinedErrors = String.join(System.lineSeparator(), errorMessages);
|
||||||
|
assertEquals(joinedErrors, 0, errorMessages.size());
|
||||||
|
|
||||||
Case openCase = Case.getCurrentCaseThrows();
|
Case openCase = Case.getCurrentCaseThrows();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create ingest job settings.
|
* Create ingest job settings.
|
||||||
*/
|
*/
|
||||||
IngestModuleFactory ingestModuleFactory = new EncryptionDetectionModuleFactory();
|
|
||||||
IngestModuleIngestJobSettings settings = ingestModuleFactory.getDefaultIngestJobSettings();
|
ArrayList<IngestModuleTemplate> templates = new ArrayList<>();
|
||||||
IngestModuleTemplate template = new IngestModuleTemplate(ingestModuleFactory, settings);
|
templates.add(IngestUtils.getIngestModuleTemplate(new EncryptionDetectionModuleFactory()));
|
||||||
template.setEnabled(true);
|
IngestJobSettings ingestJobSettings = new IngestJobSettings(PASSWORD_DETECTION_CASE_NAME, IngestType.FILES_ONLY, templates);
|
||||||
List<IngestModuleTemplate> templates = new ArrayList<>();
|
|
||||||
templates.add(template);
|
|
||||||
IngestJobSettings ingestJobSettings = new IngestJobSettings(EncryptionDetectionTest.class.getCanonicalName(), IngestType.FILES_ONLY, templates);
|
|
||||||
IngestUtils.runIngestJob(openCase.getDataSources(), ingestJobSettings);
|
IngestUtils.runIngestJob(openCase.getDataSources(), ingestJobSettings);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -201,8 +209,8 @@ public class EncryptionDetectionTest extends NbTestCase {
|
|||||||
*/
|
*/
|
||||||
if (file.isFile() && !file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.SLACK)) {
|
if (file.isFile() && !file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.SLACK)) {
|
||||||
/*
|
/*
|
||||||
* Determine which assertions to use for the file based on
|
* Determine which assertions to use for the file based
|
||||||
* its name.
|
* on its name.
|
||||||
*/
|
*/
|
||||||
boolean fileProtected = file.getName().split("\\.")[0].endsWith("-protected");
|
boolean fileProtected = file.getName().split("\\.")[0].endsWith("-protected");
|
||||||
List<BlackboardArtifact> artifactsList = file.getAllArtifacts();
|
List<BlackboardArtifact> artifactsList = file.getAllArtifacts();
|
||||||
@ -234,4 +242,67 @@ public class EncryptionDetectionTest extends NbTestCase {
|
|||||||
Assert.fail(ex);
|
Assert.fail(ex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test the Encryption Detection module's detection of veracrypt encrypted
|
||||||
|
* container files and partitions.
|
||||||
|
*
|
||||||
|
* Test passes if the following are true.
|
||||||
|
*
|
||||||
|
* 1. A partition was detected without a file system by checking for the
|
||||||
|
* error. 2. Only 1 data source exists in the case, to ensure a stale case
|
||||||
|
* did not get used. 3. One volume has a TSK_ENCRYPTION_SUSPECTED artifact
|
||||||
|
* associated with it. 4. A single file named veracryptContainerFile exists.
|
||||||
|
* 5. The file named veracryptContainerFile has a TSK_ENCRYPTION_SUSPECTED
|
||||||
|
* artifact associated with it.
|
||||||
|
*/
|
||||||
|
public void testVeraCryptSupport() {
|
||||||
|
try {
|
||||||
|
CaseUtils.createCase(VERACRYPT_DETECTION_CASE_NAME);
|
||||||
|
ImageDSProcessor dataSourceProcessor = new ImageDSProcessor();
|
||||||
|
List<String> errorMessages = IngestUtils.addDataSource(dataSourceProcessor, VERACRYPT_DETECTION_IMAGE_PATH);
|
||||||
|
String joinedErrors;
|
||||||
|
if (errorMessages.isEmpty()) {
|
||||||
|
joinedErrors = "Encrypted partition did not cause error, it was expected to";
|
||||||
|
} else {
|
||||||
|
joinedErrors = String.join(System.lineSeparator(), errorMessages);
|
||||||
|
}
|
||||||
|
//there will be 1 expected error regarding the encrypted partition not having a file system
|
||||||
|
assertEquals(joinedErrors, 1, errorMessages.size());
|
||||||
|
|
||||||
|
Case openCase = Case.getCurrentCaseThrows();
|
||||||
|
ArrayList<IngestModuleTemplate> templates = new ArrayList<>();
|
||||||
|
templates.add(IngestUtils.getIngestModuleTemplate(new EncryptionDetectionModuleFactory()));
|
||||||
|
//image includes an encrypted container file with size greater than 5 mb so default settings detect it
|
||||||
|
IngestJobSettings ingestJobSettings = new IngestJobSettings(VERACRYPT_DETECTION_CASE_NAME, IngestType.ALL_MODULES, templates);
|
||||||
|
|
||||||
|
assertEquals("Expected only one data source to exist in the Case", 1, openCase.getDataSources().size());
|
||||||
|
IngestUtils.runIngestJob(openCase.getDataSources(), ingestJobSettings);
|
||||||
|
|
||||||
|
//check that one of the partitions has an encrypted volume
|
||||||
|
int numberOfEncryptedVolumes = 0;
|
||||||
|
for (Content datasource : openCase.getDataSources()) { //data source
|
||||||
|
for (Content volumeSystem : datasource.getChildren()) { //volume system
|
||||||
|
for (Content volume : volumeSystem.getChildren()) { //volumes
|
||||||
|
numberOfEncryptedVolumes += volume.getArtifacts(BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_SUSPECTED).size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertEquals("One volume should exist with an encryption suspsected artifact", 1, numberOfEncryptedVolumes);
|
||||||
|
|
||||||
|
//ensure the encrypted container file was also detected correctly
|
||||||
|
FileManager fileManager = openCase.getServices().getFileManager();
|
||||||
|
List<AbstractFile> results = fileManager.findFiles("veracryptContainerFile");
|
||||||
|
assertEquals("Expected 1 file named veracryptContainerFile to exist in test image", 1, results.size());
|
||||||
|
int numberOfEncryptedContainers = 0;
|
||||||
|
for (AbstractFile file : results) {
|
||||||
|
numberOfEncryptedContainers += file.getArtifacts(BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_SUSPECTED).size();
|
||||||
|
}
|
||||||
|
assertEquals("Encrypted Container file should have one encyption suspected artifact", 1, numberOfEncryptedContainers);
|
||||||
|
} catch (NoCurrentCaseException | TskCoreException ex) {
|
||||||
|
Exceptions.printStackTrace(ex);
|
||||||
|
Assert.fail(ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -18,8 +18,10 @@
|
|||||||
*/
|
*/
|
||||||
package org.sleuthkit.autopsy.testutils;
|
package org.sleuthkit.autopsy.testutils;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
import static junit.framework.Assert.assertFalse;
|
import static junit.framework.Assert.assertFalse;
|
||||||
import static junit.framework.Assert.assertTrue;
|
import static junit.framework.Assert.assertTrue;
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
@ -30,44 +32,43 @@ import org.sleuthkit.autopsy.casemodule.CaseActionException;
|
|||||||
import org.sleuthkit.autopsy.casemodule.CaseDetails;
|
import org.sleuthkit.autopsy.casemodule.CaseDetails;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Common case utility methods.
|
* Class with common methods for testing related to the creation and elimination
|
||||||
|
* of cases.
|
||||||
*/
|
*/
|
||||||
public final class CaseUtils {
|
public final class CaseUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* CaseUtils constructor. Since this class is not meant to allow for
|
* Create a case directory and case for the given case name.
|
||||||
* instantiation, this constructor is 'private'.
|
|
||||||
*/
|
|
||||||
private CaseUtils() {
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a new case. If the case already exists at the specified path, the
|
|
||||||
* existing case will be removed prior to creation of the new case.
|
|
||||||
*
|
*
|
||||||
* @param caseDirectoryPath The path to the case data.
|
* @param caseName the name for the case and case directory to have
|
||||||
* @param caseDisplayName The display name for the case.
|
|
||||||
*/
|
*/
|
||||||
public static void createCase(Path caseDirectoryPath, String caseDisplayName) {
|
public static void createCase(String caseName) {
|
||||||
//Make sure the test is starting with a clean state. So delete the test directory, if it exists.
|
//Make sure the case is starting with a clean state. So delete the case directory, if it exists.
|
||||||
deleteCaseDir(caseDirectoryPath);
|
Path caseDirectoryPath = Paths.get(System.getProperty("java.io.tmpdir"), caseName);
|
||||||
assertFalse("Unable to delete existing test directory", caseDirectoryPath.toFile().exists());
|
File caseDir = new File(caseDirectoryPath.toString());
|
||||||
|
try {
|
||||||
|
deleteCaseDir(caseDir);
|
||||||
|
} catch (IOException ex) {
|
||||||
|
Exceptions.printStackTrace(ex);
|
||||||
|
Assert.fail(ex);
|
||||||
|
}
|
||||||
|
assertFalse("Unable to delete existing test directory", caseDir.exists());
|
||||||
// Create the test directory
|
// Create the test directory
|
||||||
caseDirectoryPath.toFile().mkdirs();
|
caseDir.mkdirs();
|
||||||
assertTrue("Unable to create test directory", caseDirectoryPath.toFile().exists());
|
assertTrue("Unable to create test directory", caseDir.exists());
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Case.createAsCurrentCase(Case.CaseType.SINGLE_USER_CASE, caseDirectoryPath.toString(), new CaseDetails(caseDisplayName));
|
Case.createAsCurrentCase(Case.CaseType.SINGLE_USER_CASE, caseDirectoryPath.toString(), new CaseDetails(caseName));
|
||||||
} catch (CaseActionException ex) {
|
} catch (CaseActionException ex) {
|
||||||
Exceptions.printStackTrace(ex);
|
Exceptions.printStackTrace(ex);
|
||||||
Assert.fail(ex);
|
Assert.fail(ex);
|
||||||
}
|
}
|
||||||
assertTrue(caseDirectoryPath.toFile().exists());
|
|
||||||
|
assertTrue(caseDir.exists());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Close the currently opened case.
|
* Close the current case, fails test if case was unable to be closed.
|
||||||
*/
|
*/
|
||||||
public static void closeCase() {
|
public static void closeCase() {
|
||||||
try {
|
try {
|
||||||
@ -85,20 +86,25 @@ public final class CaseUtils {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Delete a case at the specified path.
|
* Delete the case directory if it exists; throws an exception if unable to
|
||||||
|
* delete case dir to allow the user to determine failure with.
|
||||||
*
|
*
|
||||||
* @param caseDirectoryPath The path to the case to be removed.
|
* @param caseDirectory the case directory to delete
|
||||||
|
*
|
||||||
|
* @throws IOException thrown if there was a problem deleting the case
|
||||||
|
* directory
|
||||||
*/
|
*/
|
||||||
public static void deleteCaseDir(Path caseDirectoryPath) {
|
public static void deleteCaseDir(File caseDirectory) throws IOException {
|
||||||
if (!caseDirectoryPath.toFile().exists()) {
|
if (!caseDirectory.exists()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
try {
|
FileUtils.deleteDirectory(caseDirectory);
|
||||||
FileUtils.deleteDirectory(caseDirectoryPath.toFile());
|
|
||||||
} catch (IOException ex) {
|
|
||||||
//We just want to make sure the case directory doesn't exist when the test starts. It shouldn't cause failure if the case directory couldn't be deleted after a test finished.
|
|
||||||
System.out.println("INFO: Unable to delete case directory: " + caseDirectoryPath.toString());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Private constructor to prevent utility class instantiation.
|
||||||
|
*/
|
||||||
|
private CaseUtils() {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
package org.sleuthkit.autopsy.testutils;
|
package org.sleuthkit.autopsy.testutils;
|
||||||
|
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import static junit.framework.Assert.assertEquals;
|
import static junit.framework.Assert.assertEquals;
|
||||||
import org.openide.util.Exceptions;
|
import org.openide.util.Exceptions;
|
||||||
@ -26,13 +27,14 @@ import org.python.icu.impl.Assert;
|
|||||||
import org.sleuthkit.autopsy.datasourceprocessors.AutoIngestDataSourceProcessor;
|
import org.sleuthkit.autopsy.datasourceprocessors.AutoIngestDataSourceProcessor;
|
||||||
import org.sleuthkit.autopsy.ingest.IngestJobSettings;
|
import org.sleuthkit.autopsy.ingest.IngestJobSettings;
|
||||||
import org.sleuthkit.autopsy.ingest.IngestModuleError;
|
import org.sleuthkit.autopsy.ingest.IngestModuleError;
|
||||||
import org.sleuthkit.autopsy.ingest.IngestModuleFactoryAdapter;
|
import org.sleuthkit.autopsy.ingest.IngestModuleFactory;
|
||||||
import org.sleuthkit.autopsy.ingest.IngestModuleIngestJobSettings;
|
import org.sleuthkit.autopsy.ingest.IngestModuleIngestJobSettings;
|
||||||
import org.sleuthkit.autopsy.ingest.IngestModuleTemplate;
|
import org.sleuthkit.autopsy.ingest.IngestModuleTemplate;
|
||||||
import org.sleuthkit.datamodel.Content;
|
import org.sleuthkit.datamodel.Content;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Common image utility methods.
|
* Class with common methods for testing related to adding and ingesting
|
||||||
|
* datasources.
|
||||||
*/
|
*/
|
||||||
public final class IngestUtils {
|
public final class IngestUtils {
|
||||||
|
|
||||||
@ -44,39 +46,48 @@ public final class IngestUtils {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add a data source for the data source processor.
|
* Add the specified datasource to the current case and process it.
|
||||||
|
* Causes failure if it was unable to add and process the datasource.
|
||||||
*
|
*
|
||||||
* @param dataSourceProcessor The data source processor.
|
* @param dataSourceProcessor the datasource processor to use to process the
|
||||||
* @param dataSourcePath The path to the data source to be added.
|
* datasource
|
||||||
|
* @param dataSourcePath the path to the datasource which is being
|
||||||
|
* added
|
||||||
|
*
|
||||||
|
* @return errorMessages a list of all error messages as strings which
|
||||||
|
* were encountered while processing the data source
|
||||||
*/
|
*/
|
||||||
public static void addDataSource(AutoIngestDataSourceProcessor dataSourceProcessor, Path dataSourcePath) {
|
public static List<String> addDataSource(AutoIngestDataSourceProcessor dataSourceProcessor, Path dataSourcePath) {
|
||||||
|
List<String> errorMessages = new ArrayList<>();
|
||||||
try {
|
try {
|
||||||
|
if (!dataSourcePath.toFile().exists()) {
|
||||||
|
Assert.fail("Data source not found: " + dataSourcePath.toString());
|
||||||
|
}
|
||||||
DataSourceProcessorRunner.ProcessorCallback callBack = DataSourceProcessorRunner.runDataSourceProcessor(dataSourceProcessor, dataSourcePath);
|
DataSourceProcessorRunner.ProcessorCallback callBack = DataSourceProcessorRunner.runDataSourceProcessor(dataSourceProcessor, dataSourcePath);
|
||||||
/*
|
errorMessages = callBack.getErrorMessages();
|
||||||
* Ignore the callback error messages. Sometimes it's perfectly
|
|
||||||
* valid for it to not be able to detect a file system, which is one
|
|
||||||
* of the errors that can be returned.
|
|
||||||
*/
|
|
||||||
} catch (AutoIngestDataSourceProcessor.AutoIngestDataSourceProcessorException | InterruptedException ex) {
|
} catch (AutoIngestDataSourceProcessor.AutoIngestDataSourceProcessorException | InterruptedException ex) {
|
||||||
Exceptions.printStackTrace(ex);
|
Exceptions.printStackTrace(ex);
|
||||||
Assert.fail(ex);
|
Assert.fail(ex);
|
||||||
}
|
}
|
||||||
|
return errorMessages;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Run an ingest job.
|
* Run ingest on the specified datasources with the specified ingest job
|
||||||
|
* settings. Causes failure if there are any errors or other problems while
|
||||||
|
* running ingest.
|
||||||
*
|
*
|
||||||
* @param dataSourceList The list of data sources to process.
|
* @param datasources - the datasources to run ingest on
|
||||||
* @param ingestJobSettings The ingest job settings to use for ingest.
|
* @param ingestJobSettings - the ingest job settings to use for ingest
|
||||||
*/
|
*/
|
||||||
public static void runIngestJob(List<Content> dataSourceList, IngestJobSettings ingestJobSettings) {
|
public static void runIngestJob(List<Content> datasources, IngestJobSettings ingestJobSettings) {
|
||||||
try {
|
try {
|
||||||
List<IngestModuleError> ingestModuleErrorsList = IngestJobRunner.runIngestJob(dataSourceList, ingestJobSettings);
|
List<IngestModuleError> errs = IngestJobRunner.runIngestJob(datasources, ingestJobSettings);
|
||||||
for (IngestModuleError err : ingestModuleErrorsList) {
|
StringBuilder joinedErrors = new StringBuilder("");
|
||||||
System.out.println(String.format("Error: %s: %s.", err.getModuleDisplayName(), err.toString()));
|
errs.forEach((err) -> {
|
||||||
}
|
joinedErrors.append(String.format("Error: %s: %s.", err.getModuleDisplayName(), err.toString())).append(System.lineSeparator());
|
||||||
String errorMessage = String.format("The ingest job runner produced %d error messages.", ingestModuleErrorsList.size());
|
});
|
||||||
assertEquals(errorMessage, 0, ingestModuleErrorsList.size());
|
assertEquals(joinedErrors.toString(), 0, errs.size());
|
||||||
} catch (InterruptedException ex) {
|
} catch (InterruptedException ex) {
|
||||||
Exceptions.printStackTrace(ex);
|
Exceptions.printStackTrace(ex);
|
||||||
Assert.fail(ex);
|
Assert.fail(ex);
|
||||||
@ -84,17 +95,18 @@ public final class IngestUtils {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Build a new ingest module template based on the given factory.
|
* Get the ingest module template for the specified factory's default
|
||||||
|
* ingest job settings.
|
||||||
*
|
*
|
||||||
* @param factory The ingest module factory.
|
* @param factory the factory to get the ingest job settings from
|
||||||
*
|
*
|
||||||
* @return The ingest module template.
|
* @return template - the IngestModuleTemplate created with the factory and
|
||||||
|
* its default settings.
|
||||||
*/
|
*/
|
||||||
public static IngestModuleTemplate getIngestModuleTemplate(IngestModuleFactoryAdapter factory) {
|
public static IngestModuleTemplate getIngestModuleTemplate(IngestModuleFactory factory) {
|
||||||
IngestModuleIngestJobSettings settings = factory.getDefaultIngestJobSettings();
|
IngestModuleIngestJobSettings settings = factory.getDefaultIngestJobSettings();
|
||||||
IngestModuleTemplate template = new IngestModuleTemplate(factory, settings);
|
IngestModuleTemplate template = new IngestModuleTemplate(factory, settings);
|
||||||
template.setEnabled(true);
|
template.setEnabled(true);
|
||||||
return template;
|
return template;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -218,6 +218,7 @@ KeywordSearchJobSettingsPanel.languagesLabel.text=Scripts enabled for string ext
|
|||||||
KeywordSearchGlobalLanguageSettingsPanel.enableUTF8Checkbox.text=Enable UTF8 text extraction
|
KeywordSearchGlobalLanguageSettingsPanel.enableUTF8Checkbox.text=Enable UTF8 text extraction
|
||||||
KeywordSearchGlobalLanguageSettingsPanel.ingestSettingsLabel.text=Ingest settings for string extraction from unknown file types (changes effective on next ingest):
|
KeywordSearchGlobalLanguageSettingsPanel.ingestSettingsLabel.text=Ingest settings for string extraction from unknown file types (changes effective on next ingest):
|
||||||
KeywordSearchGlobalLanguageSettingsPanel.enableUTF16Checkbox.text=Enable UTF16LE and UTF16BE string extraction
|
KeywordSearchGlobalLanguageSettingsPanel.enableUTF16Checkbox.text=Enable UTF16LE and UTF16BE string extraction
|
||||||
|
KeywordSearchGlobalLanguageSettingsPanel.enableOcrCheckbox.text=Enable Optical Character Recognition (OCR)
|
||||||
KeywordSearchGlobalLanguageSettingsPanel.languagesLabel.text=Enabled scripts (languages):
|
KeywordSearchGlobalLanguageSettingsPanel.languagesLabel.text=Enabled scripts (languages):
|
||||||
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.toolTipText=20 mins. (fastest ingest time)
|
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.toolTipText=20 mins. (fastest ingest time)
|
||||||
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.text=20 minutes (slowest feedback, fastest ingest)
|
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.text=20 minutes (slowest feedback, fastest ingest)
|
||||||
@ -309,3 +310,4 @@ ExtractedContentPanel.pageOfLabel.text=of
|
|||||||
ExtractedContentPanel.pageTotalLabel.text=-
|
ExtractedContentPanel.pageTotalLabel.text=-
|
||||||
ExtractedContentPanel.pageButtonsLabel.text=Page
|
ExtractedContentPanel.pageButtonsLabel.text=Page
|
||||||
ExtractedContentPanel.pagesLabel.text=Page:
|
ExtractedContentPanel.pagesLabel.text=Page:
|
||||||
|
|
||||||
|
@ -15,23 +15,28 @@
|
|||||||
|
|
||||||
<Layout>
|
<Layout>
|
||||||
<DimensionLayout dim="0">
|
<DimensionLayout dim="0">
|
||||||
|
<Group type="103" groupAlignment="0" attributes="0">
|
||||||
|
<Group type="102" attributes="0">
|
||||||
<Group type="103" groupAlignment="0" attributes="0">
|
<Group type="103" groupAlignment="0" attributes="0">
|
||||||
<Group type="102" alignment="0" attributes="0">
|
<Group type="102" alignment="0" attributes="0">
|
||||||
<EmptySpace max="-2" attributes="0"/>
|
<EmptySpace max="-2" attributes="0"/>
|
||||||
<Group type="103" groupAlignment="0" attributes="0">
|
<Group type="103" groupAlignment="0" attributes="0">
|
||||||
<Component id="ingestSettingsLabel" alignment="0" min="-2" max="-2" attributes="0"/>
|
<Component id="ingestSettingsLabel" alignment="0" min="-2" max="-2" attributes="0"/>
|
||||||
<Group type="102" attributes="0">
|
|
||||||
<EmptySpace min="10" pref="10" max="-2" attributes="0"/>
|
|
||||||
<Group type="103" groupAlignment="0" max="-2" attributes="0">
|
|
||||||
<Component id="enableUTF16Checkbox" min="-2" max="-2" attributes="0"/>
|
|
||||||
<Component id="enableUTF8Checkbox" alignment="0" min="-2" max="-2" attributes="0"/>
|
|
||||||
</Group>
|
|
||||||
</Group>
|
|
||||||
<Group type="103" alignment="0" groupAlignment="1" attributes="0">
|
<Group type="103" alignment="0" groupAlignment="1" attributes="0">
|
||||||
<Component id="languagesLabel" alignment="0" min="-2" max="-2" attributes="0"/>
|
<Component id="languagesLabel" alignment="0" min="-2" max="-2" attributes="0"/>
|
||||||
<Component id="langPanel" min="-2" max="-2" attributes="0"/>
|
<Component id="langPanel" min="-2" max="-2" attributes="0"/>
|
||||||
</Group>
|
</Group>
|
||||||
</Group>
|
</Group>
|
||||||
|
</Group>
|
||||||
|
<Group type="102" alignment="0" attributes="0">
|
||||||
|
<EmptySpace min="-2" pref="26" max="-2" attributes="0"/>
|
||||||
|
<Group type="103" groupAlignment="0" max="-2" attributes="0">
|
||||||
|
<Component id="enableUTF16Checkbox" min="-2" max="-2" attributes="0"/>
|
||||||
|
<Component id="enableUTF8Checkbox" alignment="0" min="-2" max="-2" attributes="0"/>
|
||||||
|
<Component id="enableOcrCheckbox" alignment="0" min="-2" max="-2" attributes="0"/>
|
||||||
|
</Group>
|
||||||
|
</Group>
|
||||||
|
</Group>
|
||||||
<EmptySpace pref="255" max="32767" attributes="0"/>
|
<EmptySpace pref="255" max="32767" attributes="0"/>
|
||||||
</Group>
|
</Group>
|
||||||
</Group>
|
</Group>
|
||||||
@ -42,13 +47,15 @@
|
|||||||
<EmptySpace max="-2" attributes="0"/>
|
<EmptySpace max="-2" attributes="0"/>
|
||||||
<Component id="ingestSettingsLabel" min="-2" max="-2" attributes="0"/>
|
<Component id="ingestSettingsLabel" min="-2" max="-2" attributes="0"/>
|
||||||
<EmptySpace type="unrelated" max="-2" attributes="0"/>
|
<EmptySpace type="unrelated" max="-2" attributes="0"/>
|
||||||
|
<Component id="enableOcrCheckbox" min="-2" max="-2" attributes="0"/>
|
||||||
|
<EmptySpace max="32767" attributes="0"/>
|
||||||
<Component id="enableUTF16Checkbox" min="-2" max="-2" attributes="0"/>
|
<Component id="enableUTF16Checkbox" min="-2" max="-2" attributes="0"/>
|
||||||
<EmptySpace max="-2" attributes="0"/>
|
<EmptySpace max="-2" attributes="0"/>
|
||||||
<Component id="enableUTF8Checkbox" min="-2" max="-2" attributes="0"/>
|
<Component id="enableUTF8Checkbox" min="-2" max="-2" attributes="0"/>
|
||||||
<EmptySpace type="separate" max="-2" attributes="0"/>
|
|
||||||
<Component id="languagesLabel" min="-2" max="-2" attributes="0"/>
|
|
||||||
<EmptySpace type="unrelated" max="-2" attributes="0"/>
|
<EmptySpace type="unrelated" max="-2" attributes="0"/>
|
||||||
<Component id="langPanel" pref="397" max="32767" attributes="0"/>
|
<Component id="languagesLabel" min="-2" max="-2" attributes="0"/>
|
||||||
|
<EmptySpace max="-2" attributes="0"/>
|
||||||
|
<Component id="langPanel" min="-2" pref="380" max="-2" attributes="0"/>
|
||||||
<EmptySpace max="-2" attributes="0"/>
|
<EmptySpace max="-2" attributes="0"/>
|
||||||
</Group>
|
</Group>
|
||||||
</Group>
|
</Group>
|
||||||
@ -86,7 +93,7 @@
|
|||||||
</DimensionLayout>
|
</DimensionLayout>
|
||||||
<DimensionLayout dim="1">
|
<DimensionLayout dim="1">
|
||||||
<Group type="103" groupAlignment="0" attributes="0">
|
<Group type="103" groupAlignment="0" attributes="0">
|
||||||
<EmptySpace min="0" pref="395" max="32767" attributes="0"/>
|
<EmptySpace min="0" pref="378" max="32767" attributes="0"/>
|
||||||
</Group>
|
</Group>
|
||||||
</DimensionLayout>
|
</DimensionLayout>
|
||||||
</Layout>
|
</Layout>
|
||||||
@ -120,5 +127,15 @@
|
|||||||
</Property>
|
</Property>
|
||||||
</Properties>
|
</Properties>
|
||||||
</Component>
|
</Component>
|
||||||
|
<Component class="javax.swing.JCheckBox" name="enableOcrCheckbox">
|
||||||
|
<Properties>
|
||||||
|
<Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor">
|
||||||
|
<ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalLanguageSettingsPanel.enableOcrCheckbox.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/>
|
||||||
|
</Property>
|
||||||
|
</Properties>
|
||||||
|
<Events>
|
||||||
|
<EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="enableOcrCheckboxActionPerformed"/>
|
||||||
|
</Events>
|
||||||
|
</Component>
|
||||||
</SubComponents>
|
</SubComponents>
|
||||||
</Form>
|
</Form>
|
||||||
|
@ -111,6 +111,9 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
|
|||||||
= Boolean.parseBoolean(KeywordSearchSettings.getStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF8.toString()));
|
= Boolean.parseBoolean(KeywordSearchSettings.getStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF8.toString()));
|
||||||
enableUTF8Checkbox.setSelected(utf8);
|
enableUTF8Checkbox.setSelected(utf8);
|
||||||
|
|
||||||
|
boolean ocr = KeywordSearchSettings.getOcrOption();
|
||||||
|
enableOcrCheckbox.setSelected(ocr);
|
||||||
|
|
||||||
final List<SCRIPT> serviceScripts = KeywordSearchSettings.getStringExtractScripts();
|
final List<SCRIPT> serviceScripts = KeywordSearchSettings.getStringExtractScripts();
|
||||||
final int components = checkPanel.getComponentCount();
|
final int components = checkPanel.getComponentCount();
|
||||||
|
|
||||||
@ -141,6 +144,7 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
|
|||||||
activateScriptsCheckboxes(extractEnabled && ingestNotRunning);
|
activateScriptsCheckboxes(extractEnabled && ingestNotRunning);
|
||||||
enableUTF16Checkbox.setEnabled(ingestNotRunning);
|
enableUTF16Checkbox.setEnabled(ingestNotRunning);
|
||||||
enableUTF8Checkbox.setEnabled(ingestNotRunning);
|
enableUTF8Checkbox.setEnabled(ingestNotRunning);
|
||||||
|
enableOcrCheckbox.setEnabled(ingestNotRunning);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -158,6 +162,7 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
|
|||||||
enableUTF8Checkbox = new javax.swing.JCheckBox();
|
enableUTF8Checkbox = new javax.swing.JCheckBox();
|
||||||
enableUTF16Checkbox = new javax.swing.JCheckBox();
|
enableUTF16Checkbox = new javax.swing.JCheckBox();
|
||||||
ingestSettingsLabel = new javax.swing.JLabel();
|
ingestSettingsLabel = new javax.swing.JLabel();
|
||||||
|
enableOcrCheckbox = new javax.swing.JCheckBox();
|
||||||
|
|
||||||
org.openide.awt.Mnemonics.setLocalizedText(languagesLabel, org.openide.util.NbBundle.getMessage(KeywordSearchGlobalLanguageSettingsPanel.class, "KeywordSearchGlobalLanguageSettingsPanel.languagesLabel.text")); // NOI18N
|
org.openide.awt.Mnemonics.setLocalizedText(languagesLabel, org.openide.util.NbBundle.getMessage(KeywordSearchGlobalLanguageSettingsPanel.class, "KeywordSearchGlobalLanguageSettingsPanel.languagesLabel.text")); // NOI18N
|
||||||
|
|
||||||
@ -173,7 +178,7 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
|
|||||||
);
|
);
|
||||||
checkPanelLayout.setVerticalGroup(
|
checkPanelLayout.setVerticalGroup(
|
||||||
checkPanelLayout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
|
checkPanelLayout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
|
||||||
.addGap(0, 395, Short.MAX_VALUE)
|
.addGap(0, 378, Short.MAX_VALUE)
|
||||||
);
|
);
|
||||||
|
|
||||||
langPanel.setViewportView(checkPanel);
|
langPanel.setViewportView(checkPanel);
|
||||||
@ -194,22 +199,32 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
|
|||||||
|
|
||||||
org.openide.awt.Mnemonics.setLocalizedText(ingestSettingsLabel, org.openide.util.NbBundle.getMessage(KeywordSearchGlobalLanguageSettingsPanel.class, "KeywordSearchGlobalLanguageSettingsPanel.ingestSettingsLabel.text")); // NOI18N
|
org.openide.awt.Mnemonics.setLocalizedText(ingestSettingsLabel, org.openide.util.NbBundle.getMessage(KeywordSearchGlobalLanguageSettingsPanel.class, "KeywordSearchGlobalLanguageSettingsPanel.ingestSettingsLabel.text")); // NOI18N
|
||||||
|
|
||||||
|
org.openide.awt.Mnemonics.setLocalizedText(enableOcrCheckbox, org.openide.util.NbBundle.getMessage(KeywordSearchGlobalLanguageSettingsPanel.class, "KeywordSearchGlobalLanguageSettingsPanel.enableOcrCheckbox.text")); // NOI18N
|
||||||
|
enableOcrCheckbox.addActionListener(new java.awt.event.ActionListener() {
|
||||||
|
public void actionPerformed(java.awt.event.ActionEvent evt) {
|
||||||
|
enableOcrCheckboxActionPerformed(evt);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
javax.swing.GroupLayout layout = new javax.swing.GroupLayout(this);
|
javax.swing.GroupLayout layout = new javax.swing.GroupLayout(this);
|
||||||
this.setLayout(layout);
|
this.setLayout(layout);
|
||||||
layout.setHorizontalGroup(
|
layout.setHorizontalGroup(
|
||||||
layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
|
layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
|
||||||
|
.addGroup(layout.createSequentialGroup()
|
||||||
|
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
|
||||||
.addGroup(layout.createSequentialGroup()
|
.addGroup(layout.createSequentialGroup()
|
||||||
.addContainerGap()
|
.addContainerGap()
|
||||||
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
|
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
|
||||||
.addComponent(ingestSettingsLabel)
|
.addComponent(ingestSettingsLabel)
|
||||||
.addGroup(layout.createSequentialGroup()
|
|
||||||
.addGap(10, 10, 10)
|
|
||||||
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING, false)
|
|
||||||
.addComponent(enableUTF16Checkbox)
|
|
||||||
.addComponent(enableUTF8Checkbox)))
|
|
||||||
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.TRAILING)
|
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.TRAILING)
|
||||||
.addComponent(languagesLabel, javax.swing.GroupLayout.Alignment.LEADING)
|
.addComponent(languagesLabel, javax.swing.GroupLayout.Alignment.LEADING)
|
||||||
.addComponent(langPanel, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)))
|
.addComponent(langPanel, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE))))
|
||||||
|
.addGroup(layout.createSequentialGroup()
|
||||||
|
.addGap(26, 26, 26)
|
||||||
|
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING, false)
|
||||||
|
.addComponent(enableUTF16Checkbox)
|
||||||
|
.addComponent(enableUTF8Checkbox)
|
||||||
|
.addComponent(enableOcrCheckbox))))
|
||||||
.addContainerGap(255, Short.MAX_VALUE))
|
.addContainerGap(255, Short.MAX_VALUE))
|
||||||
);
|
);
|
||||||
layout.setVerticalGroup(
|
layout.setVerticalGroup(
|
||||||
@ -218,13 +233,15 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
|
|||||||
.addContainerGap()
|
.addContainerGap()
|
||||||
.addComponent(ingestSettingsLabel)
|
.addComponent(ingestSettingsLabel)
|
||||||
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
|
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
|
||||||
|
.addComponent(enableOcrCheckbox)
|
||||||
|
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
|
||||||
.addComponent(enableUTF16Checkbox)
|
.addComponent(enableUTF16Checkbox)
|
||||||
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
|
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
|
||||||
.addComponent(enableUTF8Checkbox)
|
.addComponent(enableUTF8Checkbox)
|
||||||
.addGap(18, 18, 18)
|
|
||||||
.addComponent(languagesLabel)
|
|
||||||
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
|
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
|
||||||
.addComponent(langPanel, javax.swing.GroupLayout.DEFAULT_SIZE, 397, Short.MAX_VALUE)
|
.addComponent(languagesLabel)
|
||||||
|
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
|
||||||
|
.addComponent(langPanel, javax.swing.GroupLayout.PREFERRED_SIZE, 380, javax.swing.GroupLayout.PREFERRED_SIZE)
|
||||||
.addContainerGap())
|
.addContainerGap())
|
||||||
);
|
);
|
||||||
}// </editor-fold>//GEN-END:initComponents
|
}// </editor-fold>//GEN-END:initComponents
|
||||||
@ -246,8 +263,13 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
|
|||||||
firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null);
|
firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null);
|
||||||
}//GEN-LAST:event_enableUTF16CheckboxActionPerformed
|
}//GEN-LAST:event_enableUTF16CheckboxActionPerformed
|
||||||
|
|
||||||
|
private void enableOcrCheckboxActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_enableOcrCheckboxActionPerformed
|
||||||
|
firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null);
|
||||||
|
}//GEN-LAST:event_enableOcrCheckboxActionPerformed
|
||||||
|
|
||||||
// Variables declaration - do not modify//GEN-BEGIN:variables
|
// Variables declaration - do not modify//GEN-BEGIN:variables
|
||||||
private javax.swing.JPanel checkPanel;
|
private javax.swing.JPanel checkPanel;
|
||||||
|
private javax.swing.JCheckBox enableOcrCheckbox;
|
||||||
private javax.swing.JCheckBox enableUTF16Checkbox;
|
private javax.swing.JCheckBox enableUTF16Checkbox;
|
||||||
private javax.swing.JCheckBox enableUTF8Checkbox;
|
private javax.swing.JCheckBox enableUTF8Checkbox;
|
||||||
private javax.swing.JLabel ingestSettingsLabel;
|
private javax.swing.JLabel ingestSettingsLabel;
|
||||||
@ -261,6 +283,7 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
|
|||||||
Boolean.toString(enableUTF8Checkbox.isSelected()));
|
Boolean.toString(enableUTF8Checkbox.isSelected()));
|
||||||
KeywordSearchSettings.setStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF16.toString(),
|
KeywordSearchSettings.setStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF16.toString(),
|
||||||
Boolean.toString(enableUTF16Checkbox.isSelected()));
|
Boolean.toString(enableUTF16Checkbox.isSelected()));
|
||||||
|
KeywordSearchSettings.setOcrOption(enableOcrCheckbox.isSelected());
|
||||||
|
|
||||||
if (toUpdate != null) {
|
if (toUpdate != null) {
|
||||||
KeywordSearchSettings.setStringExtractScripts(toUpdate);
|
KeywordSearchSettings.setStringExtractScripts(toUpdate);
|
||||||
|
@ -39,6 +39,8 @@ class KeywordSearchSettings {
|
|||||||
static final String PROPERTIES_SCRIPTS = NbBundle.getMessage(KeywordSearchSettings.class, "KeywordSearchSettings.propertiesScripts.text", MODULE_NAME);
|
static final String PROPERTIES_SCRIPTS = NbBundle.getMessage(KeywordSearchSettings.class, "KeywordSearchSettings.propertiesScripts.text", MODULE_NAME);
|
||||||
static final String SHOW_SNIPPETS = "showSnippets"; //NON-NLS
|
static final String SHOW_SNIPPETS = "showSnippets"; //NON-NLS
|
||||||
static final boolean DEFAULT_SHOW_SNIPPETS = true;
|
static final boolean DEFAULT_SHOW_SNIPPETS = true;
|
||||||
|
static final String OCR_ENABLED = "ocrEnabled"; //NON-NLS
|
||||||
|
static final boolean OCR_ENABLED_DEFAULT = false; // NON-NLS
|
||||||
private static boolean skipKnown = true;
|
private static boolean skipKnown = true;
|
||||||
private static final Logger logger = Logger.getLogger(KeywordSearchSettings.class.getName());
|
private static final Logger logger = Logger.getLogger(KeywordSearchSettings.class.getName());
|
||||||
private static UpdateFrequency UpdateFreq = UpdateFrequency.DEFAULT;
|
private static UpdateFrequency UpdateFreq = UpdateFrequency.DEFAULT;
|
||||||
@ -128,6 +130,26 @@ class KeywordSearchSettings {
|
|||||||
ModuleSettings.setConfigSetting(PROPERTIES_OPTIONS, key, val);
|
ModuleSettings.setConfigSetting(PROPERTIES_OPTIONS, key, val);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Save OCR setting to permanent storage
|
||||||
|
* @param enabled
|
||||||
|
*/
|
||||||
|
static void setOcrOption(boolean enabled) {
|
||||||
|
ModuleSettings.setConfigSetting(PROPERTIES_OPTIONS, OCR_ENABLED, (enabled ? "true" : "false")); //NON-NLS
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get OCR setting from permanent storage
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
static boolean getOcrOption() {
|
||||||
|
if (ModuleSettings.settingExists(PROPERTIES_OPTIONS, OCR_ENABLED)) {
|
||||||
|
return ModuleSettings.getConfigSetting(PROPERTIES_OPTIONS, OCR_ENABLED).equals("true"); //NON-NLS
|
||||||
|
} else {
|
||||||
|
return OCR_ENABLED_DEFAULT;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void setShowSnippets(boolean showSnippets) {
|
static void setShowSnippets(boolean showSnippets) {
|
||||||
ModuleSettings.setConfigSetting(PROPERTIES_OPTIONS, SHOW_SNIPPETS, (showSnippets ? "true" : "false")); //NON-NLS
|
ModuleSettings.setConfigSetting(PROPERTIES_OPTIONS, SHOW_SNIPPETS, (showSnippets ? "true" : "false")); //NON-NLS
|
||||||
}
|
}
|
||||||
@ -219,6 +241,11 @@ class KeywordSearchSettings {
|
|||||||
logger.log(Level.INFO, "No configuration for UTF16 found, generating defaults..."); //NON-NLS
|
logger.log(Level.INFO, "No configuration for UTF16 found, generating defaults..."); //NON-NLS
|
||||||
KeywordSearchSettings.setStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF16.toString(), Boolean.TRUE.toString());
|
KeywordSearchSettings.setStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF16.toString(), Boolean.TRUE.toString());
|
||||||
}
|
}
|
||||||
|
//setting OCR default (disabled by default)
|
||||||
|
if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_OPTIONS, OCR_ENABLED)) {
|
||||||
|
logger.log(Level.INFO, "No configuration for OCR found, generating defaults..."); //NON-NLS
|
||||||
|
KeywordSearchSettings.setOcrOption(OCR_ENABLED_DEFAULT);
|
||||||
|
}
|
||||||
//setting default Latin-1 Script
|
//setting default Latin-1 Script
|
||||||
if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_SCRIPTS, SCRIPT.LATIN_1.name())) {
|
if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_SCRIPTS, SCRIPT.LATIN_1.name())) {
|
||||||
logger.log(Level.INFO, "No configuration for Scripts found, generating defaults..."); //NON-NLS
|
logger.log(Level.INFO, "No configuration for Scripts found, generating defaults..."); //NON-NLS
|
||||||
|
@ -19,9 +19,11 @@
|
|||||||
package org.sleuthkit.autopsy.keywordsearch;
|
package org.sleuthkit.autopsy.keywordsearch;
|
||||||
|
|
||||||
import com.google.common.io.CharSource;
|
import com.google.common.io.CharSource;
|
||||||
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.PushbackReader;
|
import java.io.PushbackReader;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
import java.nio.file.Paths;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
import java.util.concurrent.Executors;
|
import java.util.concurrent.Executors;
|
||||||
@ -38,8 +40,12 @@ import org.apache.tika.parser.ParseContext;
|
|||||||
import org.apache.tika.parser.Parser;
|
import org.apache.tika.parser.Parser;
|
||||||
import org.apache.tika.parser.ParsingReader;
|
import org.apache.tika.parser.ParsingReader;
|
||||||
import org.apache.tika.parser.microsoft.OfficeParserConfig;
|
import org.apache.tika.parser.microsoft.OfficeParserConfig;
|
||||||
|
import org.apache.tika.parser.ocr.TesseractOCRConfig;
|
||||||
|
import org.apache.tika.parser.pdf.PDFParserConfig;
|
||||||
import org.openide.util.NbBundle;
|
import org.openide.util.NbBundle;
|
||||||
|
import org.openide.modules.InstalledFileLocator;
|
||||||
import org.sleuthkit.autopsy.coreutils.Logger;
|
import org.sleuthkit.autopsy.coreutils.Logger;
|
||||||
|
import org.sleuthkit.autopsy.coreutils.PlatformUtil;
|
||||||
import org.sleuthkit.datamodel.Content;
|
import org.sleuthkit.datamodel.Content;
|
||||||
import org.sleuthkit.datamodel.ReadContentInputStream;
|
import org.sleuthkit.datamodel.ReadContentInputStream;
|
||||||
|
|
||||||
@ -54,6 +60,10 @@ class TikaTextExtractor extends ContentTextExtractor {
|
|||||||
|
|
||||||
private final AutoDetectParser parser = new AutoDetectParser();
|
private final AutoDetectParser parser = new AutoDetectParser();
|
||||||
|
|
||||||
|
private static final String TESSERACT_DIR_NAME = "Tesseract-OCR"; //NON-NLS
|
||||||
|
private static final String TESSERACT_EXECUTABLE = "tesseract.exe"; //NON-NLS
|
||||||
|
private static final File TESSERACT_PATH = locateTesseractExecutable();
|
||||||
|
|
||||||
private static final List<String> TIKA_SUPPORTED_TYPES
|
private static final List<String> TIKA_SUPPORTED_TYPES
|
||||||
= new Tika().getParser().getSupportedTypes(new ParseContext())
|
= new Tika().getParser().getSupportedTypes(new ParseContext())
|
||||||
.stream()
|
.stream()
|
||||||
@ -80,6 +90,30 @@ class TikaTextExtractor extends ContentTextExtractor {
|
|||||||
officeParserConfig.setUseSAXDocxExtractor(true);
|
officeParserConfig.setUseSAXDocxExtractor(true);
|
||||||
parseContext.set(OfficeParserConfig.class, officeParserConfig);
|
parseContext.set(OfficeParserConfig.class, officeParserConfig);
|
||||||
|
|
||||||
|
// configure OCR if it is enabled in KWS settings and installed on the machine
|
||||||
|
if (TESSERACT_PATH != null && KeywordSearchSettings.getOcrOption() && PlatformUtil.isWindowsOS() == true) {
|
||||||
|
|
||||||
|
// configure PDFParser.
|
||||||
|
PDFParserConfig pdfConfig = new PDFParserConfig();
|
||||||
|
|
||||||
|
// Extracting the inline images and letting Tesseract run on each inline image.
|
||||||
|
// https://wiki.apache.org/tika/PDFParser%20%28Apache%20PDFBox%29
|
||||||
|
// https://tika.apache.org/1.7/api/org/apache/tika/parser/pdf/PDFParserConfig.html
|
||||||
|
pdfConfig.setExtractInlineImages(true);
|
||||||
|
// Multiple pages within a PDF file might refer to the same underlying image.
|
||||||
|
pdfConfig.setExtractUniqueInlineImagesOnly(true);
|
||||||
|
parseContext.set(PDFParserConfig.class, pdfConfig);
|
||||||
|
|
||||||
|
// Configure Tesseract parser to perform OCR
|
||||||
|
TesseractOCRConfig ocrConfig = new TesseractOCRConfig();
|
||||||
|
String tesseractFolder = TESSERACT_PATH.getParent();
|
||||||
|
ocrConfig.setTesseractPath(tesseractFolder);
|
||||||
|
// Tesseract expects language data packs to be in a subdirectory of tesseractFolder, in a folder called "tessdata".
|
||||||
|
// If they are stored somewhere else, use ocrConfig.setTessdataPath(String tessdataPath) to point to them
|
||||||
|
ocrConfig.setLanguage("eng");
|
||||||
|
parseContext.set(TesseractOCRConfig.class, ocrConfig);
|
||||||
|
}
|
||||||
|
|
||||||
//Parse the file in a task, a convenient way to have a timeout...
|
//Parse the file in a task, a convenient way to have a timeout...
|
||||||
final Future<Reader> future = tikaParseExecutor.submit(() -> new ParsingReader(parser, stream, metadata, parseContext));
|
final Future<Reader> future = tikaParseExecutor.submit(() -> new ParsingReader(parser, stream, metadata, parseContext));
|
||||||
try {
|
try {
|
||||||
@ -112,6 +146,29 @@ class TikaTextExtractor extends ContentTextExtractor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finds and returns the path to the Tesseract executable, if able.
|
||||||
|
*
|
||||||
|
* @return A File reference or null.
|
||||||
|
*/
|
||||||
|
private static File locateTesseractExecutable() {
|
||||||
|
if (!PlatformUtil.isWindowsOS()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
String executableToFindName = Paths.get(TESSERACT_DIR_NAME, TESSERACT_EXECUTABLE).toString();
|
||||||
|
File exeFile = InstalledFileLocator.getDefault().locate(executableToFindName, TikaTextExtractor.class.getPackage().getName(), false);
|
||||||
|
if (null == exeFile) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!exeFile.canExecute()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return exeFile;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets a CharSource that wraps a formatted representation of the given
|
* Gets a CharSource that wraps a formatted representation of the given
|
||||||
* Metadata.
|
* Metadata.
|
||||||
|
BIN
thirdparty/Tesseract-OCR/ambiguous_words.exe
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/ambiguous_words.exe
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/classifier_tester.exe
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/classifier_tester.exe
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/cntraining.exe
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/cntraining.exe
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/combine_tessdata.exe
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/combine_tessdata.exe
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/dawg2wordlist.exe
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/dawg2wordlist.exe
vendored
Executable file
Binary file not shown.
42
thirdparty/Tesseract-OCR/doc/AUTHORS
vendored
Executable file
42
thirdparty/Tesseract-OCR/doc/AUTHORS
vendored
Executable file
@ -0,0 +1,42 @@
|
|||||||
|
Ray Smith (lead developer) <theraysmith@gmail.com>
|
||||||
|
Ahmad Abdulkader
|
||||||
|
Rika Antonova
|
||||||
|
Nicholas Beato
|
||||||
|
Jeff Breidenbach
|
||||||
|
Samuel Charron
|
||||||
|
Phil Cheatle
|
||||||
|
Simon Crouch
|
||||||
|
David Eger
|
||||||
|
Sheelagh Huddleston
|
||||||
|
Dan Johnson
|
||||||
|
Rajesh Katikam
|
||||||
|
Thomas Kielbus
|
||||||
|
Dar-Shyang Lee
|
||||||
|
Zongyi (Joe) Liu
|
||||||
|
Robert Moss
|
||||||
|
Chris Newton
|
||||||
|
Michael Reimer
|
||||||
|
Marius Renn
|
||||||
|
Raquel Romano
|
||||||
|
Christy Russon
|
||||||
|
Shobhit Saxena
|
||||||
|
Mark Seaman
|
||||||
|
Faisal Shafait
|
||||||
|
Hiroshi Takenaka
|
||||||
|
Ranjith Unnikrishnan
|
||||||
|
Joern Wanke
|
||||||
|
Ping Ping Xiu
|
||||||
|
Andrew Ziem
|
||||||
|
Oscar Zuniga
|
||||||
|
|
||||||
|
Community Contributors:
|
||||||
|
Zdenko Podobný (Maintainer)
|
||||||
|
Jim Regan (Maintainer)
|
||||||
|
James R Barlow
|
||||||
|
Amit Dovev
|
||||||
|
Martin Ettl
|
||||||
|
Tom Morris
|
||||||
|
Tobias Müller
|
||||||
|
Egor Pugin
|
||||||
|
Sundar M. Vaidya
|
||||||
|
Stefan Weil
|
21
thirdparty/Tesseract-OCR/doc/COPYING
vendored
Executable file
21
thirdparty/Tesseract-OCR/doc/COPYING
vendored
Executable file
@ -0,0 +1,21 @@
|
|||||||
|
This package contains the Tesseract Open Source OCR Engine.
|
||||||
|
Originally developed at Hewlett Packard Laboratories Bristol and
|
||||||
|
at Hewlett Packard Co, Greeley Colorado, all the code
|
||||||
|
in this distribution is now licensed under the Apache License:
|
||||||
|
|
||||||
|
** Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
** you may not use this file except in compliance with the License.
|
||||||
|
** You may obtain a copy of the License at
|
||||||
|
** http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
** Unless required by applicable law or agreed to in writing, software
|
||||||
|
** distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
** See the License for the specific language governing permissions and
|
||||||
|
** limitations under the License.
|
||||||
|
|
||||||
|
|
||||||
|
Other Dependencies and Licenses:
|
||||||
|
================================
|
||||||
|
|
||||||
|
Tesseract uses Leptonica library (http://leptonica.com/) which essentially
|
||||||
|
uses a BSD 2-clause license. (http://leptonica.com/about-the-license.html)
|
43
thirdparty/Tesseract-OCR/doc/README
vendored
Executable file
43
thirdparty/Tesseract-OCR/doc/README
vendored
Executable file
@ -0,0 +1,43 @@
|
|||||||
|
How to run UNLV tests.
|
||||||
|
|
||||||
|
The scripts in this directory make it possible to duplicate the tests
|
||||||
|
published in the Fourth Annual Test of OCR Accuracy.
|
||||||
|
See http://www.isri.unlv.edu/downloads/AT-1995.pdf
|
||||||
|
but first you have to get the tools and data from UNLV:
|
||||||
|
|
||||||
|
Step 1: To download the images, go to
|
||||||
|
http://www.isri.unlv.edu/ISRI/OCRtk
|
||||||
|
and get 3b.tgz, Bb.tgz, Mb.tgz and Nb.tgz.
|
||||||
|
|
||||||
|
Step 2: extract the files. It doesn't really matter where
|
||||||
|
in your filesystem you put them, but they must go under a common
|
||||||
|
root so you have directories 3, B, M and N in, for example,
|
||||||
|
/users/me/ISRI-OCRtk.
|
||||||
|
|
||||||
|
Step 3: Reorganize the files
|
||||||
|
The lack of tif extensions on the images is inconvenient, so there
|
||||||
|
is a script to reorganize the data to match the rest of the test
|
||||||
|
scripts.
|
||||||
|
cd to /users/me/ISRI-OCRtk or wherever 3, B, M and N ended up and run
|
||||||
|
/blah/blah/tesseract-ocr/testing/reorgdata.sh 3B
|
||||||
|
This makes directories doe3.3B, bus.3B, mag.3B and news.3B.
|
||||||
|
You can now get rid of 3, B, M, and N unless you want to get some of the
|
||||||
|
other scanning resolutions out of them.
|
||||||
|
|
||||||
|
Step 4: Download the ISRI toolkit from:
|
||||||
|
http://www.isri.unlv.edu/downloads/ftk-1.0.tgz
|
||||||
|
|
||||||
|
Step 5: If they work for you, use the binaries directly from the bin
|
||||||
|
directory and put them in tesseract-ocr/testing/unlv
|
||||||
|
otherwise build the tools for yourself and put them there.
|
||||||
|
|
||||||
|
Step 6: cd back to your main tesseract-ocr dir and build Tesseract.
|
||||||
|
|
||||||
|
Step 7: run testing/runalltests.sh with the root data dir and testname:
|
||||||
|
testing/runalltests.sh /users/me/ISRI-OCRtk tess2.0
|
||||||
|
and go to the gym, have lunch etc.
|
||||||
|
|
||||||
|
Step 8: There should be a file
|
||||||
|
testing/reports/tess2.0.summary that contains the final summarized accuracy
|
||||||
|
report and comparison with the 1995 results.
|
||||||
|
|
BIN
thirdparty/Tesseract-OCR/doc/eurotext.tif
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/doc/eurotext.tif
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/doc/phototest.tif
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/doc/phototest.tif
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/iconv.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/iconv.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/icudata51.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/icudata51.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/icui18n51.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/icui18n51.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/icuuc51.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/icuuc51.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/java/ScrollView.jar
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/java/ScrollView.jar
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/java/piccolo2d-core-3.0.jar
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/java/piccolo2d-core-3.0.jar
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/java/piccolo2d-extras-3.0.jar
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/java/piccolo2d-extras-3.0.jar
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libbz2-1.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libbz2-1.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libcairo-2.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libcairo-2.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libexpat-1.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libexpat-1.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libffi-6.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libffi-6.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libfontconfig-1.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libfontconfig-1.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libfreetype-6.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libfreetype-6.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libgcc_s_sjlj-1.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libgcc_s_sjlj-1.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libgif-4.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libgif-4.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libglib-2.0-0.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libglib-2.0-0.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libgobject-2.0-0.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libgobject-2.0-0.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libgomp-1.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libgomp-1.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libharfbuzz-0.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libharfbuzz-0.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libintl-8.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libintl-8.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libjbig-2.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libjbig-2.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libjpeg-8.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libjpeg-8.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/liblept-5.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/liblept-5.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/liblzma-5.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/liblzma-5.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libopenjp2.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libopenjp2.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libpango-1.0-0.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libpango-1.0-0.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libpangocairo-1.0-0.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libpangocairo-1.0-0.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libpangoft2-1.0-0.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libpangoft2-1.0-0.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libpangowin32-1.0-0.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libpangowin32-1.0-0.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libpixman-1-0.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libpixman-1-0.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libpng16-16.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libpng16-16.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libstdc++-6.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libstdc++-6.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libtesseract-3.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libtesseract-3.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libtiff-5.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libtiff-5.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libwebp-5.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libwebp-5.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libwinpthread-1.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libwinpthread-1.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/mftraining.exe
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/mftraining.exe
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/set_unicharset_properties.exe
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/set_unicharset_properties.exe
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/shapeclustering.exe
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/shapeclustering.exe
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/tar.exe
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/tar.exe
vendored
Executable file
Binary file not shown.
7
thirdparty/Tesseract-OCR/tessdata/configs/ambigs.train
vendored
Executable file
7
thirdparty/Tesseract-OCR/tessdata/configs/ambigs.train
vendored
Executable file
@ -0,0 +1,7 @@
|
|||||||
|
tessedit_ambigs_training 1
|
||||||
|
load_freq_dawg 0
|
||||||
|
load_punc_dawg 0
|
||||||
|
load_system_dawg 0
|
||||||
|
load_number_dawg 0
|
||||||
|
ambigs_debug_level 3
|
||||||
|
load_fixed_length_dawgs 0
|
1
thirdparty/Tesseract-OCR/tessdata/configs/api_config
vendored
Executable file
1
thirdparty/Tesseract-OCR/tessdata/configs/api_config
vendored
Executable file
@ -0,0 +1 @@
|
|||||||
|
tessedit_zero_rejection T
|
5
thirdparty/Tesseract-OCR/tessdata/configs/bigram
vendored
Executable file
5
thirdparty/Tesseract-OCR/tessdata/configs/bigram
vendored
Executable file
@ -0,0 +1,5 @@
|
|||||||
|
load_bigram_dawg True
|
||||||
|
tessedit_enable_bigram_correction True
|
||||||
|
tessedit_bigram_debug 3
|
||||||
|
save_raw_choices True
|
||||||
|
save_alt_choices True
|
14
thirdparty/Tesseract-OCR/tessdata/configs/box.train
vendored
Executable file
14
thirdparty/Tesseract-OCR/tessdata/configs/box.train
vendored
Executable file
@ -0,0 +1,14 @@
|
|||||||
|
disable_character_fragments T
|
||||||
|
file_type .bl
|
||||||
|
textord_fast_pitch_test T
|
||||||
|
tessedit_single_match 0
|
||||||
|
tessedit_zero_rejection T
|
||||||
|
tessedit_minimal_rejection F
|
||||||
|
tessedit_write_rep_codes F
|
||||||
|
il1_adaption_test 1
|
||||||
|
edges_children_fix F
|
||||||
|
edges_childarea 0.65
|
||||||
|
edges_boxarea 0.9
|
||||||
|
tessedit_resegment_from_boxes T
|
||||||
|
tessedit_train_from_boxes T
|
||||||
|
textord_no_rejects T
|
15
thirdparty/Tesseract-OCR/tessdata/configs/box.train.stderr
vendored
Executable file
15
thirdparty/Tesseract-OCR/tessdata/configs/box.train.stderr
vendored
Executable file
@ -0,0 +1,15 @@
|
|||||||
|
file_type .bl
|
||||||
|
#tessedit_use_nn F
|
||||||
|
textord_fast_pitch_test T
|
||||||
|
tessedit_single_match 0
|
||||||
|
tessedit_zero_rejection T
|
||||||
|
tessedit_minimal_rejection F
|
||||||
|
tessedit_write_rep_codes F
|
||||||
|
il1_adaption_test 1
|
||||||
|
edges_children_fix F
|
||||||
|
edges_childarea 0.65
|
||||||
|
edges_boxarea 0.9
|
||||||
|
tessedit_resegment_from_boxes T
|
||||||
|
tessedit_train_from_boxes T
|
||||||
|
#textord_repeat_extraction F
|
||||||
|
textord_no_rejects T
|
1
thirdparty/Tesseract-OCR/tessdata/configs/digits
vendored
Executable file
1
thirdparty/Tesseract-OCR/tessdata/configs/digits
vendored
Executable file
@ -0,0 +1 @@
|
|||||||
|
tessedit_char_whitelist 0123456789-.
|
3
thirdparty/Tesseract-OCR/tessdata/configs/hocr
vendored
Executable file
3
thirdparty/Tesseract-OCR/tessdata/configs/hocr
vendored
Executable file
@ -0,0 +1,3 @@
|
|||||||
|
tessedit_create_hocr 1
|
||||||
|
tessedit_pageseg_mode 1
|
||||||
|
hocr_font_info 0
|
2
thirdparty/Tesseract-OCR/tessdata/configs/inter
vendored
Executable file
2
thirdparty/Tesseract-OCR/tessdata/configs/inter
vendored
Executable file
@ -0,0 +1,2 @@
|
|||||||
|
interactive_display_mode T
|
||||||
|
tessedit_display_outwords T
|
4
thirdparty/Tesseract-OCR/tessdata/configs/kannada
vendored
Executable file
4
thirdparty/Tesseract-OCR/tessdata/configs/kannada
vendored
Executable file
@ -0,0 +1,4 @@
|
|||||||
|
textord_skewsmooth_offset 8
|
||||||
|
textord_skewsmooth_offset2 8
|
||||||
|
textord_merge_desc 0.5
|
||||||
|
textord_no_rejects 1
|
2
thirdparty/Tesseract-OCR/tessdata/configs/linebox
vendored
Executable file
2
thirdparty/Tesseract-OCR/tessdata/configs/linebox
vendored
Executable file
@ -0,0 +1,2 @@
|
|||||||
|
tessedit_resegment_from_line_boxes 1
|
||||||
|
tessedit_make_boxes_from_boxes 1
|
1
thirdparty/Tesseract-OCR/tessdata/configs/logfile
vendored
Executable file
1
thirdparty/Tesseract-OCR/tessdata/configs/logfile
vendored
Executable file
@ -0,0 +1 @@
|
|||||||
|
debug_file tesseract.log
|
1
thirdparty/Tesseract-OCR/tessdata/configs/makebox
vendored
Executable file
1
thirdparty/Tesseract-OCR/tessdata/configs/makebox
vendored
Executable file
@ -0,0 +1 @@
|
|||||||
|
tessedit_create_boxfile 1
|
2
thirdparty/Tesseract-OCR/tessdata/configs/pdf
vendored
Executable file
2
thirdparty/Tesseract-OCR/tessdata/configs/pdf
vendored
Executable file
@ -0,0 +1,2 @@
|
|||||||
|
tessedit_create_pdf 1
|
||||||
|
tessedit_pageseg_mode 1
|
1
thirdparty/Tesseract-OCR/tessdata/configs/quiet
vendored
Executable file
1
thirdparty/Tesseract-OCR/tessdata/configs/quiet
vendored
Executable file
@ -0,0 +1 @@
|
|||||||
|
debug_file /dev/null
|
2
thirdparty/Tesseract-OCR/tessdata/configs/rebox
vendored
Executable file
2
thirdparty/Tesseract-OCR/tessdata/configs/rebox
vendored
Executable file
@ -0,0 +1,2 @@
|
|||||||
|
tessedit_resegment_from_boxes 1
|
||||||
|
tessedit_make_boxes_from_boxes 1
|
12
thirdparty/Tesseract-OCR/tessdata/configs/strokewidth
vendored
Executable file
12
thirdparty/Tesseract-OCR/tessdata/configs/strokewidth
vendored
Executable file
@ -0,0 +1,12 @@
|
|||||||
|
textord_show_blobs 0
|
||||||
|
textord_debug_tabfind 3
|
||||||
|
textord_tabfind_show_partitions 1
|
||||||
|
textord_tabfind_show_initial_partitions 1
|
||||||
|
textord_tabfind_show_columns 1
|
||||||
|
textord_tabfind_show_blocks 1
|
||||||
|
textord_tabfind_show_initialtabs 1
|
||||||
|
textord_tabfind_show_finaltabs 1
|
||||||
|
textord_tabfind_show_strokewidths 1
|
||||||
|
textord_tabfind_show_vlines 0
|
||||||
|
textord_tabfind_show_images 1
|
||||||
|
tessedit_dump_pageseg_images 0
|
2
thirdparty/Tesseract-OCR/tessdata/configs/tsv
vendored
Executable file
2
thirdparty/Tesseract-OCR/tessdata/configs/tsv
vendored
Executable file
@ -0,0 +1,2 @@
|
|||||||
|
tessedit_create_tsv 1
|
||||||
|
tessedit_pageseg_mode 1
|
3
thirdparty/Tesseract-OCR/tessdata/configs/txt
vendored
Executable file
3
thirdparty/Tesseract-OCR/tessdata/configs/txt
vendored
Executable file
@ -0,0 +1,3 @@
|
|||||||
|
# This config file should be used with other config files which create renderers.
|
||||||
|
# usage example: tesseract eurotext.tif eurotext txt hocr pdf
|
||||||
|
tessedit_create_txt 1
|
2
thirdparty/Tesseract-OCR/tessdata/configs/unlv
vendored
Executable file
2
thirdparty/Tesseract-OCR/tessdata/configs/unlv
vendored
Executable file
@ -0,0 +1,2 @@
|
|||||||
|
tessedit_write_unlv 1
|
||||||
|
tessedit_pageseg_mode 6
|
9551
thirdparty/Tesseract-OCR/tessdata/eng.cube.bigrams
vendored
Executable file
9551
thirdparty/Tesseract-OCR/tessdata/eng.cube.bigrams
vendored
Executable file
File diff suppressed because it is too large
Load Diff
12
thirdparty/Tesseract-OCR/tessdata/eng.cube.fold
vendored
Executable file
12
thirdparty/Tesseract-OCR/tessdata/eng.cube.fold
vendored
Executable file
@ -0,0 +1,12 @@
|
|||||||
|
0oO
|
||||||
|
lI1
|
||||||
|
cC
|
||||||
|
kK
|
||||||
|
pP
|
||||||
|
sS
|
||||||
|
uU
|
||||||
|
vV
|
||||||
|
wW
|
||||||
|
xX
|
||||||
|
yY
|
||||||
|
zZ
|
7
thirdparty/Tesseract-OCR/tessdata/eng.cube.lm
vendored
Executable file
7
thirdparty/Tesseract-OCR/tessdata/eng.cube.lm
vendored
Executable file
@ -0,0 +1,7 @@
|
|||||||
|
LeadPunc="({[`'
|
||||||
|
TrailPunc=}:;-]!?`,.)"'
|
||||||
|
NumLeadPunc=#({[@$
|
||||||
|
NumTrailPunc=}):;].,%
|
||||||
|
Operators=*+-/.:,()[]
|
||||||
|
Digits=0123456789
|
||||||
|
Alphas=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
|
BIN
thirdparty/Tesseract-OCR/tessdata/eng.cube.nn
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/tessdata/eng.cube.nn
vendored
Executable file
Binary file not shown.
14
thirdparty/Tesseract-OCR/tessdata/eng.cube.params
vendored
Executable file
14
thirdparty/Tesseract-OCR/tessdata/eng.cube.params
vendored
Executable file
@ -0,0 +1,14 @@
|
|||||||
|
RecoWgt=1.0
|
||||||
|
SizeWgt=0.2435
|
||||||
|
OODWgt=0.0214
|
||||||
|
NumWgt=0.036
|
||||||
|
CharBigramsWgt=0.1567
|
||||||
|
MaxSegPerChar=8
|
||||||
|
BeamWidth=10
|
||||||
|
ConvGridSize=48
|
||||||
|
WordUnigramsWgt=0.01
|
||||||
|
MaxWordAspectRatio=20.0000
|
||||||
|
MinSpaceHeightRatio=0.5000
|
||||||
|
MaxSpaceHeightRatio=0.6000
|
||||||
|
HistWindWid=2
|
||||||
|
MinConCompSize=0
|
194633
thirdparty/Tesseract-OCR/tessdata/eng.cube.size
vendored
Executable file
194633
thirdparty/Tesseract-OCR/tessdata/eng.cube.size
vendored
Executable file
File diff suppressed because it is too large
Load Diff
171802
thirdparty/Tesseract-OCR/tessdata/eng.cube.word-freq
vendored
Executable file
171802
thirdparty/Tesseract-OCR/tessdata/eng.cube.word-freq
vendored
Executable file
File diff suppressed because it is too large
Load Diff
BIN
thirdparty/Tesseract-OCR/tessdata/eng.tesseract_cube.nn
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/tessdata/eng.tesseract_cube.nn
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/tessdata/eng.traineddata
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/tessdata/eng.traineddata
vendored
Executable file
Binary file not shown.
2
thirdparty/Tesseract-OCR/tessdata/eng.user-patterns
vendored
Executable file
2
thirdparty/Tesseract-OCR/tessdata/eng.user-patterns
vendored
Executable file
@ -0,0 +1,2 @@
|
|||||||
|
1-\d\d\d-GOOG-411
|
||||||
|
www.\n\\\*.com
|
5
thirdparty/Tesseract-OCR/tessdata/eng.user-words
vendored
Executable file
5
thirdparty/Tesseract-OCR/tessdata/eng.user-words
vendored
Executable file
@ -0,0 +1,5 @@
|
|||||||
|
the
|
||||||
|
quick
|
||||||
|
brown
|
||||||
|
fox
|
||||||
|
jumped
|
BIN
thirdparty/Tesseract-OCR/tessdata/enm.traineddata
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/tessdata/enm.traineddata
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/tessdata/pdf.ttf
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/tessdata/pdf.ttf
vendored
Executable file
Binary file not shown.
2
thirdparty/Tesseract-OCR/tessdata/tessconfigs/batch
vendored
Executable file
2
thirdparty/Tesseract-OCR/tessdata/tessconfigs/batch
vendored
Executable file
@ -0,0 +1,2 @@
|
|||||||
|
# No content needed as all defaults are correct.
|
||||||
|
|
2
thirdparty/Tesseract-OCR/tessdata/tessconfigs/batch.nochop
vendored
Executable file
2
thirdparty/Tesseract-OCR/tessdata/tessconfigs/batch.nochop
vendored
Executable file
@ -0,0 +1,2 @@
|
|||||||
|
chop_enable 0
|
||||||
|
wordrec_enable_assoc 0
|
7
thirdparty/Tesseract-OCR/tessdata/tessconfigs/matdemo
vendored
Executable file
7
thirdparty/Tesseract-OCR/tessdata/tessconfigs/matdemo
vendored
Executable file
@ -0,0 +1,7 @@
|
|||||||
|
#################################################
|
||||||
|
# Adaptive Matcher Using PreAdapted Templates
|
||||||
|
#################################################
|
||||||
|
|
||||||
|
classify_enable_adaptive_debugger 1
|
||||||
|
matcher_debug_flags 6
|
||||||
|
matcher_debug_level 1
|
13
thirdparty/Tesseract-OCR/tessdata/tessconfigs/msdemo
vendored
Executable file
13
thirdparty/Tesseract-OCR/tessdata/tessconfigs/msdemo
vendored
Executable file
@ -0,0 +1,13 @@
|
|||||||
|
#################################################
|
||||||
|
# Adaptive Matcher Using PreAdapted Templates
|
||||||
|
#################################################
|
||||||
|
|
||||||
|
classify_enable_adaptive_debugger 1
|
||||||
|
matcher_debug_flags 6
|
||||||
|
matcher_debug_level 1
|
||||||
|
|
||||||
|
wordrec_display_splits 0
|
||||||
|
wordrec_display_all_words 1
|
||||||
|
wordrec_display_all_blobs 1
|
||||||
|
wordrec_display_segmentations 2
|
||||||
|
classify_debug_level 1
|
1
thirdparty/Tesseract-OCR/tessdata/tessconfigs/nobatch
vendored
Executable file
1
thirdparty/Tesseract-OCR/tessdata/tessconfigs/nobatch
vendored
Executable file
@ -0,0 +1 @@
|
|||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user