mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-06 21:00:22 +00:00
Upgraded to Tesseract 4.0
This commit is contained in:
parent
2bf92ad219
commit
b2611465ef
@ -29,8 +29,8 @@ import java.io.InputStream;
|
|||||||
import java.io.PushbackReader;
|
import java.io.PushbackReader;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.concurrent.Callable;
|
import java.util.concurrent.Callable;
|
||||||
@ -60,6 +60,7 @@ import org.sleuthkit.autopsy.casemodule.Case;
|
|||||||
import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
|
import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
|
||||||
import org.sleuthkit.autopsy.coreutils.ExecUtil;
|
import org.sleuthkit.autopsy.coreutils.ExecUtil;
|
||||||
import org.sleuthkit.autopsy.coreutils.ExecUtil.ProcessTerminator;
|
import org.sleuthkit.autopsy.coreutils.ExecUtil.ProcessTerminator;
|
||||||
|
import org.sleuthkit.autopsy.coreutils.FileUtil;
|
||||||
import org.sleuthkit.autopsy.coreutils.PlatformUtil;
|
import org.sleuthkit.autopsy.coreutils.PlatformUtil;
|
||||||
import org.sleuthkit.autopsy.textreaders.textreaderconfigs.ImageConfig;
|
import org.sleuthkit.autopsy.textreaders.textreaderconfigs.ImageConfig;
|
||||||
import org.sleuthkit.autopsy.datamodel.ContentUtils;
|
import org.sleuthkit.autopsy.datamodel.ContentUtils;
|
||||||
@ -122,8 +123,8 @@ final class TikaTextExtractor extends TextExtractor {
|
|||||||
|
|
||||||
private static final java.util.logging.Logger tikaLogger = java.util.logging.Logger.getLogger("Tika"); //NON-NLS
|
private static final java.util.logging.Logger tikaLogger = java.util.logging.Logger.getLogger("Tika"); //NON-NLS
|
||||||
|
|
||||||
private final ThreadFactory tikaThreadFactory =
|
private final ThreadFactory tikaThreadFactory
|
||||||
new ThreadFactoryBuilder().setNameFormat("tika-reader-%d").build();
|
= new ThreadFactoryBuilder().setNameFormat("tika-reader-%d").build();
|
||||||
private final ExecutorService executorService = Executors.newSingleThreadExecutor(tikaThreadFactory);
|
private final ExecutorService executorService = Executors.newSingleThreadExecutor(tikaThreadFactory);
|
||||||
private static final String SQLITE_MIMETYPE = "application/x-sqlite3";
|
private static final String SQLITE_MIMETYPE = "application/x-sqlite3";
|
||||||
|
|
||||||
@ -135,8 +136,10 @@ final class TikaTextExtractor extends TextExtractor {
|
|||||||
private static final String TESSERACT_EXECUTABLE = "tesseract.exe"; //NON-NLS
|
private static final String TESSERACT_EXECUTABLE = "tesseract.exe"; //NON-NLS
|
||||||
private static final File TESSERACT_PATH = locateTesseractExecutable();
|
private static final File TESSERACT_PATH = locateTesseractExecutable();
|
||||||
private static final String LANGUAGE_PACKS = getLanguagePacks();
|
private static final String LANGUAGE_PACKS = getLanguagePacks();
|
||||||
|
private static final String TESSERACT_LANGUAGE_PACK_EXT = "traineddata"; //NON-NLS
|
||||||
|
private static final String TESSERACT_OUTPUT_FILE_NAME = "tess_output"; //NON-NLS
|
||||||
|
|
||||||
private ProcessTerminator processTerminator;
|
private ProcessTerminator processTerminator;
|
||||||
private static final String TESSERACT_OUTPUT_FILE_NAME = "output";
|
|
||||||
|
|
||||||
private static final List<String> TIKA_SUPPORTED_TYPES
|
private static final List<String> TIKA_SUPPORTED_TYPES
|
||||||
= new Tika().getParser().getSupportedTypes(new ParseContext())
|
= new Tika().getParser().getSupportedTypes(new ParseContext())
|
||||||
@ -182,7 +185,7 @@ final class TikaTextExtractor extends TextExtractor {
|
|||||||
AbstractFile file = ((AbstractFile) content);
|
AbstractFile file = ((AbstractFile) content);
|
||||||
//Run OCR on images with Tesseract directly.
|
//Run OCR on images with Tesseract directly.
|
||||||
if (file.getMIMEType().toLowerCase().startsWith("image/")) {
|
if (file.getMIMEType().toLowerCase().startsWith("image/")) {
|
||||||
stream = runOcrAndGetOutputStream(file);
|
stream = performOCR(file);
|
||||||
} else {
|
} else {
|
||||||
//Otherwise, go through Tika for PDFs so that it can
|
//Otherwise, go through Tika for PDFs so that it can
|
||||||
//extract images and run Tesseract on them.
|
//extract images and run Tesseract on them.
|
||||||
@ -201,15 +204,15 @@ final class TikaTextExtractor extends TextExtractor {
|
|||||||
String tesseractFolder = TESSERACT_PATH.getParent();
|
String tesseractFolder = TESSERACT_PATH.getParent();
|
||||||
ocrConfig.setTesseractPath(tesseractFolder);
|
ocrConfig.setTesseractPath(tesseractFolder);
|
||||||
/*
|
/*
|
||||||
* Tesseract expects language data packs to be in a
|
* Tesseract expects language data packs to be in a subdirectory
|
||||||
* subdirectory of tesseractFolder, in a folder called
|
* of tesseractFolder, in a folder called "tessdata". If they
|
||||||
* "tessdata". If they are stored somewhere else, use
|
* are stored somewhere else, use
|
||||||
* ocrConfig.setTessdataPath(String tessdataPath) to point
|
* ocrConfig.setTessdataPath(String tessdataPath) to point to
|
||||||
* to them
|
* them
|
||||||
*/
|
*/
|
||||||
ocrConfig.setLanguage(LANGUAGE_PACKS);
|
ocrConfig.setLanguage(LANGUAGE_PACKS);
|
||||||
parseContext.set(TesseractOCRConfig.class, ocrConfig);
|
parseContext.set(TesseractOCRConfig.class, ocrConfig);
|
||||||
|
|
||||||
stream = new ReadContentInputStream(content);
|
stream = new ReadContentInputStream(content);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -228,8 +231,7 @@ final class TikaTextExtractor extends TextExtractor {
|
|||||||
Future<Reader> future = executorService.submit(
|
Future<Reader> future = executorService.submit(
|
||||||
new GetTikaReader(parser, stream, metadata, parseContext));
|
new GetTikaReader(parser, stream, metadata, parseContext));
|
||||||
try {
|
try {
|
||||||
final Reader tikaReader = future.get(getTimeout(content.getSize()),
|
final Reader tikaReader = future.get(getTimeout(content.getSize()), TimeUnit.SECONDS);
|
||||||
TimeUnit.SECONDS);
|
|
||||||
//check if the reader is empty
|
//check if the reader is empty
|
||||||
PushbackReader pushbackReader = new PushbackReader(tikaReader);
|
PushbackReader pushbackReader = new PushbackReader(tikaReader);
|
||||||
int read = pushbackReader.read();
|
int read = pushbackReader.read();
|
||||||
@ -238,11 +240,9 @@ final class TikaTextExtractor extends TextExtractor {
|
|||||||
+ "Tika returned empty reader for " + content);
|
+ "Tika returned empty reader for " + content);
|
||||||
}
|
}
|
||||||
pushbackReader.unread(read);
|
pushbackReader.unread(read);
|
||||||
|
|
||||||
//concatenate parsed content and meta data into a single reader.
|
//concatenate parsed content and meta data into a single reader.
|
||||||
CharSource metaDataCharSource = getMetaDataCharSource(metadata);
|
CharSource metaDataCharSource = getMetaDataCharSource(metadata);
|
||||||
return CharSource.concat(new ReaderCharSource(pushbackReader),
|
return CharSource.concat(new ReaderCharSource(pushbackReader), metaDataCharSource).openStream();
|
||||||
metaDataCharSource).openStream();
|
|
||||||
} catch (TimeoutException te) {
|
} catch (TimeoutException te) {
|
||||||
final String msg = NbBundle.getMessage(this.getClass(),
|
final String msg = NbBundle.getMessage(this.getClass(),
|
||||||
"AbstractFileTikaTextExtract.index.tikaParseTimeout.text",
|
"AbstractFileTikaTextExtract.index.tikaParseTimeout.text",
|
||||||
@ -273,19 +273,19 @@ final class TikaTextExtractor extends TextExtractor {
|
|||||||
* @throws
|
* @throws
|
||||||
* org.sleuthkit.autopsy.textextractors.TextExtractor.ExtractionException
|
* org.sleuthkit.autopsy.textextractors.TextExtractor.ExtractionException
|
||||||
*/
|
*/
|
||||||
private InputStream runOcrAndGetOutputStream(AbstractFile file) throws ExtractionException {
|
private InputStream performOCR(AbstractFile file) throws ExtractionException {
|
||||||
File inputFile = null;
|
File inputFile = null;
|
||||||
File outputFile = null;
|
File outputFile = null;
|
||||||
try {
|
try {
|
||||||
|
String tempDirectory = Case.getCurrentCaseThrows().getTempDirectory();
|
||||||
|
|
||||||
//Appending file id makes the name unique
|
//Appending file id makes the name unique
|
||||||
String tempFileName = file.getId() + file.getName();
|
String tempFileName = FileUtil.escapeFileName(file.getId() + file.getName());
|
||||||
inputFile = Paths.get(Case.getCurrentCaseThrows().getTempDirectory(),
|
inputFile = Paths.get(tempDirectory, tempFileName).toFile();
|
||||||
tempFileName).toFile();
|
|
||||||
ContentUtils.writeToFile(content, inputFile);
|
ContentUtils.writeToFile(content, inputFile);
|
||||||
|
|
||||||
String tempOutputName = file.getId() + TESSERACT_OUTPUT_FILE_NAME;
|
String tempOutputName = FileUtil.escapeFileName(file.getId() + TESSERACT_OUTPUT_FILE_NAME);
|
||||||
String outputFilePath = Paths.get(Case.getCurrentCaseThrows().getTempDirectory(),
|
String outputFilePath = Paths.get(tempDirectory, tempOutputName).toString();
|
||||||
tempOutputName).toString();
|
|
||||||
String executeablePath = TESSERACT_PATH.toString();
|
String executeablePath = TESSERACT_PATH.toString();
|
||||||
|
|
||||||
//Build tesseract commands
|
//Build tesseract commands
|
||||||
@ -303,7 +303,7 @@ final class TikaTextExtractor extends TextExtractor {
|
|||||||
} else {
|
} else {
|
||||||
ExecUtil.execute(process);
|
ExecUtil.execute(process);
|
||||||
}
|
}
|
||||||
|
|
||||||
outputFile = new File(outputFilePath + ".txt");
|
outputFile = new File(outputFilePath + ".txt");
|
||||||
//Open a stream of the Tesseract text file and send this to Tika
|
//Open a stream of the Tesseract text file and send this to Tika
|
||||||
return new CleanUpStream(outputFile);
|
return new CleanUpStream(outputFile);
|
||||||
@ -324,6 +324,7 @@ final class TikaTextExtractor extends TextExtractor {
|
|||||||
* cancelled.
|
* cancelled.
|
||||||
*/
|
*/
|
||||||
private class GetTikaReader implements Callable<Reader> {
|
private class GetTikaReader implements Callable<Reader> {
|
||||||
|
|
||||||
private final AutoDetectParser parser;
|
private final AutoDetectParser parser;
|
||||||
private final InputStream stream;
|
private final InputStream stream;
|
||||||
private final Metadata metadata;
|
private final Metadata metadata;
|
||||||
@ -354,9 +355,10 @@ final class TikaTextExtractor extends TextExtractor {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Store a reference to file on construction
|
* Store a reference to file on construction
|
||||||
*
|
*
|
||||||
* @param file
|
* @param file
|
||||||
* @throws FileNotFoundException
|
*
|
||||||
|
* @throws FileNotFoundException
|
||||||
*/
|
*/
|
||||||
public CleanUpStream(File file) throws FileNotFoundException {
|
public CleanUpStream(File file) throws FileNotFoundException {
|
||||||
super(file);
|
super(file);
|
||||||
@ -365,8 +367,8 @@ final class TikaTextExtractor extends TextExtractor {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Delete this underlying file when close is called.
|
* Delete this underlying file when close is called.
|
||||||
*
|
*
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
@ -450,25 +452,19 @@ final class TikaTextExtractor extends TextExtractor {
|
|||||||
*/
|
*/
|
||||||
private static String getLanguagePacks() {
|
private static String getLanguagePacks() {
|
||||||
File languagePackRootDir = new File(TESSERACT_PATH.getParent(), "tessdata");
|
File languagePackRootDir = new File(TESSERACT_PATH.getParent(), "tessdata");
|
||||||
//Acceptable extensions for Tesseract-OCR version 3.05 language packs.
|
if (!languagePackRootDir.exists()) {
|
||||||
//All extensions other than traineddata are associated with cube files that
|
return "";
|
||||||
//have been made obsolete since version 4.0.
|
}
|
||||||
List<String> acceptableExtensions = Arrays.asList("traineddata", "params",
|
|
||||||
"lm", "fold", "bigrams", "nn", "word-freq", "size",
|
List<String> languagePacks = new ArrayList<>();
|
||||||
"user-patterns", "user-words");
|
for (File languagePack : languagePackRootDir.listFiles()) {
|
||||||
//Pull out only unique languagePacks
|
String fileExt = FilenameUtils.getExtension(languagePack.getName());
|
||||||
HashSet<String> languagePacks = new HashSet<>();
|
if (!languagePack.isDirectory() && TESSERACT_LANGUAGE_PACK_EXT.equals(fileExt)) {
|
||||||
if (languagePackRootDir.exists()) {
|
String packageName = FilenameUtils.getBaseName(languagePack.getName());
|
||||||
for (File languagePack : languagePackRootDir.listFiles()) {
|
languagePacks.add(packageName);
|
||||||
if (languagePack.isDirectory() || !acceptableExtensions.contains(
|
|
||||||
FilenameUtils.getExtension(languagePack.getName()))) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
String threeLetterPackageName = languagePack.getName().substring(0, 3);
|
|
||||||
//Ignore the eng language pack if accidentally added
|
|
||||||
languagePacks.add(threeLetterPackageName);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return String.join("+", languagePacks);
|
return String.join("+", languagePacks);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -499,8 +495,8 @@ final class TikaTextExtractor extends TextExtractor {
|
|||||||
* Determines how the extraction process will proceed given the settings
|
* Determines how the extraction process will proceed given the settings
|
||||||
* stored in this context instance.
|
* stored in this context instance.
|
||||||
*
|
*
|
||||||
* See the ImageConfig class in the extractionconfigs package
|
* See the ImageConfig class in the extractionconfigs package for available
|
||||||
for available settings.
|
* settings.
|
||||||
*
|
*
|
||||||
* @param context Instance containing config classes
|
* @param context Instance containing config classes
|
||||||
*/
|
*/
|
||||||
|
@ -31,6 +31,7 @@ import org.openide.util.NbBundle.Messages;
|
|||||||
import org.openide.util.lookup.Lookups;
|
import org.openide.util.lookup.Lookups;
|
||||||
import org.sleuthkit.autopsy.casemodule.Case;
|
import org.sleuthkit.autopsy.casemodule.Case;
|
||||||
import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
|
import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
|
||||||
|
import org.sleuthkit.autopsy.coreutils.ExecUtil.ProcessTerminator;
|
||||||
import org.sleuthkit.autopsy.coreutils.Logger;
|
import org.sleuthkit.autopsy.coreutils.Logger;
|
||||||
import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil;
|
import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil;
|
||||||
import org.sleuthkit.autopsy.ingest.FileIngestModule;
|
import org.sleuthkit.autopsy.ingest.FileIngestModule;
|
||||||
@ -477,10 +478,11 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
|
|||||||
private boolean extractTextAndIndex(AbstractFile aFile, String detectedFormat) throws IngesterException {
|
private boolean extractTextAndIndex(AbstractFile aFile, String detectedFormat) throws IngesterException {
|
||||||
ImageConfig imageConfig = new ImageConfig();
|
ImageConfig imageConfig = new ImageConfig();
|
||||||
imageConfig.setOCREnabled(KeywordSearchSettings.getOcrOption());
|
imageConfig.setOCREnabled(KeywordSearchSettings.getOcrOption());
|
||||||
Lookup extractionContext = Lookups.fixed(imageConfig);
|
ProcessTerminator terminator = () -> context.fileIngestIsCancelled();
|
||||||
|
Lookup extractionContext = Lookups.fixed(imageConfig, terminator);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
Reader specializedReader = TextReaders.getReader(aFile,extractionContext);
|
Reader specializedReader = TextReaders.getReader(aFile, extractionContext);
|
||||||
//divide into chunks and index
|
//divide into chunks and index
|
||||||
return Ingester.getDefault().indexText(specializedReader,aFile.getId(),aFile.getName(), aFile, context);
|
return Ingester.getDefault().indexText(specializedReader,aFile.getId(),aFile.getName(), aFile, context);
|
||||||
} catch (TextReaders.NoTextReaderFound ex) {
|
} catch (TextReaders.NoTextReaderFound ex) {
|
||||||
|
BIN
thirdparty/Tesseract-OCR/ambiguous_words.exe
vendored
BIN
thirdparty/Tesseract-OCR/ambiguous_words.exe
vendored
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/classifier_tester.exe
vendored
BIN
thirdparty/Tesseract-OCR/classifier_tester.exe
vendored
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/cntraining.exe
vendored
BIN
thirdparty/Tesseract-OCR/cntraining.exe
vendored
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/combine_tessdata.exe
vendored
BIN
thirdparty/Tesseract-OCR/combine_tessdata.exe
vendored
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/dawg2wordlist.exe
vendored
BIN
thirdparty/Tesseract-OCR/dawg2wordlist.exe
vendored
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/java/ScrollView.jar
vendored
BIN
thirdparty/Tesseract-OCR/java/ScrollView.jar
vendored
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libgcc_s_sjlj-1.dll
vendored
BIN
thirdparty/Tesseract-OCR/libgcc_s_sjlj-1.dll
vendored
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libgomp-1.dll
vendored
BIN
thirdparty/Tesseract-OCR/libgomp-1.dll
vendored
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libstdc++-6.dll
vendored
BIN
thirdparty/Tesseract-OCR/libstdc++-6.dll
vendored
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/libtesseract-4.dll
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/libtesseract-4.dll
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/lstmeval.exe
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/lstmeval.exe
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/lstmtraining.exe
vendored
Executable file
BIN
thirdparty/Tesseract-OCR/lstmtraining.exe
vendored
Executable file
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/mftraining.exe
vendored
BIN
thirdparty/Tesseract-OCR/mftraining.exe
vendored
Binary file not shown.
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/shapeclustering.exe
vendored
BIN
thirdparty/Tesseract-OCR/shapeclustering.exe
vendored
Binary file not shown.
13
thirdparty/Tesseract-OCR/tessdata/configs/lstm.train
vendored
Executable file
13
thirdparty/Tesseract-OCR/tessdata/configs/lstm.train
vendored
Executable file
@ -0,0 +1,13 @@
|
|||||||
|
disable_character_fragments T
|
||||||
|
file_type .bl
|
||||||
|
textord_fast_pitch_test T
|
||||||
|
tessedit_single_match 0
|
||||||
|
tessedit_zero_rejection T
|
||||||
|
tessedit_minimal_rejection F
|
||||||
|
tessedit_write_rep_codes F
|
||||||
|
il1_adaption_test 1
|
||||||
|
edges_children_fix F
|
||||||
|
edges_childarea 0.65
|
||||||
|
edges_boxarea 0.9
|
||||||
|
tessedit_train_line_recognizer T
|
||||||
|
textord_no_rejects T
|
9551
thirdparty/Tesseract-OCR/tessdata/eng.cube.bigrams
vendored
9551
thirdparty/Tesseract-OCR/tessdata/eng.cube.bigrams
vendored
File diff suppressed because it is too large
Load Diff
12
thirdparty/Tesseract-OCR/tessdata/eng.cube.fold
vendored
12
thirdparty/Tesseract-OCR/tessdata/eng.cube.fold
vendored
@ -1,12 +0,0 @@
|
|||||||
0oO
|
|
||||||
lI1
|
|
||||||
cC
|
|
||||||
kK
|
|
||||||
pP
|
|
||||||
sS
|
|
||||||
uU
|
|
||||||
vV
|
|
||||||
wW
|
|
||||||
xX
|
|
||||||
yY
|
|
||||||
zZ
|
|
@ -1,7 +0,0 @@
|
|||||||
LeadPunc="({[`'
|
|
||||||
TrailPunc=}:;-]!?`,.)"'
|
|
||||||
NumLeadPunc=#({[@$
|
|
||||||
NumTrailPunc=}):;].,%
|
|
||||||
Operators=*+-/.:,()[]
|
|
||||||
Digits=0123456789
|
|
||||||
Alphas=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
|
|
BIN
thirdparty/Tesseract-OCR/tessdata/eng.cube.nn
vendored
BIN
thirdparty/Tesseract-OCR/tessdata/eng.cube.nn
vendored
Binary file not shown.
@ -1,14 +0,0 @@
|
|||||||
RecoWgt=1.0
|
|
||||||
SizeWgt=0.2435
|
|
||||||
OODWgt=0.0214
|
|
||||||
NumWgt=0.036
|
|
||||||
CharBigramsWgt=0.1567
|
|
||||||
MaxSegPerChar=8
|
|
||||||
BeamWidth=10
|
|
||||||
ConvGridSize=48
|
|
||||||
WordUnigramsWgt=0.01
|
|
||||||
MaxWordAspectRatio=20.0000
|
|
||||||
MinSpaceHeightRatio=0.5000
|
|
||||||
MaxSpaceHeightRatio=0.6000
|
|
||||||
HistWindWid=2
|
|
||||||
MinConCompSize=0
|
|
194633
thirdparty/Tesseract-OCR/tessdata/eng.cube.size
vendored
194633
thirdparty/Tesseract-OCR/tessdata/eng.cube.size
vendored
File diff suppressed because it is too large
Load Diff
171802
thirdparty/Tesseract-OCR/tessdata/eng.cube.word-freq
vendored
171802
thirdparty/Tesseract-OCR/tessdata/eng.cube.word-freq
vendored
File diff suppressed because it is too large
Load Diff
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/tessdata/eng.traineddata
vendored
BIN
thirdparty/Tesseract-OCR/tessdata/eng.traineddata
vendored
Binary file not shown.
@ -1,2 +0,0 @@
|
|||||||
1-\d\d\d-GOOG-411
|
|
||||||
www.\n\\\*.com
|
|
@ -1,5 +0,0 @@
|
|||||||
the
|
|
||||||
quick
|
|
||||||
brown
|
|
||||||
fox
|
|
||||||
jumped
|
|
BIN
thirdparty/Tesseract-OCR/tessdata/enm.traineddata
vendored
BIN
thirdparty/Tesseract-OCR/tessdata/enm.traineddata
vendored
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/tesseract.exe
vendored
BIN
thirdparty/Tesseract-OCR/tesseract.exe
vendored
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/text2image.exe
vendored
BIN
thirdparty/Tesseract-OCR/text2image.exe
vendored
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/unicharset_extractor.exe
vendored
BIN
thirdparty/Tesseract-OCR/unicharset_extractor.exe
vendored
Binary file not shown.
BIN
thirdparty/Tesseract-OCR/wordlist2dawg.exe
vendored
BIN
thirdparty/Tesseract-OCR/wordlist2dawg.exe
vendored
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user