Move language pack locations to AppData

This commit is contained in:
U-BASIS\dsmyda 2018-12-19 16:04:16 -05:00
parent 3c99771aba
commit ec06a54dbf
3 changed files with 39 additions and 11 deletions

View File

@ -20,6 +20,7 @@ package org.sleuthkit.autopsy.core;
import java.awt.Cursor;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
@ -29,6 +30,8 @@ import java.util.logging.Handler;
import java.util.logging.Level;
import javafx.application.Platform;
import javafx.embed.swing.JFXPanel;
import org.apache.commons.io.FileUtils;
import org.openide.modules.InstalledFileLocator;
import org.openide.modules.ModuleInstall;
import org.openide.util.NbBundle;
import org.openide.windows.WindowManager;
@ -286,11 +289,33 @@ public class Installer extends ModuleInstall {
pythonModulesDir.mkdir();
}
/**
 * Make a folder for OCR language packs (at the location reported by
 * PlatformUtil.getOcrLanguagePacksPath()) if one does not exist. When the
 * folder is newly created by this call, it is seeded with the contents of
 * the installed Tesseract-OCR/tessdata folder so that English is supported.
 * Failures to seed the folder are logged and do not propagate to the caller.
 */
private static void ensureOcrLanguagePacksFolderExists() {
    File ocrLanguagePacksDir = new File(PlatformUtil.getOcrLanguagePacksPath());
    // mkdir() returns false both when the folder already exists and when it
    // could not be created; in either case there is nothing to seed.
    boolean directoryCreated = ocrLanguagePacksDir.mkdir();
    if (!directoryCreated) {
        return;
    }

    File tessdataDir = InstalledFileLocator.getDefault().locate(
            "Tesseract-OCR/tessdata", Installer.class.getPackage().getName(), false);
    // locate() returns null when the requested file is not part of the
    // installation. Passing null to copyDirectory would throw a
    // NullPointerException, which the IOException handler below would not
    // catch, so report it here instead.
    if (tessdataDir == null) {
        logger.log(Level.SEVERE, "Copying over default language packs for Tesseract failed: Tesseract-OCR/tessdata was not found in the installation.");
        return;
    }

    try {
        FileUtils.copyDirectory(tessdataDir, ocrLanguagePacksDir);
    } catch (IOException ex) {
        logger.log(Level.SEVERE, "Copying over default language packs for Tesseract failed.", ex);
    }
}
@Override
public void restored() {
super.restored();
ensurePythonModulesFolderExists();
ensureClassifierFolderExists();
ensureOcrLanguagePacksFolderExists();
initJavaFx();
for (ModuleInstall mi : packageInstallers) {
try {

View File

@ -52,6 +52,7 @@ public class PlatformUtil {
// Names of per-user subdirectories. The getters visible in this class append
// the python modules and OCR language pack names to the user directory's
// absolute path; the classifiers name is presumably used the same way
// (NOTE(review): confirm against the classifiers getter).
private static final String PYTHON_MODULES_SUBDIRECTORY = "python_modules"; //NON-NLS
private static final String CLASSIFIERS_SUBDIRECTORY = "object_detection_classifiers"; //NON-NLS
private static final String OCR_LANGUAGE_SUBDIRECTORY = "ocr_language_packs"; //NON-NLS
private static String javaPath = null;
public static final String OS_NAME_UNKNOWN = NbBundle.getMessage(PlatformUtil.class, "PlatformUtil.nameUnknown");
public static final String OS_VERSION_UNKNOWN = NbBundle.getMessage(PlatformUtil.class, "PlatformUtil.verUnknown");
@ -117,6 +118,15 @@ public class PlatformUtil {
return getUserDirectory().getAbsolutePath() + File.separator + PYTHON_MODULES_SUBDIRECTORY;
}
/**
 * Builds the location of the folder that holds the user's OCR language
 * packs.
 *
 * @return Absolute path of the user directory, followed by the platform
 *         file separator and the OCR language packs subdirectory name.
 */
public static String getOcrLanguagePacksPath() {
    final String userDirectoryPath = getUserDirectory().getAbsolutePath();
    return String.join(File.separator, userDirectoryPath, OCR_LANGUAGE_SUBDIRECTORY);
}
/**
* Get root path where the user's object detection classifiers are stored.
*

View File

@ -202,14 +202,9 @@ final class TikaTextExtractor extends TextExtractor {
TesseractOCRConfig ocrConfig = new TesseractOCRConfig();
String tesseractFolder = TESSERACT_PATH.getParent();
ocrConfig.setTesseractPath(tesseractFolder);
/*
* Tesseract expects language data packs to be in a subdirectory
* of tesseractFolder, in a folder called "tessdata". If they
* are stored somewhere else, use
* ocrConfig.setTessdataPath(String tessdataPath) to point to
* them
*/
ocrConfig.setLanguage(LANGUAGE_PACKS);
ocrConfig.setTessdataPath(PlatformUtil.getOcrLanguagePacksPath());
parseContext.set(TesseractOCRConfig.class, ocrConfig);
stream = new ReadContentInputStream(content);
@ -292,6 +287,7 @@ final class TikaTextExtractor extends TextExtractor {
process.command(executeablePath,
String.format("\"%s\"", inputFile.getAbsolutePath()),
String.format("\"%s\"", outputFilePath),
"--tessdata-dir", PlatformUtil.getOcrLanguagePacksPath(),
//language pack command flag
"-l", LANGUAGE_PACKS);
@ -450,10 +446,7 @@ final class TikaTextExtractor extends TextExtractor {
* @return String of all language packs available for Tesseract to use
*/
private static String getLanguagePacks() {
File languagePackRootDir = new File(TESSERACT_PATH.getParent(), "tessdata");
if (!languagePackRootDir.exists()) {
return "";
}
File languagePackRootDir = new File(PlatformUtil.getOcrLanguagePacksPath());
List<String> languagePacks = new ArrayList<>();
for (File languagePack : languagePackRootDir.listFiles()) {