Move language pack locations to AppData

This commit is contained in:
U-BASIS\dsmyda 2018-12-19 16:04:16 -05:00
parent 3c99771aba
commit ec06a54dbf
3 changed files with 39 additions and 11 deletions

View File

@ -20,6 +20,7 @@ package org.sleuthkit.autopsy.core;
import java.awt.Cursor; import java.awt.Cursor;
import java.io.File; import java.io.File;
import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.concurrent.Callable; import java.util.concurrent.Callable;
@ -29,6 +30,8 @@ import java.util.logging.Handler;
import java.util.logging.Level; import java.util.logging.Level;
import javafx.application.Platform; import javafx.application.Platform;
import javafx.embed.swing.JFXPanel; import javafx.embed.swing.JFXPanel;
import org.apache.commons.io.FileUtils;
import org.openide.modules.InstalledFileLocator;
import org.openide.modules.ModuleInstall; import org.openide.modules.ModuleInstall;
import org.openide.util.NbBundle; import org.openide.util.NbBundle;
import org.openide.windows.WindowManager; import org.openide.windows.WindowManager;
@ -285,12 +288,34 @@ public class Installer extends ModuleInstall {
File pythonModulesDir = new File(PlatformUtil.getUserPythonModulesPath()); File pythonModulesDir = new File(PlatformUtil.getUserPythonModulesPath());
pythonModulesDir.mkdir(); pythonModulesDir.mkdir();
} }
/**
 * Make a folder in the config directory for Ocr Language Packs if one does
 * not exist. When the folder is newly created by this call, the tessdata
 * folder shipped with the installation is copied into it so that English
 * is supported.
 *
 * Problems (folder cannot be created, bundled tessdata folder cannot be
 * located, copy fails) are logged and never thrown, so that module start
 * up is not aborted by a missing or unreadable language pack folder.
 */
private static void ensureOcrLanguagePacksFolderExists() {
    File ocrLanguagePacksDir = new File(PlatformUtil.getOcrLanguagePacksPath());

    // mkdir() only returns true when this call actually created the
    // directory, which is the signal that the defaults must be seeded.
    boolean createdDirectory = ocrLanguagePacksDir.mkdir();
    if (!createdDirectory) {
        // false means either "already there" (nothing to do) or "could not
        // be created"; only the latter is worth reporting.
        if (!ocrLanguagePacksDir.isDirectory()) {
            logger.log(Level.SEVERE, "Unable to create the Ocr language packs directory: "
                    + ocrLanguagePacksDir.getAbsolutePath());
        }
        return;
    }

    //The directory did not exist, copy the tessdata folder over so we
    //support english.
    File tessdataDir = InstalledFileLocator.getDefault().locate(
            "Tesseract-OCR/tessdata", Installer.class.getPackage().getName(), false);
    if (tessdataDir == null) {
        // locate() yields null when the file is not part of the
        // installation; handing null to copyDirectory would throw an
        // unchecked exception that the IOException handler below misses.
        logger.log(Level.SEVERE, "Default Tesseract language packs (Tesseract-OCR/tessdata) could not be located in the installation.");
        return;
    }

    try {
        FileUtils.copyDirectory(tessdataDir, ocrLanguagePacksDir);
    } catch (IOException ex) {
        logger.log(Level.SEVERE, "Copying over default language packs for Tesseract failed.", ex);
    }
}
@Override @Override
public void restored() { public void restored() {
super.restored(); super.restored();
ensurePythonModulesFolderExists(); ensurePythonModulesFolderExists();
ensureClassifierFolderExists(); ensureClassifierFolderExists();
ensureOcrLanguagePacksFolderExists();
initJavaFx(); initJavaFx();
for (ModuleInstall mi : packageInstallers) { for (ModuleInstall mi : packageInstallers) {
try { try {

View File

@ -52,6 +52,7 @@ public class PlatformUtil {
private static final String PYTHON_MODULES_SUBDIRECTORY = "python_modules"; //NON-NLS private static final String PYTHON_MODULES_SUBDIRECTORY = "python_modules"; //NON-NLS
private static final String CLASSIFIERS_SUBDIRECTORY = "object_detection_classifiers"; //NON-NLS private static final String CLASSIFIERS_SUBDIRECTORY = "object_detection_classifiers"; //NON-NLS
private static final String OCR_LANGUAGE_SUBDIRECTORY = "ocr_language_packs"; //NON-NLS
private static String javaPath = null; private static String javaPath = null;
public static final String OS_NAME_UNKNOWN = NbBundle.getMessage(PlatformUtil.class, "PlatformUtil.nameUnknown"); public static final String OS_NAME_UNKNOWN = NbBundle.getMessage(PlatformUtil.class, "PlatformUtil.nameUnknown");
public static final String OS_VERSION_UNKNOWN = NbBundle.getMessage(PlatformUtil.class, "PlatformUtil.verUnknown"); public static final String OS_VERSION_UNKNOWN = NbBundle.getMessage(PlatformUtil.class, "PlatformUtil.verUnknown");
@ -116,6 +117,15 @@ public class PlatformUtil {
public static String getUserPythonModulesPath() { public static String getUserPythonModulesPath() {
return getUserDirectory().getAbsolutePath() + File.separator + PYTHON_MODULES_SUBDIRECTORY; return getUserDirectory().getAbsolutePath() + File.separator + PYTHON_MODULES_SUBDIRECTORY;
} }
/**
 * Build the location of the folder holding the user's Ocr language packs,
 * which is the Ocr language pack subdirectory of the user directory.
 *
 * @return Absolute path to the Ocr language packs root directory.
 */
public static String getOcrLanguagePacksPath() {
    final String userDirectoryPath = getUserDirectory().getAbsolutePath();
    final String languagePacksPath = userDirectoryPath + File.separator + OCR_LANGUAGE_SUBDIRECTORY;
    return languagePacksPath;
}
/** /**
* Get root path where the user's object detection classifiers are stored. * Get root path where the user's object detection classifiers are stored.

View File

@ -202,14 +202,9 @@ final class TikaTextExtractor extends TextExtractor {
TesseractOCRConfig ocrConfig = new TesseractOCRConfig(); TesseractOCRConfig ocrConfig = new TesseractOCRConfig();
String tesseractFolder = TESSERACT_PATH.getParent(); String tesseractFolder = TESSERACT_PATH.getParent();
ocrConfig.setTesseractPath(tesseractFolder); ocrConfig.setTesseractPath(tesseractFolder);
/*
* Tesseract expects language data packs to be in a subdirectory
* of tesseractFolder, in a folder called "tessdata". If they
* are stored somewhere else, use
* ocrConfig.setTessdataPath(String tessdataPath) to point to
* them
*/
ocrConfig.setLanguage(LANGUAGE_PACKS); ocrConfig.setLanguage(LANGUAGE_PACKS);
ocrConfig.setTessdataPath(PlatformUtil.getOcrLanguagePacksPath());
parseContext.set(TesseractOCRConfig.class, ocrConfig); parseContext.set(TesseractOCRConfig.class, ocrConfig);
stream = new ReadContentInputStream(content); stream = new ReadContentInputStream(content);
@ -292,6 +287,7 @@ final class TikaTextExtractor extends TextExtractor {
process.command(executeablePath, process.command(executeablePath,
String.format("\"%s\"", inputFile.getAbsolutePath()), String.format("\"%s\"", inputFile.getAbsolutePath()),
String.format("\"%s\"", outputFilePath), String.format("\"%s\"", outputFilePath),
"--tessdata-dir", PlatformUtil.getOcrLanguagePacksPath(),
//language pack command flag //language pack command flag
"-l", LANGUAGE_PACKS); "-l", LANGUAGE_PACKS);
@ -450,10 +446,7 @@ final class TikaTextExtractor extends TextExtractor {
* @return String of all language packs available for Tesseract to use * @return String of all language packs available for Tesseract to use
*/ */
private static String getLanguagePacks() { private static String getLanguagePacks() {
File languagePackRootDir = new File(TESSERACT_PATH.getParent(), "tessdata"); File languagePackRootDir = new File(PlatformUtil.getOcrLanguagePacksPath());
if (!languagePackRootDir.exists()) {
return "";
}
List<String> languagePacks = new ArrayList<>(); List<String> languagePacks = new ArrayList<>();
for (File languagePack : languagePackRootDir.listFiles()) { for (File languagePack : languagePackRootDir.listFiles()) {