mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-12 16:06:15 +00:00
Move language pack locations to AppData
This commit is contained in:
parent
3c99771aba
commit
ec06a54dbf
@ -20,6 +20,7 @@ package org.sleuthkit.autopsy.core;
|
||||
|
||||
import java.awt.Cursor;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.Callable;
|
||||
@ -29,6 +30,8 @@ import java.util.logging.Handler;
|
||||
import java.util.logging.Level;
|
||||
import javafx.application.Platform;
|
||||
import javafx.embed.swing.JFXPanel;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.openide.modules.InstalledFileLocator;
|
||||
import org.openide.modules.ModuleInstall;
|
||||
import org.openide.util.NbBundle;
|
||||
import org.openide.windows.WindowManager;
|
||||
@ -286,11 +289,33 @@ public class Installer extends ModuleInstall {
|
||||
pythonModulesDir.mkdir();
|
||||
}
|
||||
|
||||
/**
|
||||
* Make a folder in the config directory for Ocr Language Packs if one does
|
||||
* not exist.
|
||||
*/
|
||||
private static void ensureOcrLanguagePacksFolderExists() {
|
||||
File ocrLanguagePacksDir = new File(PlatformUtil.getOcrLanguagePacksPath());
|
||||
boolean createDirectory = ocrLanguagePacksDir.mkdir();
|
||||
|
||||
//If the directory did not exist, copy the tessdata folder over so we
|
||||
//support english.
|
||||
if(createDirectory) {
|
||||
File tessdataDir = InstalledFileLocator.getDefault().locate(
|
||||
"Tesseract-OCR/tessdata", Installer.class.getPackage().getName(), false);
|
||||
try {
|
||||
FileUtils.copyDirectory(tessdataDir, ocrLanguagePacksDir);
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.SEVERE, "Copying over default language packs for Tesseract failed.", ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void restored() {
|
||||
super.restored();
|
||||
ensurePythonModulesFolderExists();
|
||||
ensureClassifierFolderExists();
|
||||
ensureOcrLanguagePacksFolderExists();
|
||||
initJavaFx();
|
||||
for (ModuleInstall mi : packageInstallers) {
|
||||
try {
|
||||
|
@ -52,6 +52,7 @@ public class PlatformUtil {
|
||||
|
||||
private static final String PYTHON_MODULES_SUBDIRECTORY = "python_modules"; //NON-NLS
|
||||
private static final String CLASSIFIERS_SUBDIRECTORY = "object_detection_classifiers"; //NON-NLS
|
||||
private static final String OCR_LANGUAGE_SUBDIRECTORY = "ocr_language_packs"; //NON-NLS
|
||||
private static String javaPath = null;
|
||||
public static final String OS_NAME_UNKNOWN = NbBundle.getMessage(PlatformUtil.class, "PlatformUtil.nameUnknown");
|
||||
public static final String OS_VERSION_UNKNOWN = NbBundle.getMessage(PlatformUtil.class, "PlatformUtil.verUnknown");
|
||||
@ -117,6 +118,15 @@ public class PlatformUtil {
|
||||
return getUserDirectory().getAbsolutePath() + File.separator + PYTHON_MODULES_SUBDIRECTORY;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get root path where the user's Ocr language packs are stored.
|
||||
*
|
||||
* @return Absolute path to the Ocr language packs root directory.
|
||||
*/
|
||||
public static String getOcrLanguagePacksPath() {
|
||||
return getUserDirectory().getAbsolutePath() + File.separator + OCR_LANGUAGE_SUBDIRECTORY;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get root path where the user's object detection classifiers are stored.
|
||||
*
|
||||
|
@ -202,14 +202,9 @@ final class TikaTextExtractor extends TextExtractor {
|
||||
TesseractOCRConfig ocrConfig = new TesseractOCRConfig();
|
||||
String tesseractFolder = TESSERACT_PATH.getParent();
|
||||
ocrConfig.setTesseractPath(tesseractFolder);
|
||||
/*
|
||||
* Tesseract expects language data packs to be in a subdirectory
|
||||
* of tesseractFolder, in a folder called "tessdata". If they
|
||||
* are stored somewhere else, use
|
||||
* ocrConfig.setTessdataPath(String tessdataPath) to point to
|
||||
* them
|
||||
*/
|
||||
|
||||
ocrConfig.setLanguage(LANGUAGE_PACKS);
|
||||
ocrConfig.setTessdataPath(PlatformUtil.getOcrLanguagePacksPath());
|
||||
parseContext.set(TesseractOCRConfig.class, ocrConfig);
|
||||
|
||||
stream = new ReadContentInputStream(content);
|
||||
@ -292,6 +287,7 @@ final class TikaTextExtractor extends TextExtractor {
|
||||
process.command(executeablePath,
|
||||
String.format("\"%s\"", inputFile.getAbsolutePath()),
|
||||
String.format("\"%s\"", outputFilePath),
|
||||
"--tessdata-dir", PlatformUtil.getOcrLanguagePacksPath(),
|
||||
//language pack command flag
|
||||
"-l", LANGUAGE_PACKS);
|
||||
|
||||
@ -450,10 +446,7 @@ final class TikaTextExtractor extends TextExtractor {
|
||||
* @return String of all language packs available for Tesseract to use
|
||||
*/
|
||||
private static String getLanguagePacks() {
|
||||
File languagePackRootDir = new File(TESSERACT_PATH.getParent(), "tessdata");
|
||||
if (!languagePackRootDir.exists()) {
|
||||
return "";
|
||||
}
|
||||
File languagePackRootDir = new File(PlatformUtil.getOcrLanguagePacksPath());
|
||||
|
||||
List<String> languagePacks = new ArrayList<>();
|
||||
for (File languagePack : languagePackRootDir.listFiles()) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user