mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-12 16:06:15 +00:00
Merge pull request #6390 from sleuthkit/release-4.17.0
Merge release-4.17.0 branch into develop branch
This commit is contained in:
commit
fd20fb76a8
@ -62,6 +62,7 @@ public final class UserPreferences {
|
|||||||
private static final String MESSAGE_SERVICE_HOST = "MessageServiceHost"; //NON-NLS
|
private static final String MESSAGE_SERVICE_HOST = "MessageServiceHost"; //NON-NLS
|
||||||
private static final String MESSAGE_SERVICE_PORT = "MessageServicePort"; //NON-NLS
|
private static final String MESSAGE_SERVICE_PORT = "MessageServicePort"; //NON-NLS
|
||||||
public static final String TEXT_TRANSLATOR_NAME = "TextTranslatorName";
|
public static final String TEXT_TRANSLATOR_NAME = "TextTranslatorName";
|
||||||
|
public static final String OCR_TRANSLATION_ENABLED = "OcrTranslationEnabled";
|
||||||
public static final String PROCESS_TIME_OUT_ENABLED = "ProcessTimeOutEnabled"; //NON-NLS
|
public static final String PROCESS_TIME_OUT_ENABLED = "ProcessTimeOutEnabled"; //NON-NLS
|
||||||
public static final String PROCESS_TIME_OUT_HOURS = "ProcessTimeOutHours"; //NON-NLS
|
public static final String PROCESS_TIME_OUT_HOURS = "ProcessTimeOutHours"; //NON-NLS
|
||||||
private static final int DEFAULT_PROCESS_TIMEOUT_HR = 60;
|
private static final int DEFAULT_PROCESS_TIMEOUT_HR = 60;
|
||||||
@ -348,6 +349,14 @@ public final class UserPreferences {
|
|||||||
return preferences.get(TEXT_TRANSLATOR_NAME, null);
|
return preferences.get(TEXT_TRANSLATOR_NAME, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static void setUseOcrInTranslation(boolean enableOcr) {
|
||||||
|
preferences.putBoolean(OCR_TRANSLATION_ENABLED, enableOcr);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean getUseOcrInTranslation() {
|
||||||
|
return preferences.getBoolean(OCR_TRANSLATION_ENABLED, true);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Persists message service connection info.
|
* Persists message service connection info.
|
||||||
*
|
*
|
||||||
|
@ -27,6 +27,7 @@ import javax.swing.text.ViewFactory;
|
|||||||
import javax.swing.text.html.HTMLEditorKit;
|
import javax.swing.text.html.HTMLEditorKit;
|
||||||
import javax.swing.text.html.InlineView;
|
import javax.swing.text.html.InlineView;
|
||||||
import javax.swing.text.html.ParagraphView;
|
import javax.swing.text.html.ParagraphView;
|
||||||
|
import org.sleuthkit.autopsy.coreutils.EscapeUtil;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* JTextPane extension that auto wraps input text using an HTMLEditorKit trick.
|
* JTextPane extension that auto wraps input text using an HTMLEditorKit trick.
|
||||||
@ -98,6 +99,6 @@ public class AutoWrappingJTextPane extends JTextPane {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setText(String text) {
|
public void setText(String text) {
|
||||||
super.setText("<pre>" + text + "</pre>");
|
super.setText("<pre>" + EscapeUtil.escapeHtml(text) + "</pre>");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* Autopsy Forensic Browser
|
* Autopsy Forensic Browser
|
||||||
*
|
*
|
||||||
* Copyright 2011-2019 Basis Technology Corp.
|
* Copyright 2011-2020 Basis Technology Corp.
|
||||||
* Contact: carrier <at> sleuthkit <dot> org
|
* Contact: carrier <at> sleuthkit <dot> org
|
||||||
*
|
*
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
@ -46,7 +46,6 @@ import org.apache.tika.Tika;
|
|||||||
import org.apache.tika.exception.TikaException;
|
import org.apache.tika.exception.TikaException;
|
||||||
import org.apache.tika.metadata.Metadata;
|
import org.apache.tika.metadata.Metadata;
|
||||||
import org.apache.tika.parser.AutoDetectParser;
|
import org.apache.tika.parser.AutoDetectParser;
|
||||||
import org.apache.tika.parser.EmptyParser;
|
|
||||||
import org.apache.tika.parser.ParseContext;
|
import org.apache.tika.parser.ParseContext;
|
||||||
import org.apache.tika.parser.Parser;
|
import org.apache.tika.parser.Parser;
|
||||||
import org.apache.tika.parser.ParsingReader;
|
import org.apache.tika.parser.ParsingReader;
|
||||||
@ -72,6 +71,9 @@ import org.xml.sax.ContentHandler;
|
|||||||
import org.xml.sax.SAXException;
|
import org.xml.sax.SAXException;
|
||||||
import org.xml.sax.helpers.DefaultHandler;
|
import org.xml.sax.helpers.DefaultHandler;
|
||||||
import com.google.common.collect.ImmutableMap;
|
import com.google.common.collect.ImmutableMap;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import org.apache.tika.parser.pdf.PDFParserConfig.OCR_STRATEGY;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extracts text from Tika supported content. Protects against Tika parser hangs
|
* Extracts text from Tika supported content. Protects against Tika parser hangs
|
||||||
@ -126,16 +128,6 @@ final class TikaTextExtractor implements TextExtractor {
|
|||||||
"application/x-z", //NON-NLS
|
"application/x-z", //NON-NLS
|
||||||
"application/x-compress"); //NON-NLS
|
"application/x-compress"); //NON-NLS
|
||||||
|
|
||||||
//Tika should ignore types with embedded files that can be handled by the unpacking modules
|
|
||||||
private static final List<String> EMBEDDED_FILE_MIME_TYPES
|
|
||||||
= ImmutableList.of("application/msword", //NON-NLS
|
|
||||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document", //NON-NLS
|
|
||||||
"application/vnd.ms-powerpoint", //NON-NLS
|
|
||||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation", //NON-NLS
|
|
||||||
"application/vnd.ms-excel", //NON-NLS
|
|
||||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", //NON-NLS
|
|
||||||
"application/pdf"); //NON-NLS
|
|
||||||
|
|
||||||
// Used to log to the tika file that is why it uses the java.util.logging.logger class instead of the Autopsy one
|
// Used to log to the tika file that is why it uses the java.util.logging.logger class instead of the Autopsy one
|
||||||
private static final java.util.logging.Logger TIKA_LOGGER = java.util.logging.Logger.getLogger("Tika"); //NON-NLS
|
private static final java.util.logging.Logger TIKA_LOGGER = java.util.logging.Logger.getLogger("Tika"); //NON-NLS
|
||||||
private static final Logger AUTOPSY_LOGGER = Logger.getLogger(TikaTextExtractor.class.getName());
|
private static final Logger AUTOPSY_LOGGER = Logger.getLogger(TikaTextExtractor.class.getName());
|
||||||
@ -193,52 +185,31 @@ final class TikaTextExtractor implements TextExtractor {
|
|||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public Reader getReader() throws InitReaderException {
|
public Reader getReader() throws InitReaderException {
|
||||||
InputStream stream = null;
|
if (!this.isSupported()) {
|
||||||
|
throw new InitReaderException("Content is not supported");
|
||||||
|
}
|
||||||
|
|
||||||
ParseContext parseContext = new ParseContext();
|
// Only abstract files are supported, see isSupported()
|
||||||
|
final AbstractFile file = ((AbstractFile) content);
|
||||||
|
// This mime type must be non-null, see isSupported()
|
||||||
|
final String mimeType = file.getMIMEType();
|
||||||
|
|
||||||
//Disable appending embedded file text to output for EFE supported types
|
// Handle images separately so the OCR task can be cancelled.
|
||||||
//JIRA-4975
|
// See JIRA-4519 for the need to have cancellation in the UI and ingest.
|
||||||
if(content instanceof AbstractFile && EMBEDDED_FILE_MIME_TYPES.contains(((AbstractFile)content).getMIMEType())) {
|
if (ocrEnabled() && mimeType.toLowerCase().startsWith("image/")) {
|
||||||
parseContext.set(Parser.class, new EmptyParser());
|
InputStream imageOcrStream = performOCR(file);
|
||||||
} else {
|
return new InputStreamReader(imageOcrStream, Charset.forName("UTF-8"));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set up Tika
|
||||||
|
final InputStream stream = new ReadContentInputStream(content);
|
||||||
|
final ParseContext parseContext = new ParseContext();
|
||||||
|
|
||||||
|
// Documents can contain other documents. By adding
|
||||||
|
// the parser back into the context, Tika will recursively
|
||||||
|
// parse embedded documents.
|
||||||
parseContext.set(Parser.class, parser);
|
parseContext.set(Parser.class, parser);
|
||||||
}
|
|
||||||
|
|
||||||
if (ocrEnabled() && content instanceof AbstractFile) {
|
|
||||||
AbstractFile file = ((AbstractFile) content);
|
|
||||||
//Run OCR on images with Tesseract directly.
|
|
||||||
if (file.getMIMEType().toLowerCase().startsWith("image/")) {
|
|
||||||
stream = performOCR(file);
|
|
||||||
} else {
|
|
||||||
//Otherwise, go through Tika for PDFs so that it can
|
|
||||||
//extract images and run Tesseract on them.
|
|
||||||
PDFParserConfig pdfConfig = new PDFParserConfig();
|
|
||||||
|
|
||||||
// Extracting the inline images and letting Tesseract run on each inline image.
|
|
||||||
// https://wiki.apache.org/tika/PDFParser%20%28Apache%20PDFBox%29
|
|
||||||
// https://tika.apache.org/1.7/api/org/apache/tika/parser/pdf/PDFParserConfig.html
|
|
||||||
pdfConfig.setExtractInlineImages(true);
|
|
||||||
// Multiple pages within a PDF file might refer to the same underlying image.
|
|
||||||
pdfConfig.setExtractUniqueInlineImagesOnly(true);
|
|
||||||
parseContext.set(PDFParserConfig.class, pdfConfig);
|
|
||||||
|
|
||||||
// Configure Tesseract parser to perform OCR
|
|
||||||
TesseractOCRConfig ocrConfig = new TesseractOCRConfig();
|
|
||||||
String tesseractFolder = TESSERACT_PATH.getParent();
|
|
||||||
ocrConfig.setTesseractPath(tesseractFolder);
|
|
||||||
|
|
||||||
ocrConfig.setLanguage(languagePacks);
|
|
||||||
ocrConfig.setTessdataPath(PlatformUtil.getOcrLanguagePacksPath());
|
|
||||||
parseContext.set(TesseractOCRConfig.class, ocrConfig);
|
|
||||||
|
|
||||||
stream = new ReadContentInputStream(content);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
stream = new ReadContentInputStream(content);
|
|
||||||
}
|
|
||||||
|
|
||||||
Metadata metadata = new Metadata();
|
|
||||||
// Use the more memory efficient Tika SAX parsers for DOCX and
|
// Use the more memory efficient Tika SAX parsers for DOCX and
|
||||||
// PPTX files (it already uses SAX for XLSX).
|
// PPTX files (it already uses SAX for XLSX).
|
||||||
OfficeParserConfig officeParserConfig = new OfficeParserConfig();
|
OfficeParserConfig officeParserConfig = new OfficeParserConfig();
|
||||||
@ -246,6 +217,30 @@ final class TikaTextExtractor implements TextExtractor {
|
|||||||
officeParserConfig.setUseSAXDocxExtractor(true);
|
officeParserConfig.setUseSAXDocxExtractor(true);
|
||||||
parseContext.set(OfficeParserConfig.class, officeParserConfig);
|
parseContext.set(OfficeParserConfig.class, officeParserConfig);
|
||||||
|
|
||||||
|
if (ocrEnabled()) {
|
||||||
|
// Configure OCR for Tika if it chooses to run OCR
|
||||||
|
// during extraction
|
||||||
|
TesseractOCRConfig ocrConfig = new TesseractOCRConfig();
|
||||||
|
String tesseractFolder = TESSERACT_PATH.getParent();
|
||||||
|
ocrConfig.setTesseractPath(tesseractFolder);
|
||||||
|
ocrConfig.setLanguage(languagePacks);
|
||||||
|
ocrConfig.setTessdataPath(PlatformUtil.getOcrLanguagePacksPath());
|
||||||
|
parseContext.set(TesseractOCRConfig.class, ocrConfig);
|
||||||
|
|
||||||
|
// Configure how Tika handles OCRing PDFs
|
||||||
|
PDFParserConfig pdfConfig = new PDFParserConfig();
|
||||||
|
|
||||||
|
// This strategy tries to pick between OCRing a page in the
|
||||||
|
// PDF and doing text extraction. It makes this choice by
|
||||||
|
// first running text extraction and then counting characters.
|
||||||
|
// If there are too few characters or too many unmapped
|
||||||
|
// unicode characters, it'll run the entire page through OCR
|
||||||
|
// and take that output instead. See JIRA-6938
|
||||||
|
pdfConfig.setOcrStrategy(OCR_STRATEGY.AUTO);
|
||||||
|
parseContext.set(PDFParserConfig.class, pdfConfig);
|
||||||
|
}
|
||||||
|
|
||||||
|
Metadata metadata = new Metadata();
|
||||||
//Make the creation of a TikaReader a cancellable future in case it takes too long
|
//Make the creation of a TikaReader a cancellable future in case it takes too long
|
||||||
Future<Reader> future = executorService.submit(
|
Future<Reader> future = executorService.submit(
|
||||||
new GetTikaReader(parser, stream, metadata, parseContext));
|
new GetTikaReader(parser, stream, metadata, parseContext));
|
||||||
|
@ -6,3 +6,4 @@ TranslationOptionsPanelController.moduleErr.msg=A module caused an error listeni
|
|||||||
TranslationContentPanel.showLabel.text=Show:
|
TranslationContentPanel.showLabel.text=Show:
|
||||||
TranslationOptionsPanel.translationServiceLabel.text=Text translator:
|
TranslationOptionsPanel.translationServiceLabel.text=Text translator:
|
||||||
TranslationOptionsPanel.translationOptionsDescription.text=Configure a 3rd party text translation service to enable text and file name translation.
|
TranslationOptionsPanel.translationOptionsDescription.text=Configure a 3rd party text translation service to enable text and file name translation.
|
||||||
|
TranslationOptionsPanel.enableOcrCheckBox.text=Enable Optical Character Recognition (OCR) in the translation content viewer
|
||||||
|
@ -10,3 +10,4 @@ TranslationOptionsPanelController.moduleErr.msg=A module caused an error listeni
|
|||||||
TranslationContentPanel.showLabel.text=Show:
|
TranslationContentPanel.showLabel.text=Show:
|
||||||
TranslationOptionsPanel.translationServiceLabel.text=Text translator:
|
TranslationOptionsPanel.translationServiceLabel.text=Text translator:
|
||||||
TranslationOptionsPanel.translationOptionsDescription.text=Configure a 3rd party text translation service to enable text and file name translation.
|
TranslationOptionsPanel.translationOptionsDescription.text=Configure a 3rd party text translation service to enable text and file name translation.
|
||||||
|
TranslationOptionsPanel.enableOcrCheckBox.text=Enable Optical Character Recognition (OCR) in the translation content viewer
|
||||||
|
@ -16,17 +16,23 @@
|
|||||||
<Layout>
|
<Layout>
|
||||||
<DimensionLayout dim="0">
|
<DimensionLayout dim="0">
|
||||||
<Group type="103" groupAlignment="0" attributes="0">
|
<Group type="103" groupAlignment="0" attributes="0">
|
||||||
|
<Component id="jSeparator1" alignment="0" max="32767" attributes="0"/>
|
||||||
<Group type="102" attributes="0">
|
<Group type="102" attributes="0">
|
||||||
<EmptySpace max="-2" attributes="0"/>
|
<EmptySpace max="-2" attributes="0"/>
|
||||||
<Group type="103" groupAlignment="0" attributes="0">
|
<Group type="103" groupAlignment="0" attributes="0">
|
||||||
<Component id="translationServicePanel" max="32767" attributes="0"/>
|
<Component id="translationServicePanel" max="32767" attributes="0"/>
|
||||||
|
<Component id="translationOptionsDescription" alignment="0" pref="462" max="32767" attributes="0"/>
|
||||||
|
<Group type="102" attributes="0">
|
||||||
|
<Group type="103" groupAlignment="0" attributes="0">
|
||||||
<Group type="102" attributes="0">
|
<Group type="102" attributes="0">
|
||||||
<Component id="translationServiceLabel" min="-2" max="-2" attributes="0"/>
|
<Component id="translationServiceLabel" min="-2" max="-2" attributes="0"/>
|
||||||
<EmptySpace min="-2" pref="10" max="-2" attributes="0"/>
|
<EmptySpace min="-2" pref="10" max="-2" attributes="0"/>
|
||||||
<Component id="translatorComboBox" min="-2" pref="214" max="-2" attributes="0"/>
|
<Component id="translatorComboBox" min="-2" pref="214" max="-2" attributes="0"/>
|
||||||
|
</Group>
|
||||||
|
<Component id="enableOcrCheckBox" alignment="0" min="-2" max="-2" attributes="0"/>
|
||||||
|
</Group>
|
||||||
<EmptySpace min="0" pref="0" max="32767" attributes="0"/>
|
<EmptySpace min="0" pref="0" max="32767" attributes="0"/>
|
||||||
</Group>
|
</Group>
|
||||||
<Component id="translationOptionsDescription" alignment="0" pref="462" max="32767" attributes="0"/>
|
|
||||||
</Group>
|
</Group>
|
||||||
<EmptySpace max="-2" attributes="0"/>
|
<EmptySpace max="-2" attributes="0"/>
|
||||||
</Group>
|
</Group>
|
||||||
@ -42,9 +48,13 @@
|
|||||||
<Component id="translatorComboBox" alignment="3" min="-2" max="-2" attributes="0"/>
|
<Component id="translatorComboBox" alignment="3" min="-2" max="-2" attributes="0"/>
|
||||||
<Component id="translationServiceLabel" alignment="3" min="-2" max="-2" attributes="0"/>
|
<Component id="translationServiceLabel" alignment="3" min="-2" max="-2" attributes="0"/>
|
||||||
</Group>
|
</Group>
|
||||||
<EmptySpace min="-2" max="-2" attributes="0"/>
|
<EmptySpace max="-2" attributes="0"/>
|
||||||
<Component id="translationServicePanel" max="32767" attributes="0"/>
|
<Component id="translationServicePanel" min="-2" max="-2" attributes="0"/>
|
||||||
<EmptySpace min="-2" max="-2" attributes="0"/>
|
<EmptySpace type="unrelated" max="-2" attributes="0"/>
|
||||||
|
<Component id="jSeparator1" min="-2" pref="10" max="-2" attributes="0"/>
|
||||||
|
<EmptySpace max="-2" attributes="0"/>
|
||||||
|
<Component id="enableOcrCheckBox" min="-2" max="-2" attributes="0"/>
|
||||||
|
<EmptySpace max="32767" attributes="0"/>
|
||||||
</Group>
|
</Group>
|
||||||
</Group>
|
</Group>
|
||||||
</DimensionLayout>
|
</DimensionLayout>
|
||||||
@ -76,5 +86,17 @@
|
|||||||
</Property>
|
</Property>
|
||||||
</Properties>
|
</Properties>
|
||||||
</Component>
|
</Component>
|
||||||
|
<Component class="javax.swing.JSeparator" name="jSeparator1">
|
||||||
|
</Component>
|
||||||
|
<Component class="javax.swing.JCheckBox" name="enableOcrCheckBox">
|
||||||
|
<Properties>
|
||||||
|
<Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor">
|
||||||
|
<ResourceString bundle="org/sleuthkit/autopsy/texttranslation/Bundle.properties" key="TranslationOptionsPanel.enableOcrCheckBox.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, "{key}")"/>
|
||||||
|
</Property>
|
||||||
|
</Properties>
|
||||||
|
<Events>
|
||||||
|
<EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="enableOcrCheckBoxActionPerformed"/>
|
||||||
|
</Events>
|
||||||
|
</Component>
|
||||||
</SubComponents>
|
</SubComponents>
|
||||||
</Form>
|
</Form>
|
@ -111,6 +111,7 @@ final class TranslationOptionsPanel extends javax.swing.JPanel {
|
|||||||
}
|
}
|
||||||
translatorComboBox.setSelectedItem(currentSelection);
|
translatorComboBox.setSelectedItem(currentSelection);
|
||||||
loadSelectedPanelSettings();
|
loadSelectedPanelSettings();
|
||||||
|
enableOcrCheckBox.setSelected(UserPreferences.getUseOcrInTranslation());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -128,6 +129,8 @@ final class TranslationOptionsPanel extends javax.swing.JPanel {
|
|||||||
logger.log(Level.WARNING, "Unable to save settings for TextTranslator named: " + currentSelection, ex);
|
logger.log(Level.WARNING, "Unable to save settings for TextTranslator named: " + currentSelection, ex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Save whether OCR is enabled in the content viewer
|
||||||
|
UserPreferences.setUseOcrInTranslation(enableOcrCheckBox.isSelected());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -144,6 +147,8 @@ final class TranslationOptionsPanel extends javax.swing.JPanel {
|
|||||||
translationServiceLabel = new javax.swing.JLabel();
|
translationServiceLabel = new javax.swing.JLabel();
|
||||||
translationServicePanel = new javax.swing.JPanel();
|
translationServicePanel = new javax.swing.JPanel();
|
||||||
translationOptionsDescription = new javax.swing.JLabel();
|
translationOptionsDescription = new javax.swing.JLabel();
|
||||||
|
jSeparator1 = new javax.swing.JSeparator();
|
||||||
|
enableOcrCheckBox = new javax.swing.JCheckBox();
|
||||||
|
|
||||||
translatorComboBox.addActionListener(new java.awt.event.ActionListener() {
|
translatorComboBox.addActionListener(new java.awt.event.ActionListener() {
|
||||||
public void actionPerformed(java.awt.event.ActionEvent evt) {
|
public void actionPerformed(java.awt.event.ActionEvent evt) {
|
||||||
@ -157,20 +162,31 @@ final class TranslationOptionsPanel extends javax.swing.JPanel {
|
|||||||
|
|
||||||
org.openide.awt.Mnemonics.setLocalizedText(translationOptionsDescription, org.openide.util.NbBundle.getMessage(TranslationOptionsPanel.class, "TranslationOptionsPanel.translationOptionsDescription.text")); // NOI18N
|
org.openide.awt.Mnemonics.setLocalizedText(translationOptionsDescription, org.openide.util.NbBundle.getMessage(TranslationOptionsPanel.class, "TranslationOptionsPanel.translationOptionsDescription.text")); // NOI18N
|
||||||
|
|
||||||
|
org.openide.awt.Mnemonics.setLocalizedText(enableOcrCheckBox, org.openide.util.NbBundle.getMessage(TranslationOptionsPanel.class, "TranslationOptionsPanel.enableOcrCheckBox.text")); // NOI18N
|
||||||
|
enableOcrCheckBox.addActionListener(new java.awt.event.ActionListener() {
|
||||||
|
public void actionPerformed(java.awt.event.ActionEvent evt) {
|
||||||
|
enableOcrCheckBoxActionPerformed(evt);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
javax.swing.GroupLayout layout = new javax.swing.GroupLayout(this);
|
javax.swing.GroupLayout layout = new javax.swing.GroupLayout(this);
|
||||||
this.setLayout(layout);
|
this.setLayout(layout);
|
||||||
layout.setHorizontalGroup(
|
layout.setHorizontalGroup(
|
||||||
layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
|
layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
|
||||||
|
.addComponent(jSeparator1)
|
||||||
.addGroup(layout.createSequentialGroup()
|
.addGroup(layout.createSequentialGroup()
|
||||||
.addContainerGap()
|
.addContainerGap()
|
||||||
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
|
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
|
||||||
.addComponent(translationServicePanel, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
|
.addComponent(translationServicePanel, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
|
||||||
|
.addComponent(translationOptionsDescription, javax.swing.GroupLayout.DEFAULT_SIZE, 462, Short.MAX_VALUE)
|
||||||
|
.addGroup(layout.createSequentialGroup()
|
||||||
|
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
|
||||||
.addGroup(layout.createSequentialGroup()
|
.addGroup(layout.createSequentialGroup()
|
||||||
.addComponent(translationServiceLabel)
|
.addComponent(translationServiceLabel)
|
||||||
.addGap(10, 10, 10)
|
.addGap(10, 10, 10)
|
||||||
.addComponent(translatorComboBox, javax.swing.GroupLayout.PREFERRED_SIZE, 214, javax.swing.GroupLayout.PREFERRED_SIZE)
|
.addComponent(translatorComboBox, javax.swing.GroupLayout.PREFERRED_SIZE, 214, javax.swing.GroupLayout.PREFERRED_SIZE))
|
||||||
.addGap(0, 0, Short.MAX_VALUE))
|
.addComponent(enableOcrCheckBox))
|
||||||
.addComponent(translationOptionsDescription, javax.swing.GroupLayout.PREFERRED_SIZE, 462, Short.MAX_VALUE))
|
.addGap(0, 0, Short.MAX_VALUE)))
|
||||||
.addContainerGap())
|
.addContainerGap())
|
||||||
);
|
);
|
||||||
layout.setVerticalGroup(
|
layout.setVerticalGroup(
|
||||||
@ -183,8 +199,12 @@ final class TranslationOptionsPanel extends javax.swing.JPanel {
|
|||||||
.addComponent(translatorComboBox, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)
|
.addComponent(translatorComboBox, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)
|
||||||
.addComponent(translationServiceLabel))
|
.addComponent(translationServiceLabel))
|
||||||
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
|
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
|
||||||
.addComponent(translationServicePanel, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
|
.addComponent(translationServicePanel, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)
|
||||||
.addContainerGap())
|
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
|
||||||
|
.addComponent(jSeparator1, javax.swing.GroupLayout.PREFERRED_SIZE, 10, javax.swing.GroupLayout.PREFERRED_SIZE)
|
||||||
|
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
|
||||||
|
.addComponent(enableOcrCheckBox)
|
||||||
|
.addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE))
|
||||||
);
|
);
|
||||||
}// </editor-fold>//GEN-END:initComponents
|
}// </editor-fold>//GEN-END:initComponents
|
||||||
|
|
||||||
@ -192,8 +212,14 @@ final class TranslationOptionsPanel extends javax.swing.JPanel {
|
|||||||
updatePanel();
|
updatePanel();
|
||||||
}//GEN-LAST:event_translatorComboBoxActionPerformed
|
}//GEN-LAST:event_translatorComboBoxActionPerformed
|
||||||
|
|
||||||
|
private void enableOcrCheckBoxActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_enableOcrCheckBoxActionPerformed
|
||||||
|
controller.changed();
|
||||||
|
}//GEN-LAST:event_enableOcrCheckBoxActionPerformed
|
||||||
|
|
||||||
|
|
||||||
// Variables declaration - do not modify//GEN-BEGIN:variables
|
// Variables declaration - do not modify//GEN-BEGIN:variables
|
||||||
|
private javax.swing.JCheckBox enableOcrCheckBox;
|
||||||
|
private javax.swing.JSeparator jSeparator1;
|
||||||
private javax.swing.JLabel translationOptionsDescription;
|
private javax.swing.JLabel translationOptionsDescription;
|
||||||
private javax.swing.JLabel translationServiceLabel;
|
private javax.swing.JLabel translationServiceLabel;
|
||||||
private javax.swing.JPanel translationServicePanel;
|
private javax.swing.JPanel translationServicePanel;
|
||||||
|
@ -8,6 +8,7 @@ TranslatedContentViewer.errorExtractingText=An error occurred while extracting t
|
|||||||
TranslatedContentViewer.extractingText=Extracting text, please wait...
|
TranslatedContentViewer.extractingText=Extracting text, please wait...
|
||||||
TranslatedContentViewer.fileHasNoText=File has no text.
|
TranslatedContentViewer.fileHasNoText=File has no text.
|
||||||
TranslatedContentViewer.noServiceProvider=The machine translation software was not found.
|
TranslatedContentViewer.noServiceProvider=The machine translation software was not found.
|
||||||
|
TranslatedContentViewer.ocrNotEnabled=OCR is not enabled. To change, go to Tools->Options->Machine Translation
|
||||||
TranslatedContentViewer.translatingText=Translating text, please wait...
|
TranslatedContentViewer.translatingText=Translating text, please wait...
|
||||||
# {0} - exception message
|
# {0} - exception message
|
||||||
TranslatedContentViewer.translationException=An error occurred while translating the text ({0}).
|
TranslatedContentViewer.translationException=An error occurred while translating the text ({0}).
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* Autopsy Forensic Browser
|
* Autopsy Forensic Browser
|
||||||
*
|
*
|
||||||
* Copyright 2019 Basis Technology Corp.
|
* Copyright 2020 Basis Technology Corp.
|
||||||
* Contact: carrier <at> sleuthkit <dot> org
|
* Contact: carrier <at> sleuthkit <dot> org
|
||||||
*
|
*
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
@ -38,16 +38,15 @@ import org.sleuthkit.datamodel.AbstractFile;
|
|||||||
import org.openide.util.Lookup;
|
import org.openide.util.Lookup;
|
||||||
import org.openide.util.NbBundle;
|
import org.openide.util.NbBundle;
|
||||||
import org.openide.util.lookup.Lookups;
|
import org.openide.util.lookup.Lookups;
|
||||||
import org.sleuthkit.autopsy.corecomponents.DataContentViewerUtility;
|
|
||||||
import org.sleuthkit.autopsy.coreutils.ExecUtil.ProcessTerminator;
|
import org.sleuthkit.autopsy.coreutils.ExecUtil.ProcessTerminator;
|
||||||
import org.sleuthkit.autopsy.textextractors.TextExtractor;
|
import org.sleuthkit.autopsy.textextractors.TextExtractor;
|
||||||
import org.sleuthkit.autopsy.textextractors.TextExtractorFactory;
|
import org.sleuthkit.autopsy.textextractors.TextExtractorFactory;
|
||||||
import org.sleuthkit.autopsy.textextractors.configs.ImageConfig;
|
import org.sleuthkit.autopsy.textextractors.configs.ImageConfig;
|
||||||
import org.sleuthkit.autopsy.texttranslation.TextTranslationService;
|
import org.sleuthkit.autopsy.texttranslation.TextTranslationService;
|
||||||
import org.sleuthkit.datamodel.Content;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.logging.Level;
|
import java.util.logging.Level;
|
||||||
import javax.swing.SwingUtilities;
|
import javax.swing.SwingUtilities;
|
||||||
|
import org.sleuthkit.autopsy.core.UserPreferences;
|
||||||
import org.sleuthkit.autopsy.coreutils.Logger;
|
import org.sleuthkit.autopsy.coreutils.Logger;
|
||||||
import org.sleuthkit.autopsy.coreutils.PlatformUtil;
|
import org.sleuthkit.autopsy.coreutils.PlatformUtil;
|
||||||
import org.sleuthkit.autopsy.texttranslation.ui.TranslationContentPanel.DisplayDropdownOptions;
|
import org.sleuthkit.autopsy.texttranslation.ui.TranslationContentPanel.DisplayDropdownOptions;
|
||||||
@ -60,8 +59,6 @@ public final class TranslatedTextViewer implements TextViewer {
|
|||||||
|
|
||||||
private static final Logger logger = Logger.getLogger(TranslatedTextViewer.class.getName());
|
private static final Logger logger = Logger.getLogger(TranslatedTextViewer.class.getName());
|
||||||
|
|
||||||
private static final boolean OCR_ENABLED = true;
|
|
||||||
private static final boolean OCR_DISABLED = false;
|
|
||||||
private static final int MAX_EXTRACT_SIZE_BYTES = 25600;
|
private static final int MAX_EXTRACT_SIZE_BYTES = 25600;
|
||||||
private static final List<String> INSTALLED_LANGUAGE_PACKS = PlatformUtil.getOcrLanguagePacks();
|
private static final List<String> INSTALLED_LANGUAGE_PACKS = PlatformUtil.getOcrLanguagePacks();
|
||||||
private final TranslationContentPanel panel = new TranslationContentPanel();
|
private final TranslationContentPanel panel = new TranslationContentPanel();
|
||||||
@ -81,15 +78,10 @@ public final class TranslatedTextViewer implements TextViewer {
|
|||||||
SelectionChangeListener displayDropDownListener = new DisplayDropDownChangeListener();
|
SelectionChangeListener displayDropDownListener = new DisplayDropDownChangeListener();
|
||||||
panel.addDisplayTextActionListener(displayDropDownListener);
|
panel.addDisplayTextActionListener(displayDropDownListener);
|
||||||
panel.addOcrDropDownActionListener(new OCRDropdownChangeListener());
|
panel.addOcrDropDownActionListener(new OCRDropdownChangeListener());
|
||||||
Content source = DataContentViewerUtility.getDefaultContent(node);
|
if (UserPreferences.getUseOcrInTranslation()) {
|
||||||
|
|
||||||
if (source instanceof AbstractFile) {
|
|
||||||
boolean isImage = ((AbstractFile) source).getMIMEType().toLowerCase().startsWith("image/");
|
|
||||||
if (isImage) {
|
|
||||||
panel.enableOCRSelection(OCR_ENABLED);
|
|
||||||
panel.addLanguagePackNames(INSTALLED_LANGUAGE_PACKS);
|
panel.addLanguagePackNames(INSTALLED_LANGUAGE_PACKS);
|
||||||
}
|
}
|
||||||
}
|
panel.enableOCRSelection(UserPreferences.getUseOcrInTranslation());
|
||||||
|
|
||||||
int payloadMaxInKB = TextTranslationService.getInstance().getMaxTextChars() / 1000;
|
int payloadMaxInKB = TextTranslationService.getInstance().getMaxTextChars() / 1000;
|
||||||
panel.setWarningLabelMsg(String.format(Bundle.TranslatedTextViewer_maxPayloadSize(), payloadMaxInKB));
|
panel.setWarningLabelMsg(String.format(Bundle.TranslatedTextViewer_maxPayloadSize(), payloadMaxInKB));
|
||||||
@ -201,15 +193,17 @@ public final class TranslatedTextViewer implements TextViewer {
|
|||||||
* @throws
|
* @throws
|
||||||
* org.sleuthkit.autopsy.textextractors.TextExtractor.InitReaderException
|
* org.sleuthkit.autopsy.textextractors.TextExtractor.InitReaderException
|
||||||
*/
|
*/
|
||||||
|
@NbBundle.Messages({
|
||||||
|
"TranslatedContentViewer.ocrNotEnabled=OCR is not enabled. To change, go to Tools->Options->Machine Translation",
|
||||||
|
})
|
||||||
private String getFileText(AbstractFile file) throws IOException, InterruptedException, TextExtractor.InitReaderException {
|
private String getFileText(AbstractFile file) throws IOException, InterruptedException, TextExtractor.InitReaderException {
|
||||||
final boolean isImage = file.getMIMEType().toLowerCase().startsWith("image/"); // NON-NLS
|
final boolean isImage = file.getMIMEType().toLowerCase().startsWith("image/"); // NON-NLS
|
||||||
String result;
|
if (isImage && ! UserPreferences.getUseOcrInTranslation()) {
|
||||||
if (isImage) {
|
return Bundle.TranslatedContentViewer_ocrNotEnabled();
|
||||||
result = extractText(file, OCR_ENABLED);
|
|
||||||
} else {
|
|
||||||
result = extractText(file, OCR_DISABLED);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
String result = extractText(file, UserPreferences.getUseOcrInTranslation());
|
||||||
|
|
||||||
//Correct for UTF-8
|
//Correct for UTF-8
|
||||||
byte[] resultInUTF8Bytes = result.getBytes("UTF8");
|
byte[] resultInUTF8Bytes = result.getBytes("UTF8");
|
||||||
byte[] trimToArraySize = Arrays.copyOfRange(resultInUTF8Bytes, 0,
|
byte[] trimToArraySize = Arrays.copyOfRange(resultInUTF8Bytes, 0,
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 47 KiB After Width: | Height: | Size: 60 KiB |
BIN
docs/doxygen-user/images/mt_ocr_image.png
Normal file
BIN
docs/doxygen-user/images/mt_ocr_image.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 136 KiB |
BIN
docs/doxygen-user/images/mt_ocr_result.png
Normal file
BIN
docs/doxygen-user/images/mt_ocr_result.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 17 KiB |
@ -57,6 +57,7 @@ The "Indexed Text" tab shows the results when running the keyword search module
|
|||||||
|
|
||||||
\image html keyword-search-ocr-indexed-text.png
|
\image html keyword-search-ocr-indexed-text.png
|
||||||
|
|
||||||
|
\anchor keyword_search_ocr_config
|
||||||
By default, OCR is only configured for English text. Its configuration depends on the presence of language files (called "traineddata" files)
|
By default, OCR is only configured for English text. Its configuration depends on the presence of language files (called "traineddata" files)
|
||||||
that exist in a location that Autopsy can understand. To add support for more languages, you will need to download additional "traineddata"
|
that exist in a location that Autopsy can understand. To add support for more languages, you will need to download additional "traineddata"
|
||||||
and move them to the right location. The following steps break down this process for you:
|
and move them to the right location. The following steps break down this process for you:
|
||||||
|
@ -17,6 +17,8 @@ To set up a machine translation service, go to Options->Tools and then select th
|
|||||||
|
|
||||||
Each service will require slightly different configuration steps. After setting everything up, you can run a quick check that the service is set up correctly using the "Test" button.
|
Each service will require slightly different configuration steps. After setting everything up, you can run a quick check that the service is set up correctly using the "Test" button.
|
||||||
|
|
||||||
|
The checkbox at the bottom allows you to enable or disable optical character recognition (OCR). When enabled, if you select an image in the \ref mt_content_viewer "content viewer", Autopsy will use OCR to attempt to extract text to be translated. Instructions for installing OCR packages for different languages can be found on the \ref keyword_search_ocr_config "Keyword Search page".
|
||||||
|
|
||||||
\section mt_file_names Translating File Names
|
\section mt_file_names Translating File Names
|
||||||
|
|
||||||
You can use machine translation to automatically translate file and folder names, such as the ones seen below:
|
You can use machine translation to automatically translate file and folder names, such as the ones seen below:
|
||||||
@ -49,4 +51,12 @@ Then use the drop-down menu on the right to change from "Original Text" to "Tran
|
|||||||
|
|
||||||
\image html mt_message_translated.png
|
\image html mt_message_translated.png
|
||||||
|
|
||||||
|
If you've enabled OCR as described in the \ref mt_config section above, you can extract and translate text from images. Here is an image containing the beginning of a French poem:
|
||||||
|
|
||||||
|
\image html mt_ocr_image.png
|
||||||
|
|
||||||
|
If you go to the Text tab and then the Translation viewer, Autopsy will use OCR to read text from the image and then display the translation.
|
||||||
|
|
||||||
|
\image html mt_ocr_result.png
|
||||||
|
|
||||||
*/
|
*/
|
Loading…
x
Reference in New Issue
Block a user