Enable OCR in translation tab

This commit is contained in:
apriestman 2020-10-09 16:38:09 -04:00
parent 2d239849b1
commit 12df9b574d
12 changed files with 98 additions and 19 deletions

View File

@ -62,6 +62,7 @@ public final class UserPreferences {
private static final String MESSAGE_SERVICE_HOST = "MessageServiceHost"; //NON-NLS
private static final String MESSAGE_SERVICE_PORT = "MessageServicePort"; //NON-NLS
public static final String TEXT_TRANSLATOR_NAME = "TextTranslatorName";
public static final String OCR_TRANSLATION_ENABLED = "OcrTranslationEnabled";
public static final String PROCESS_TIME_OUT_ENABLED = "ProcessTimeOutEnabled"; //NON-NLS
public static final String PROCESS_TIME_OUT_HOURS = "ProcessTimeOutHours"; //NON-NLS
private static final int DEFAULT_PROCESS_TIMEOUT_HR = 60;
@ -348,6 +349,14 @@ public final class UserPreferences {
return preferences.get(TEXT_TRANSLATOR_NAME, null);
}
/**
 * Stores the user's choice of whether OCR should be used in translation.
 *
 * @param enableOcr The value to store under the OCR_TRANSLATION_ENABLED
 *                  preference key.
 */
public static void setUseOcrInTranslation(boolean enableOcr) {
    final String preferenceKey = OCR_TRANSLATION_ENABLED;
    preferences.putBoolean(preferenceKey, enableOcr);
}
/**
 * Reads the OCR-in-translation setting from the stored preferences.
 *
 * @return The boolean stored under the OCR_TRANSLATION_ENABLED preference
 *         key; true is supplied to the lookup as the default value.
 */
public static boolean getUseOcrInTranslation() {
    final boolean defaultValue = true;
    return preferences.getBoolean(OCR_TRANSLATION_ENABLED, defaultValue);
}
/**
* Persists message service connection info.
*

View File

@ -6,3 +6,4 @@ TranslationOptionsPanelController.moduleErr.msg=A module caused an error listeni
TranslationContentPanel.showLabel.text=Show:
TranslationOptionsPanel.translationServiceLabel.text=Text translator:
TranslationOptionsPanel.translationOptionsDescription.text=Configure a 3rd party text translation service to enable text and file name translation.
TranslationOptionsPanel.enableOcrCheckBox.text=Enable Optical Character Recognition (OCR) in the translation content viewer

View File

@ -10,3 +10,4 @@ TranslationOptionsPanelController.moduleErr.msg=A module caused an error listeni
TranslationContentPanel.showLabel.text=Show:
TranslationOptionsPanel.translationServiceLabel.text=Text translator:
TranslationOptionsPanel.translationOptionsDescription.text=Configure a 3rd party text translation service to enable text and file name translation.
TranslationOptionsPanel.enableOcrCheckBox.text=Enable Optical Character Recognition (OCR) in the translation content viewer

View File

@ -16,17 +16,23 @@
<Layout>
<DimensionLayout dim="0">
<Group type="103" groupAlignment="0" attributes="0">
<Component id="jSeparator1" alignment="0" max="32767" attributes="0"/>
<Group type="102" attributes="0">
<EmptySpace max="-2" attributes="0"/>
<Group type="103" groupAlignment="0" attributes="0">
<Component id="translationServicePanel" max="32767" attributes="0"/>
<Component id="translationOptionsDescription" alignment="0" pref="462" max="32767" attributes="0"/>
<Group type="102" attributes="0">
<Group type="103" groupAlignment="0" attributes="0">
<Group type="102" attributes="0">
<Component id="translationServiceLabel" min="-2" max="-2" attributes="0"/>
<EmptySpace min="-2" pref="10" max="-2" attributes="0"/>
<Component id="translatorComboBox" min="-2" pref="214" max="-2" attributes="0"/>
</Group>
<Component id="enableOcrCheckBox" alignment="0" min="-2" max="-2" attributes="0"/>
</Group>
<EmptySpace min="0" pref="0" max="32767" attributes="0"/>
</Group>
<Component id="translationOptionsDescription" alignment="0" pref="462" max="32767" attributes="0"/>
</Group>
<EmptySpace max="-2" attributes="0"/>
</Group>
@ -42,9 +48,13 @@
<Component id="translatorComboBox" alignment="3" min="-2" max="-2" attributes="0"/>
<Component id="translationServiceLabel" alignment="3" min="-2" max="-2" attributes="0"/>
</Group>
<EmptySpace min="-2" max="-2" attributes="0"/>
<Component id="translationServicePanel" max="32767" attributes="0"/>
<EmptySpace min="-2" max="-2" attributes="0"/>
<EmptySpace max="-2" attributes="0"/>
<Component id="translationServicePanel" min="-2" max="-2" attributes="0"/>
<EmptySpace type="unrelated" max="-2" attributes="0"/>
<Component id="jSeparator1" min="-2" pref="10" max="-2" attributes="0"/>
<EmptySpace max="-2" attributes="0"/>
<Component id="enableOcrCheckBox" min="-2" max="-2" attributes="0"/>
<EmptySpace max="32767" attributes="0"/>
</Group>
</Group>
</DimensionLayout>
@ -76,5 +86,17 @@
</Property>
</Properties>
</Component>
<Component class="javax.swing.JSeparator" name="jSeparator1">
</Component>
<Component class="javax.swing.JCheckBox" name="enableOcrCheckBox">
<Properties>
<Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor">
<ResourceString bundle="org/sleuthkit/autopsy/texttranslation/Bundle.properties" key="TranslationOptionsPanel.enableOcrCheckBox.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, &quot;{key}&quot;)"/>
</Property>
</Properties>
<Events>
<EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="enableOcrCheckBoxActionPerformed"/>
</Events>
</Component>
</SubComponents>
</Form>

View File

@ -111,6 +111,7 @@ final class TranslationOptionsPanel extends javax.swing.JPanel {
}
translatorComboBox.setSelectedItem(currentSelection);
loadSelectedPanelSettings();
enableOcrCheckBox.setSelected(UserPreferences.getUseOcrInTranslation());
}
/**
@ -128,6 +129,8 @@ final class TranslationOptionsPanel extends javax.swing.JPanel {
logger.log(Level.WARNING, "Unable to save settings for TextTranslator named: " + currentSelection, ex);
}
}
// Save whether OCR is enabled in the content viewer
UserPreferences.setUseOcrInTranslation(enableOcrCheckBox.isSelected());
}
@ -144,6 +147,8 @@ final class TranslationOptionsPanel extends javax.swing.JPanel {
translationServiceLabel = new javax.swing.JLabel();
translationServicePanel = new javax.swing.JPanel();
translationOptionsDescription = new javax.swing.JLabel();
jSeparator1 = new javax.swing.JSeparator();
enableOcrCheckBox = new javax.swing.JCheckBox();
translatorComboBox.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
@ -157,20 +162,31 @@ final class TranslationOptionsPanel extends javax.swing.JPanel {
org.openide.awt.Mnemonics.setLocalizedText(translationOptionsDescription, org.openide.util.NbBundle.getMessage(TranslationOptionsPanel.class, "TranslationOptionsPanel.translationOptionsDescription.text")); // NOI18N
org.openide.awt.Mnemonics.setLocalizedText(enableOcrCheckBox, org.openide.util.NbBundle.getMessage(TranslationOptionsPanel.class, "TranslationOptionsPanel.enableOcrCheckBox.text")); // NOI18N
enableOcrCheckBox.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
enableOcrCheckBoxActionPerformed(evt);
}
});
javax.swing.GroupLayout layout = new javax.swing.GroupLayout(this);
this.setLayout(layout);
layout.setHorizontalGroup(
layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addComponent(jSeparator1)
.addGroup(layout.createSequentialGroup()
.addContainerGap()
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addComponent(translationServicePanel, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
.addComponent(translationOptionsDescription, javax.swing.GroupLayout.DEFAULT_SIZE, 462, Short.MAX_VALUE)
.addGroup(layout.createSequentialGroup()
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addGroup(layout.createSequentialGroup()
.addComponent(translationServiceLabel)
.addGap(10, 10, 10)
.addComponent(translatorComboBox, javax.swing.GroupLayout.PREFERRED_SIZE, 214, javax.swing.GroupLayout.PREFERRED_SIZE)
.addGap(0, 0, Short.MAX_VALUE))
.addComponent(translationOptionsDescription, javax.swing.GroupLayout.PREFERRED_SIZE, 462, Short.MAX_VALUE))
.addComponent(translatorComboBox, javax.swing.GroupLayout.PREFERRED_SIZE, 214, javax.swing.GroupLayout.PREFERRED_SIZE))
.addComponent(enableOcrCheckBox))
.addGap(0, 0, Short.MAX_VALUE)))
.addContainerGap())
);
layout.setVerticalGroup(
@ -183,8 +199,12 @@ final class TranslationOptionsPanel extends javax.swing.JPanel {
.addComponent(translatorComboBox, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)
.addComponent(translationServiceLabel))
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
.addComponent(translationServicePanel, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE)
.addContainerGap())
.addComponent(translationServicePanel, javax.swing.GroupLayout.PREFERRED_SIZE, javax.swing.GroupLayout.DEFAULT_SIZE, javax.swing.GroupLayout.PREFERRED_SIZE)
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
.addComponent(jSeparator1, javax.swing.GroupLayout.PREFERRED_SIZE, 10, javax.swing.GroupLayout.PREFERRED_SIZE)
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
.addComponent(enableOcrCheckBox)
.addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE))
);
}// </editor-fold>//GEN-END:initComponents
@ -192,8 +212,14 @@ final class TranslationOptionsPanel extends javax.swing.JPanel {
updatePanel();
}//GEN-LAST:event_translatorComboBoxActionPerformed
private void enableOcrCheckBoxActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_enableOcrCheckBoxActionPerformed
// Toggling the checkbox only signals the controller that the panel has been
// modified; the checkbox state itself is written to UserPreferences elsewhere
// in this panel, not here.
controller.changed();
}//GEN-LAST:event_enableOcrCheckBoxActionPerformed
// Variables declaration - do not modify//GEN-BEGIN:variables
private javax.swing.JCheckBox enableOcrCheckBox;
private javax.swing.JSeparator jSeparator1;
private javax.swing.JLabel translationOptionsDescription;
private javax.swing.JLabel translationServiceLabel;
private javax.swing.JPanel translationServicePanel;

View File

@ -8,6 +8,7 @@ TranslatedContentViewer.errorExtractingText=An error occurred while extracting t
TranslatedContentViewer.extractingText=Extracting text, please wait...
TranslatedContentViewer.fileHasNoText=File has no text.
TranslatedContentViewer.noServiceProvider=The machine translation software was not found.
TranslatedContentViewer.ocrNotEnabled=OCR is not enabled. To change, go to Tools->Options->Machine Translation
TranslatedContentViewer.translatingText=Translating text, please wait...
# {0} - exception message
TranslatedContentViewer.translationException=An error occurred while translating the text ({0}).

View File

@ -48,6 +48,7 @@ import org.sleuthkit.datamodel.Content;
import java.util.List;
import java.util.logging.Level;
import javax.swing.SwingUtilities;
import org.sleuthkit.autopsy.core.UserPreferences;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.coreutils.PlatformUtil;
import org.sleuthkit.autopsy.texttranslation.ui.TranslationContentPanel.DisplayDropdownOptions;
@ -201,10 +202,17 @@ public final class TranslatedTextViewer implements TextViewer {
* @throws
* org.sleuthkit.autopsy.textextractors.TextExtractor.InitReaderException
*/
@NbBundle.Messages({
"TranslatedContentViewer.ocrNotEnabled=OCR is not enabled. To change, go to Tools->Options->Machine Translation",
})
private String getFileText(AbstractFile file) throws IOException, InterruptedException, TextExtractor.InitReaderException {
final boolean isImage = file.getMIMEType().toLowerCase().startsWith("image/"); // NON-NLS
if (isImage && ! UserPreferences.getUseOcrInTranslation()) {
return Bundle.TranslatedContentViewer_ocrNotEnabled();
}
String result;
if (isImage) {
if (UserPreferences.getUseOcrInTranslation()) {
result = extractText(file, OCR_ENABLED);
} else {
result = extractText(file, OCR_DISABLED);

Binary file not shown.

Before

Width:  |  Height:  |  Size: 47 KiB

After

Width:  |  Height:  |  Size: 60 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 136 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

View File

@ -57,6 +57,7 @@ The "Indexed Text" tab shows the results when running the keyword search module
\image html keyword-search-ocr-indexed-text.png
\anchor keyword_search_ocr_config
By default, OCR is only configured for English text. Its configuration depends on the presence of language files (called "traineddata" files)
that exist in a location that Autopsy can understand. To add support for more languages, you will need to download additional "traineddata"
files and move them to the right location. The following steps break down this process for you:

View File

@ -17,6 +17,8 @@ To set up a machine translation service, go to Options->Tools and then select th
Each service will require slightly different configuration steps. After setting everything up, you can run a quick check that the service is set up correctly using the "Test" button.
The checkbox at the bottom allows you to enable or disable optical character recognition (OCR). When enabled, if you select an image in the \ref mt_content_viewer "content viewer", Autopsy will use OCR to attempt to extract text to be translated. Instructions for installing OCR packages for different languages can be found on the \ref keyword_search_ocr_config "Keyword Search page".
\section mt_file_names Translating File Names
You can use machine translation to automatically translate file and folder names, such as the ones seen below:
@ -49,4 +51,12 @@ Then use the drop-down menu on the right to change from "Original Text" to "Tran
\image html mt_message_translated.png
If you've enabled OCR as described in the \ref mt_config section above, you can extract and translate text from images. Here is an image containing the beginning of a French poem:
\image html mt_ocr_image.png
If you go to the Text tab and then the Translation viewer, Autopsy will use OCR to read text from the image and then display the translation.
\image html mt_ocr_result.png
*/