merge from develop

This commit is contained in:
Greg DiCristofaro 2023-05-15 11:04:08 -04:00
parent 57f2f1c9d7
commit 86b1de466f
33 changed files with 1815 additions and 1564 deletions

View File

@ -0,0 +1,228 @@
package org.sleuthkit.autopsy.guicomponentutils;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.awt.event.FocusAdapter;
import java.awt.event.FocusEvent;
import java.awt.event.FocusListener;
import java.awt.event.KeyAdapter;
import java.awt.event.KeyEvent;
import java.awt.event.KeyListener;
import java.beans.PropertyChangeEvent;
import java.beans.PropertyChangeListener;
import java.util.Locale;
import javax.swing.ComboBoxEditor;
import javax.swing.ComboBoxModel;
import javax.swing.JComboBox;
import javax.swing.text.AttributeSet;
import javax.swing.text.BadLocationException;
import javax.swing.text.JTextComponent;
import javax.swing.text.PlainDocument;
/*
* This code is taken from http://www.orbital-computer.de/JComboBox/source/AutoCompletion.java
* Author: Thomas Bierhance
* This work is hereby released into the Public Domain. To view a copy of the
* public domain dedication, visit
* http://creativecommons.org/licenses/publicdomain/
*/
/**
 * A {@link PlainDocument} that adds auto-completion to an editable
 * {@link JComboBox}: as the user types, the first item whose string form
 * starts with the typed prefix (case-insensitively) becomes the selected
 * item and the completed remainder of the editor text is highlighted.
 *
 * <p>Not thread-safe; like all Swing models it must be used on the EDT.
 */
public class AutoCompletion extends PlainDocument {

    private static final long serialVersionUID = 1L;
    private final JComboBox<?> comboBox;
    private ComboBoxModel<?> model;
    private JTextComponent editor;
    // flag to indicate if setSelectedItem has been called
    // subsequent calls to remove/insertString should be ignored
    private boolean selecting = false;
    private final boolean hidePopupOnFocusLoss;
    private boolean hitBackspace = false;
    private boolean hitBackspaceOnSelection;
    private final KeyListener editorKeyListener;
    private final FocusListener editorFocusListener;

    /**
     * Installs auto-completion behavior on the given combo box by replacing
     * its editor's document with this one.
     *
     * @param comboBox the combo box to augment; must have (or be given) an
     *                 editor whose component is a {@link JTextComponent}
     */
    public AutoCompletion(final JComboBox<?> comboBox) {
        this.comboBox = comboBox;
        model = comboBox.getModel();
        comboBox.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                // skip events fired by our own setSelectedItem call
                if (!selecting) {
                    highlightCompletedText(0);
                }
            }
        });
        comboBox.addPropertyChangeListener(new PropertyChangeListener() {
            @Override
            public void propertyChange(PropertyChangeEvent e) {
                // track editor/model swaps so listeners and the model stay current
                if (e.getPropertyName().equals("editor")) {
                    configureEditor((ComboBoxEditor) e.getNewValue());
                }
                if (e.getPropertyName().equals("model")) {
                    model = (ComboBoxModel<?>) e.getNewValue();
                }
            }
        });
        editorKeyListener = new KeyAdapter() {
            @Override
            public void keyPressed(KeyEvent e) {
                if (comboBox.isDisplayable()) {
                    comboBox.setPopupVisible(true);
                }
                hitBackspace = false;
                switch (e.getKeyCode()) {
                    // determine if the pressed key is backspace (needed by the remove method)
                    case KeyEvent.VK_BACK_SPACE:
                        hitBackspace = true;
                        hitBackspaceOnSelection = editor.getSelectionStart() != editor.getSelectionEnd();
                        break;
                    // ignore delete key
                    case KeyEvent.VK_DELETE:
                        e.consume();
                        comboBox.getToolkit().beep();
                        break;
                }
            }
        };
        // Bug 5100422 on Java 1.5: Editable JComboBox won't hide popup when tabbing out
        hidePopupOnFocusLoss = System.getProperty("java.version").startsWith("1.5");
        // Highlight whole text when gaining focus
        editorFocusListener = new FocusAdapter() {
            @Override
            public void focusGained(FocusEvent e) {
                highlightCompletedText(0);
            }

            @Override
            public void focusLost(FocusEvent e) {
                // Workaround for Bug 5100422 - Hide Popup on focus loss
                if (hidePopupOnFocusLoss) {
                    comboBox.setPopupVisible(false);
                }
            }
        };
        configureEditor(comboBox.getEditor());
        // Handle initially selected object
        Object selected = comboBox.getSelectedItem();
        if (selected != null) {
            setText(selected.toString());
        }
        highlightCompletedText(0);
    }

    /**
     * Convenience entry point: makes the combo box editable and installs a
     * new {@code AutoCompletion} document on its editor.
     *
     * @param comboBox the combo box to enable auto-completion on
     */
    public static void enable(JComboBox<?> comboBox) {
        // has to be editable
        comboBox.setEditable(true);
        // change the editor's document
        new AutoCompletion(comboBox);
    }

    /**
     * Moves the key/focus listeners and this document onto a new editor,
     * detaching them from the previous editor if there was one.
     *
     * @param newEditor the editor to configure; may be null
     */
    void configureEditor(ComboBoxEditor newEditor) {
        if (editor != null) {
            editor.removeKeyListener(editorKeyListener);
            editor.removeFocusListener(editorFocusListener);
        }
        if (newEditor != null) {
            editor = (JTextComponent) newEditor.getEditorComponent();
            editor.addKeyListener(editorKeyListener);
            editor.addFocusListener(editorFocusListener);
            editor.setDocument(this);
        }
    }

    @Override
    public void remove(int offs, int len) throws BadLocationException {
        // return immediately when selecting an item
        if (selecting) {
            return;
        }
        if (hitBackspace) {
            // user hit backspace => move the selection backwards
            // old item keeps being selected
            if (offs > 0) {
                if (hitBackspaceOnSelection) {
                    offs--;
                }
            } else {
                // User hit backspace with the cursor positioned on the start => beep
                comboBox.getToolkit().beep(); // when available use: UIManager.getLookAndFeel().provideErrorFeedback(comboBox);
            }
            highlightCompletedText(offs);
        } else {
            super.remove(offs, len);
        }
    }

    @Override
    public void insertString(int offs, String str, AttributeSet a) throws BadLocationException {
        // return immediately when selecting an item
        if (selecting) {
            return;
        }
        // insert the string into the document
        super.insertString(offs, str, a);
        // lookup and select a matching item
        Object item = lookupItem(getText(0, getLength()));
        if (item != null) {
            setSelectedItem(item);
        } else {
            // keep old item selected if there is no match
            item = comboBox.getSelectedItem();
            // imitate no insert (later on offs will be incremented by str.length(): selection won't move forward)
            offs = offs - str.length();
            // provide feedback to the user that his input has been received but can not be accepted
            comboBox.getToolkit().beep(); // when available use: UIManager.getLookAndFeel().provideErrorFeedback(comboBox);
        }
        // item can still be null when the combo box has no selection at all;
        // guard against the NPE and leave the typed text in place in that case
        if (item != null) {
            setText(item.toString());
        }
        // select the completed part
        highlightCompletedText(offs + str.length());
    }

    /**
     * Replaces the entire document content with the given text, bypassing
     * the auto-completion interception.
     */
    private void setText(String text) {
        try {
            // remove all text and insert the completed string
            super.remove(0, getLength());
            super.insertString(0, text, null);
        } catch (BadLocationException e) {
            // cannot happen for a full-range replace; preserve the cause if it does
            throw new RuntimeException(e);
        }
    }

    /**
     * Selects the document tail from {@code start} to the end, visually
     * marking the auto-completed portion.
     */
    private void highlightCompletedText(int start) {
        editor.setCaretPosition(getLength());
        editor.moveCaretPosition(start);
    }

    /**
     * Updates the model's selected item while suppressing the document
     * callbacks this change would otherwise trigger.
     */
    private void setSelectedItem(Object item) {
        selecting = true;
        model.setSelectedItem(item);
        selecting = false;
    }

    /**
     * Returns the first model item whose string form starts with the given
     * pattern (case-insensitively), preferring the currently selected item;
     * returns null when nothing matches.
     */
    private Object lookupItem(String pattern) {
        Object selectedItem = model.getSelectedItem();
        // only search for a different item if the currently selected does not match
        if (selectedItem != null && startsWithIgnoreCase(selectedItem.toString(), pattern)) {
            return selectedItem;
        } else {
            // iterate over all items
            for (int i = 0, n = model.getSize(); i < n; i++) {
                Object currentItem = model.getElementAt(i);
                // current item starts with the pattern?
                if (currentItem != null && startsWithIgnoreCase(currentItem.toString(), pattern)) {
                    return currentItem;
                }
            }
        }
        // no item starts with the pattern => return null
        return null;
    }

    // checks if str1 starts with str2 - ignores case;
    // Locale.ROOT avoids locale-dependent case mapping (e.g. Turkish dotless i)
    private boolean startsWithIgnoreCase(String str1, String str2) {
        return str1.toUpperCase(Locale.ROOT).startsWith(str2.toUpperCase(Locale.ROOT));
    }
}

View File

@ -25,6 +25,7 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@ -402,7 +403,7 @@ public final class IngestJobSettings {
}
for (String moduleName : disabledModuleNames) {
if (!loadedModuleNames.contains(moduleName)) {
missingModuleNames.add(moduleName);
logger.log(Level.WARNING, MessageFormat.format("A module marked as disabled in the ingest job settings, ''{0}'', could not be found.", moduleName));
}
}
for (String moduleName : missingModuleNames) {

View File

@ -18,6 +18,7 @@
*/
package org.sleuthkit.autopsy.modules.interestingitems;
import org.sleuthkit.autopsy.guicomponentutils.AutoCompletion;
import java.awt.Color;
import java.awt.event.ActionEvent;
import java.util.Arrays;
@ -76,6 +77,7 @@ final class FilesSetRulePanel extends javax.swing.JPanel {
*/
FilesSetRulePanel(JButton okButton, JButton cancelButton, PANEL_TYPE panelType) {
initComponents();
AutoCompletion.enable(mimeTypeComboBox);
if (panelType == FilesSetDefsPanel.PANEL_TYPE.FILE_INGEST_FILTERS) { //Hide the mimetype settings when this is displaying a FileSet rule instead of a interesting item rule
mimeTypeComboBox.setVisible(false);
mimeCheck.setVisible(false);
@ -102,6 +104,7 @@ final class FilesSetRulePanel extends javax.swing.JPanel {
*/
FilesSetRulePanel(FilesSet.Rule rule, JButton okButton, JButton cancelButton, PANEL_TYPE panelType) {
initComponents();
AutoCompletion.enable(mimeTypeComboBox);
if (panelType == FilesSetDefsPanel.PANEL_TYPE.FILE_INGEST_FILTERS) { //Hide the mimetype settings when this is displaying a FileSet rule instead of a interesting item rule
mimeTypeComboBox.setVisible(false);
mimeCheck.setVisible(false);

View File

@ -18,7 +18,8 @@
<dependency conf="solr-war->default" org="org.apache.solr" name="solr" rev="4.10.4" transitive="false" /> <!-- the war file for embedded Solr 4 -->
<dependency conf="solr-libs->default" name="solr-cell" rev="8.11.2" org="org.apache.solr"/>
<!-- https://mvnrepository.com/artifact/org.apache.lucene/lucene-core -->
<dependency conf="autopsy->default" org="org.apache.lucene" name="lucene-core" rev="8.11.2"/>
<!-- Autopsy -->
<dependency conf="autopsy->default" org="org.apache.solr" name="solr-solrj" rev="8.11.2"/>
<dependency conf="autopsy->default" org="com.optimaize.languagedetector" name="language-detector" rev="0.6"/>

View File

@ -48,6 +48,7 @@ file.reference.stax2-api-4.2.1.jar=release/modules/ext/stax2-api-4.2.1.jar
file.reference.woodstox-core-6.2.4.jar=release/modules/ext/woodstox-core-6.2.4.jar
file.reference.zookeeper-3.8.0.jar=release/modules/ext/zookeeper-3.8.0.jar
file.reference.zookeeper-jute-3.8.0.jar=release/modules/ext/zookeeper-jute-3.8.0.jar
file.reference.lucene-core-8.11.2.jar=release/modules/ext/lucene-core-8.11.2.jar
javac.source=17
javac.compilerargs=-Xlint -Xlint:-serial
license.file=../LICENSE-2.0.txt

View File

@ -434,6 +434,10 @@
<runtime-relative-path>ext/zookeeper-jute-3.8.0.jar</runtime-relative-path>
<binary-origin>release/modules/ext/zookeeper-jute-3.8.0.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/lucene-core-8.11.2.jar</runtime-relative-path>
<binary-origin>release/modules/ext/lucene-core-8.11.2.jar</binary-origin>
</class-path-extension>
</data>
</configuration>
</project>

View File

@ -301,7 +301,7 @@
<autoCommit>
<maxTime>300000</maxTime>
<!-- maxDocs>15000</maxDocs -->
<openSearcher>true</openSearcher>
<openSearcher>false</openSearcher>
</autoCommit>
<!-- softAutoCommit is like autoCommit except it causes a

View File

@ -112,9 +112,9 @@ abstract class AdHocSearchPanel extends javax.swing.JPanel {
}
if (filesIndexed == 0) {
if (isIngestRunning) {
// ELTODO this message should be dependent on whether Solr indexing is enabled or not
KeywordSearchUtil.displayDialog(keywordSearchErrorDialogHeader, NbBundle.getMessage(this.getClass(),
"AbstractKeywordSearchPerformer.search.noFilesInIdxMsg",
KeywordSearchSettings.getUpdateFrequency().getTime()), KeywordSearchUtil.DIALOG_MESSAGE_TYPE.ERROR);
"AbstractKeywordSearchPerformer.search.noFilesInIdxMsg"), KeywordSearchUtil.DIALOG_MESSAGE_TYPE.ERROR);
} else {
KeywordSearchUtil.displayDialog(keywordSearchErrorDialogHeader, NbBundle.getMessage(this.getClass(),
"AbstractKeywordSearchPerformer.search.noFilesIdxdMsg"), KeywordSearchUtil.DIALOG_MESSAGE_TYPE.ERROR);

View File

@ -39,8 +39,8 @@ AbstractKeywordSearchPerformer.search.invalidSyntaxHeader=Invalid query statemen
AbstractKeywordSearchPerformer.search.searchIngestInProgressTitle=Keyword Search Ingest in Progress
AbstractKeywordSearchPerformer.search.ingestInProgressBody=<html>Keyword Search Ingest is currently running.<br />Not all files have been indexed and this search might yield incomplete results.<br />Do you want to proceed with this search anyway?</html>
AbstractKeywordSearchPerformer.search.emptyKeywordErrorBody=Keyword list is empty, please add at least one keyword to the list
AbstractKeywordSearchPerformer.search.noFilesInIdxMsg=<html>No files are in index yet. <br />Try again later. Index is updated every {0} minutes.</html>
AbstractKeywordSearchPerformer.search.noFilesIdxdMsg=<html>No files were indexed.<br />Re-ingest the image with the Keyword Search Module enabled. </html>
AbstractKeywordSearchPerformer.search.noFilesInIdxMsg=<html>No files are in index yet. <br />If Solr keyword search indexing was enabled, wait for ingest to complete</html>
AbstractKeywordSearchPerformer.search.noFilesIdxdMsg=<html>No files were indexed.<br />Re-ingest the image with the Keyword Search Module and Solr indexing enabled. </html>
ExtractedContentViewer.toolTip=Displays extracted text from files and keyword-search results. Requires Keyword Search ingest to be run on a file to activate this viewer.
ExtractedContentViewer.getTitle=Indexed Text
HighlightedMatchesSource.toString=Search Results
@ -122,7 +122,7 @@ KeywordSearchListsManagementPanel.fileExtensionFilterLbl=Autopsy Keyword List Fi
KeywordSearchListsManagementPanel.fileExtensionFilterLb2=Encase Keyword List File (txt)
KeywordSearch.listImportFeatureTitle=Keyword List Import
KeywordSearchIngestModule.moduleName=Keyword Search
KeywordSearchIngestModule.moduleDescription=Performs file indexing and periodic search using keywords and regular expressions in lists.
KeywordSearchIngestModule.moduleDescription=Performs file indexing and search using selected keyword lists.
DropdownSearchPanel.keywordTextField.text=
KeywordSearchPanel.searchDropButton.text=Keyword Search
DropdownSearchPanel.exactRadioButton.text=Exact Match
@ -211,11 +211,6 @@ KeywordSearchGlobalLanguageSettingsPanel.enableUTF8Checkbox.text=Enable UTF8 tex
KeywordSearchGlobalLanguageSettingsPanel.ingestSettingsLabel.text=Ingest settings for string extraction from unknown file types (changes effective on next ingest):
KeywordSearchGlobalLanguageSettingsPanel.enableUTF16Checkbox.text=Enable UTF16LE and UTF16BE string extraction
KeywordSearchGlobalLanguageSettingsPanel.languagesLabel.text=Enabled scripts (languages):
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.toolTipText=20 mins. (fastest ingest time)
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.text=20 minutes (slowest feedback, fastest ingest)
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.toolTipText=10 minutes (faster overall ingest time than default)
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.text=10 minutes (slower feedback, faster ingest)
KeywordSearchGlobalSearchSettingsPanel.frequencyLabel.text=Results update frequency during ingest:
KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.toolTipText=Requires Hash Set service to had run previously, or be selected for next ingest.
KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.text=Do not add files in NSRL (known files) to keyword index during ingest
KeywordSearchGlobalSearchSettingsPanel.informationLabel.text=Information
@ -224,11 +219,7 @@ KeywordSearchGlobalSearchSettingsPanel.filesIndexedValue.text=0
KeywordSearchGlobalSearchSettingsPanel.filesIndexedLabel.text=Files in keyword index:
KeywordSearchGlobalSearchSettingsPanel.showSnippetsCB.text=Show Keyword Preview in Keyword Search Results (will result in longer search times)
KeywordSearchGlobalSearchSettingsPanel.chunksValLabel.text=0
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.toolTipText=1 minute (overall ingest time will be longest)
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.text_1=1 minute (faster feedback, longest ingest)
KeywordSearchGlobalSearchSettingsPanel.chunksLabel.text=Chunks in keyword index:
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.toolTipText=5 minutes (overall ingest time will be longer)
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.text=5 minutes (default)
KeywordSearchIngestModule.regExpHitLbl=Reg Ex hit:
KeywordSearchIngestModule.kwHitLbl=Keyword hit:
KeywordSearchIngestModule.kwHitThLbl=Keyword
@ -254,8 +245,6 @@ KeywordSearchListsManagementPanel.newKeywordListDescription2=Keyword list <{0}>
KeywordSearchModuleFactory.getIngestJobSettingsPanel.exception.msg=Expected settings argument to be instanceof KeywordSearchJobSettings
KeywordSearchModuleFactory.createFileIngestModule.exception.msg=Expected settings argument to be instanceof KeywordSearchJobSettings
SearchRunner.Searcher.done.err.msg=Error performing keyword search
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.toolTipText=Fastest overall, but no results until the end
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.text=No periodic searches
SolrConnectionCheck.HostnameOrPort=Invalid hostname and/or port number.
SolrConnectionCheck.Hostname=Invalid hostname.
SolrConnectionCheck.MissingHostname=Missing hostname.
@ -322,3 +311,4 @@ ExtractedContentPanel.pagesLabel.text=Page:
KeywordSearchJobSettingsPanel.ocrCheckBox.text=Enable Optical Character Recognition (OCR)
KeywordSearchJobSettingsPanel.limitedOcrCheckbox.text=<html>Only process PDFs, MS Office docs and images which are over 100KB in size or extracted from another file (Beta)</html>
KeywordSearchJobSettingsPanel.ocrOnlyCheckbox.text=Only index text extracted using OCR
KeywordSearchJobSettingsPanel.solrCheckbox.text=Add text to Solr Index

View File

@ -15,16 +15,13 @@ ExtractAllTermsReport.error.noOpenCase=No currently open case.
ExtractAllTermsReport.export.error=Error During Unique Word Extraction
ExtractAllTermsReport.exportComplete=Unique Word Extraction Complete
ExtractAllTermsReport.getName.text=Extract Unique Words
# {0} - Number of extracted terms
ExtractAllTermsReport.numberExtractedTerms=Extracted {0} terms...
ExtractAllTermsReport.search.ingestInProgressBody=<html>Keyword Search Ingest is currently running.<br />Not all files have been indexed and unique word extraction might yield incomplete results.<br />Do you want to proceed with unique word extraction anyway?</html>
# {0} - Keyword search commit frequency
ExtractAllTermsReport.search.noFilesInIdxMsg=No files are in index yet. Try again later. Index is updated every {0} minutes.
ExtractAllTermsReport.search.noFilesInIdxMsg2=No files are in index yet. Try again later
ExtractAllTermsReport.search.noFilesInIdxMsg=No files are in index yet. If Solr keyword search indexing and Solr indexing were enabled, wait for ingest to complete.
ExtractAllTermsReport.search.noFilesInIdxMsg2=No files are in index yet. Re-ingest the image with the Keyword Search Module and Solr indexing enabled.
ExtractAllTermsReport.search.searchIngestInProgressTitle=Keyword Search Ingest in Progress
ExtractAllTermsReport.startExport=Starting Unique Word Extraction
ExtractedContentPanel.setMarkup.panelTxt=<span style='font-style:italic'>Loading text... Please wait</span>
# {0} - Content name
ExtractedContentPanel.SetMarkup.progress.loading=Loading text for {0}
GlobalEditListPanel.editKeyword.title=Edit Keyword
GlobalEditListPanel.warning.text=Boundary characters ^ and $ do not match word boundaries. Consider\nreplacing with an explicit list of boundary characters, such as [ \\.,]
@ -91,8 +88,8 @@ AbstractKeywordSearchPerformer.search.invalidSyntaxHeader=Invalid query statemen
AbstractKeywordSearchPerformer.search.searchIngestInProgressTitle=Keyword Search Ingest in Progress
AbstractKeywordSearchPerformer.search.ingestInProgressBody=<html>Keyword Search Ingest is currently running.<br />Not all files have been indexed and this search might yield incomplete results.<br />Do you want to proceed with this search anyway?</html>
AbstractKeywordSearchPerformer.search.emptyKeywordErrorBody=Keyword list is empty, please add at least one keyword to the list
AbstractKeywordSearchPerformer.search.noFilesInIdxMsg=<html>No files are in index yet. <br />Try again later. Index is updated every {0} minutes.</html>
AbstractKeywordSearchPerformer.search.noFilesIdxdMsg=<html>No files were indexed.<br />Re-ingest the image with the Keyword Search Module enabled. </html>
AbstractKeywordSearchPerformer.search.noFilesInIdxMsg=<html>No files are in index yet. <br />If Solr keyword search indexing was enabled, wait for ingest to complete</html>
AbstractKeywordSearchPerformer.search.noFilesIdxdMsg=<html>No files were indexed.<br />Re-ingest the image with the Keyword Search Module and Solr indexing enabled. </html>
ExtractedContentViewer.toolTip=Displays extracted text from files and keyword-search results. Requires Keyword Search ingest to be run on a file to activate this viewer.
ExtractedContentViewer.getTitle=Indexed Text
HighlightedMatchesSource.toString=Search Results
@ -176,7 +173,7 @@ KeywordSearchListsManagementPanel.fileExtensionFilterLbl=Autopsy Keyword List Fi
KeywordSearchListsManagementPanel.fileExtensionFilterLb2=Encase Keyword List File (txt)
KeywordSearch.listImportFeatureTitle=Keyword List Import
KeywordSearchIngestModule.moduleName=Keyword Search
KeywordSearchIngestModule.moduleDescription=Performs file indexing and periodic search using keywords and regular expressions in lists.
KeywordSearchIngestModule.moduleDescription=Performs file indexing and search using selected keyword lists.
DropdownSearchPanel.keywordTextField.text=
KeywordSearchPanel.searchDropButton.text=Keyword Search
DropdownSearchPanel.exactRadioButton.text=Exact Match
@ -227,8 +224,6 @@ KeywordSearchSettings.properties_options.text={0}_Options
KeywordSearchSettings.propertiesNSRL.text={0}_NSRL
KeywordSearchSettings.propertiesScripts.text={0}_Scripts
NoOpenCoreException.err.noOpenSorlCore.msg=No currently open Solr core.
SearchRunner.query.exception.msg=Error performing query:
# {0} - collection name
Server.deleteCore.exception.msg=Failed to delete Solr collection {0}
Server.exceptionMessage.unableToBackupCollection=Unable to backup Solr collection
Server.exceptionMessage.unableToCreateCollection=Unable to create Solr collection
@ -272,11 +267,6 @@ KeywordSearchGlobalLanguageSettingsPanel.enableUTF8Checkbox.text=Enable UTF8 tex
KeywordSearchGlobalLanguageSettingsPanel.ingestSettingsLabel.text=Ingest settings for string extraction from unknown file types (changes effective on next ingest):
KeywordSearchGlobalLanguageSettingsPanel.enableUTF16Checkbox.text=Enable UTF16LE and UTF16BE string extraction
KeywordSearchGlobalLanguageSettingsPanel.languagesLabel.text=Enabled scripts (languages):
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.toolTipText=20 mins. (fastest ingest time)
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.text=20 minutes (slowest feedback, fastest ingest)
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.toolTipText=10 minutes (faster overall ingest time than default)
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.text=10 minutes (slower feedback, faster ingest)
KeywordSearchGlobalSearchSettingsPanel.frequencyLabel.text=Results update frequency during ingest:
KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.toolTipText=Requires Hash Set service to had run previously, or be selected for next ingest.
KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.text=Do not add files in NSRL (known files) to keyword index during ingest
KeywordSearchGlobalSearchSettingsPanel.informationLabel.text=Information
@ -285,11 +275,7 @@ KeywordSearchGlobalSearchSettingsPanel.filesIndexedValue.text=0
KeywordSearchGlobalSearchSettingsPanel.filesIndexedLabel.text=Files in keyword index:
KeywordSearchGlobalSearchSettingsPanel.showSnippetsCB.text=Show Keyword Preview in Keyword Search Results (will result in longer search times)
KeywordSearchGlobalSearchSettingsPanel.chunksValLabel.text=0
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.toolTipText=1 minute (overall ingest time will be longest)
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.text_1=1 minute (faster feedback, longest ingest)
KeywordSearchGlobalSearchSettingsPanel.chunksLabel.text=Chunks in keyword index:
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.toolTipText=5 minutes (overall ingest time will be longer)
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.text=5 minutes (default)
KeywordSearchIngestModule.regExpHitLbl=Reg Ex hit:
KeywordSearchIngestModule.kwHitLbl=Keyword hit:
KeywordSearchIngestModule.kwHitThLbl=Keyword
@ -315,8 +301,6 @@ KeywordSearchListsManagementPanel.newKeywordListDescription2=Keyword list <{0}>
KeywordSearchModuleFactory.getIngestJobSettingsPanel.exception.msg=Expected settings argument to be instanceof KeywordSearchJobSettings
KeywordSearchModuleFactory.createFileIngestModule.exception.msg=Expected settings argument to be instanceof KeywordSearchJobSettings
SearchRunner.Searcher.done.err.msg=Error performing keyword search
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.toolTipText=Fastest overall, but no results until the end
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.text=No periodic searches
Server.status.failed.msg=Local Solr server did not respond to status request. This may be because the server failed to start or is taking too long to initialize.
SolrConnectionCheck.HostnameOrPort=Invalid hostname and/or port number.
SolrConnectionCheck.Hostname=Invalid hostname.
@ -404,6 +388,7 @@ ExtractedContentPanel.pagesLabel.text=Page:
KeywordSearchJobSettingsPanel.ocrCheckBox.text=Enable Optical Character Recognition (OCR)
KeywordSearchJobSettingsPanel.limitedOcrCheckbox.text=<html>Only process PDFs, MS Office docs and images which are over 100KB in size or extracted from another file (Beta)</html>
KeywordSearchJobSettingsPanel.ocrOnlyCheckbox.text=Only index text extracted using OCR
KeywordSearchJobSettingsPanel.solrCheckbox.text=Add text to Solr Index
TextZoomPanel.zoomInButton.text=
TextZoomPanel.zoomOutButton.text=
TextZoomPanel.zoomResetButton.text=Reset

View File

@ -7,8 +7,6 @@ AbstractKeywordSearchPerformer.search.dialogErrorHeader=\u30ad\u30fc\u30ef\u30fc
AbstractKeywordSearchPerformer.search.emptyKeywordErrorBody=\u30ad\u30fc\u30ef\u30fc\u30c9\u30ea\u30b9\u30c8\u304c\u7a7a(\u672a\u5165\u529b)\u3067\u3059\u3002\u5c11\u306a\u304f\u3068\u30821\u3064\u306e\u30ad\u30fc\u30ef\u30fc\u30c9\u3092\u30ea\u30b9\u30c8\u306b\u8ffd\u52a0\u3057\u3066\u304f\u3060\u3055\u3044\u3002
AbstractKeywordSearchPerformer.search.ingestInProgressBody=<html>\u30ad\u30fc\u30ef\u30fc\u30c9\u691c\u7d22\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u304c\u73fe\u5728\u5b9f\u884c\u4e2d\u3067\u3059\u3002<br />\u3059\u3079\u3066\u306e\u30d5\u30a1\u30a4\u30eb\u304c\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u3055\u308c\u306a\u304b\u3063\u305f\u305f\u3081\u3001\u3053\u306e\u691c\u7d22\u306f\u4e0d\u5b8c\u5168\u306a\u7d50\u679c\u3092\u751f\u6210\u3059\u308b\u53ef\u80fd\u6027\u304c\u3042\u308a\u307e\u3059\u3002<br />\u305d\u308c\u3067\u3082\u3053\u306e\u691c\u7d22\u3092\u7d9a\u884c\u3057\u307e\u3059\u304b?</html>
AbstractKeywordSearchPerformer.search.invalidSyntaxHeader=\u7121\u52b9\u306a\u30af\u30a8\u30ea\u30fb\u30b9\u30c6\u30fc\u30c8\u30e1\u30f3\u30c8\u3002 \u5185\u5bb9\u304c\u6b63\u898f\u8868\u73fe\u306e\u5834\u5408\u3001Lucene\u6b63\u898f\u8868\u73fe\u30d1\u30bf\u30fc\u30f3\u306e\u307f\u304c\u30b5\u30dd\u30fc\u30c8\u3055\u308c\u307e\u3059\u3002 POSIX\u6587\u5b57\u30af\u30e9\u30b9\uff08\\ n\u3084\\ w\u306a\u3069\uff09\u306f\u7121\u52b9\u3067\u3059\u3002
AbstractKeywordSearchPerformer.search.noFilesIdxdMsg=<html>\u30d5\u30a1\u30a4\u30eb\u304c\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002<br />\u30ad\u30fc\u30ef\u30fc\u30c9\u691c\u7d22\u30e2\u30b8\u30e5\u30fc\u30eb\u3092\u6709\u52b9\u306b\u3057\u305f\u72b6\u614b\u3067\u30a4\u30e1\u30fc\u30b8\u3092\u518d\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u3057\u3066\u304f\u3060\u3055\u3044\u3002</html>
AbstractKeywordSearchPerformer.search.noFilesInIdxMsg=<html>\u307e\u3060\u30d5\u30a1\u30a4\u30eb\u304c\u7d22\u5f15\u306b\u542b\u307e\u308c\u3066\u3044\u307e\u305b\u3093\u3002<br />\u5f8c\u3067\u3082\u3046\u4e00\u5ea6\u304a\u8a66\u3057\u304f\u3060\u3055\u3044\u3002 \u7d22\u5f15\u306f {0} \u5206\u3054\u3068\u306b\u66f4\u65b0\u3055\u308c\u307e\u3059\u3002</html>
AbstractKeywordSearchPerformer.search.searchIngestInProgressTitle=\u30ad\u30fc\u30ef\u30fc\u30c9\u691c\u7d22\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u304c\u9032\u884c\u4e2d\u3067\u3059
AccountsText.creditCardNumber=\u30af\u30ec\u30b8\u30c3\u30c8\u30ab\u30fc\u30c9\u756a\u53f7
AccountsText.creditCardNumbers=\u30af\u30ec\u30b8\u30c3\u30c8\u30ab\u30fc\u30c9\u756a\u53f7
@ -55,8 +53,6 @@ ExtractAllTermsReport.exportComplete=\u30e6\u30cb\u30fc\u30af\u306a\u5358\u8a9e\
ExtractAllTermsReport.getName.text=\u30e6\u30cb\u30fc\u30af\u306a\u5358\u8a9e\u3092\u62bd\u51fa\u3059\u308b
ExtractAllTermsReport.numberExtractedTerms=\u62bd\u51fa\u3055\u308c\u305f{0}\u7528\u8a9e...
ExtractAllTermsReport.search.ingestInProgressBody=<html> \u30ad\u30fc\u30ef\u30fc\u30c9\u691c\u7d22\u8aad\u8fbc\u306f\u73fe\u5728\u5b9f\u884c\u4e2d\u3067\u3059\u3002<br/>\u3059\u3079\u3066\u306e\u30d5\u30a1\u30a4\u30eb\u304c\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u306b\u767b\u9332\u3055\u308c\u3066\u3044\u308b\u308f\u3051\u3067\u306f\u306a\u304f\u3001\u30e6\u30cb\u30fc\u30af\u306a\u5358\u8a9e\u3092\u62bd\u51fa\u306f\u4e0d\u5b8c\u5168\u306a\u7d50\u679c\u306b\u306a\u308b\u53ef\u80fd\u6027\u304c\u3042\u308a\u307e\u3059\u3002<br />\u305d\u308c\u3067\u3082\u30e6\u30cb\u30fc\u30af\u306a\u5358\u8a9e\u306e\u62bd\u51fa\u3092\u7d9a\u884c\u3057\u307e\u3059\u304b\uff1f</ html>
ExtractAllTermsReport.search.noFilesInIdxMsg=\u307e\u3060\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u306b\u767b\u9332\u3055\u308c\u3066\u3044\u308b\u30d5\u30a1\u30a4\u30eb\u306f\u3042\u308a\u307e\u305b\u3093\u3002 \u3042\u3068\u3067\u3082\u3046\u4e00\u5ea6\u8a66\u3057\u3066\u307f\u3066\u304f\u3060\u3055\u3044\u3002 \u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u306f{0}\u5206\u3054\u3068\u306b\u66f4\u65b0\u3055\u308c\u307e\u3059\u3002
ExtractAllTermsReport.search.noFilesInIdxMsg2=\u307e\u3060\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u306b\u767b\u9332\u3055\u308c\u3066\u3044\u308b\u30d5\u30a1\u30a4\u30eb\u306f\u3042\u308a\u307e\u305b\u3093\u3002 \u3042\u3068\u3067\u3082\u3046\u4e00\u5ea6\u8a66\u3057\u3066\u307f\u3066\u304f\u3060\u3055\u3044\u3002
ExtractAllTermsReport.search.searchIngestInProgressTitle=\u30ad\u30fc\u30ef\u30fc\u30c9\u691c\u7d22\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u304c\u9032\u884c\u4e2d\u3067\u3059
ExtractAllTermsReport.startExport=\u30e6\u30cb\u30fc\u30af\u306a\u5358\u8a9e\u62bd\u51fa\u306e\u958b\u59cb
ExtractedContentPanel.SetMarkup.progress.loading={0} \u306e\u30c6\u30ad\u30b9\u30c8\u3092\u8aad\u307f\u8fbc\u3093\u3067\u3044\u307e\u3059
@ -210,23 +206,12 @@ KeywordSearchGlobalSearchSettingsPanel.customizeComponents.windowsLimitedOCR=\u3
KeywordSearchGlobalSearchSettingsPanel.customizeComponents.windowsOCR=OCR\u6587\u5b57\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\uff08Windows 64\u30d3\u30c3\u30c8\u304c\u5fc5\u8981\uff09
KeywordSearchGlobalSearchSettingsPanel.filesIndexedLabel.text=\u30ad\u30fc\u30ef\u30fc\u30c9\u7d22\u5f15\u5185\u306e\u30d5\u30a1\u30a4\u30eb\:
KeywordSearchGlobalSearchSettingsPanel.filesIndexedValue.text=0
KeywordSearchGlobalSearchSettingsPanel.frequencyLabel.text=\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u4e2d\u306e\u7d50\u679c\u66f4\u65b0\u983b\u5ea6\:
KeywordSearchGlobalSearchSettingsPanel.informationLabel.text=\u60c5\u5831
KeywordSearchGlobalSearchSettingsPanel.ingestWarningLabel.text=\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u304c\u9032\u884c\u4e2d\u3067\u3059\u3002\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u304c\u5b8c\u4e86\u3059\u308b\u307e\u3067\u4e00\u90e8\u306e\u8a2d\u5b9a\u3092\u5229\u7528\u3067\u304d\u307e\u305b\u3093\u3002
KeywordSearchGlobalSearchSettingsPanel.settingsLabel.text=\u8a2d\u5b9a
KeywordSearchGlobalSearchSettingsPanel.showSnippetsCB.text=\u30ad\u30fc\u30ef\u30fc\u30c9\u691c\u7d22\u7d50\u679c\u306b\u30ad\u30fc\u30ef\u30fc\u30c9\u30d7\u30ec\u30d3\u30e5\u30fc\u3092\u8868\u793a(\u691c\u7d22\u6642\u9593\u304c\u9577\u304f\u306a\u308a\u307e\u3059)
KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.text=\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u4e2d\u306bNSRL(\u65e2\u77e5\u306e\u30d5\u30a1\u30a4\u30eb)\u306e\u30d5\u30a1\u30a4\u30eb\u3092\u30ad\u30fc\u30ef\u30fc\u30c9\u306b\u8ffd\u52a0\u3057\u306a\u3044\u3067\u304f\u3060\u3055\u3044
KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.toolTipText=\u30cf\u30c3\u30b7\u30e5\u30bb\u30c3\u30c8\u30b5\u30fc\u30d3\u30b9\u306b\u4ee5\u524d\u306b\u5b9f\u884c\u6e08\u307f\u3067\u3042\u308b\u3053\u3068\u3001\u307e\u305f\u306f\u6b21\u306e\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u306b\u9078\u629e\u3055\u308c\u308b\u3053\u3068\u3092\u8981\u6c42\u3057\u307e\u3059\u3002
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.text=20\u5206(\u6700\u9045\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u3001\u6700\u901f\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8)
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.toolTipText=20\u5206(\u6700\u901f\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u6642\u9593)
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.text=10\u5206(\u3088\u308a\u9045\u3044\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u3001\u3088\u308a\u901f\u3044\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8)
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.toolTipText=10\u5206(\u30c7\u30d5\u30a9\u30eb\u30c8\u3088\u308a\u3082\u901f\u3044\u7dcf\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u6642\u9593)
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.text=5\u5206(\u30c7\u30d5\u30a9\u30eb\u30c8)
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.toolTipText=5\u5206(\u7dcf\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u6642\u9593\u304c\u9577\u304f\u306a\u308a\u307e\u3059)
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.text_1=1\u5206(\u3088\u308a\u901f\u3044\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u3001\u6700\u9577\u306e\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8)
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.toolTipText=1\u5206(\u7dcf\u30a4\u30f3\u30b8\u30a7\u30b9\u30c8\u6642\u9593\u306f\u6700\u9577\u306b\u306a\u308a\u307e\u3059)
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.text=\u5b9a\u671f\u691c\u7d22\u306a\u3057
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.toolTipText=\u5168\u4f53\u3067\u6700\u901f\u3067\u3059\u304c\u3001\u6700\u5f8c\u307e\u3067\u7d50\u679c\u306f\u8868\u793a\u3055\u308c\u307e\u305b\u3093
KeywordSearchGlobalSettingsPanel.Title=\u30ad\u30fc\u30ef\u30fc\u30c9\u4e00\u62ec\u691c\u7d22\u8a2d\u5b9a
KeywordSearchIngestModule.doInBackGround.displayName=\u30ad\u30fc\u30ef\u30fc\u30c9\u5b9a\u671f\u691c\u7d22
KeywordSearchIngestModule.doInBackGround.finalizeMsg=\u78ba\u5b9a

View File

@ -398,6 +398,8 @@ class Chunker implements Iterator<Chunk>, Iterable<Chunk> {
private final StringBuilder sb;
private final int baseChunkSizeChars;
private final StringBuilder lowerCasedChunk;
private boolean hasHit = false;
private int chunkId = 0;
Chunk(StringBuilder sb, int baseChunkSizeChars, StringBuilder lowerCasedChunk) {
this.sb = sb;
@ -420,7 +422,7 @@ class Chunker implements Iterator<Chunk>, Iterable<Chunk> {
*
* @return The content of the chunk.
*/
public String geLowerCasedChunk() {
public String getLowerCasedChunk() {
return lowerCasedChunk.toString();
}
@ -432,5 +434,21 @@ class Chunker implements Iterator<Chunk>, Iterable<Chunk> {
    /**
     * Gets the length, in characters, of the base chunk (the chunk text
     * without any overlapping window text).
     *
     * @return The base chunk length in characters.
     */
    int getBaseChunkLength() {
        return baseChunkSizeChars;
    }
    /**
     * Gets whether a keyword hit has been recorded for this chunk.
     * NOTE(review): the flag is only stored here; it is set by external
     * callers via setHasHit() — confirm semantics against the caller.
     *
     * @return True if a hit has been recorded for this chunk.
     */
    boolean hasHit() {
        return hasHit;
    }

    /**
     * Sets whether a keyword hit has been recorded for this chunk.
     *
     * @param b True if a hit was found in this chunk.
     */
    void setHasHit(boolean b) {
        hasHit = b;
    }

    /**
     * Sets the sequential ID of this chunk within its source.
     *
     * @param id The chunk ID.
     */
    void setChunkId(int id) {
        chunkId = id;
    }

    /**
     * Gets the sequential ID of this chunk within its source.
     *
     * @return The chunk ID (0 if never set).
     */
    int getChunkId() {
        return chunkId;
    }
}
}

View File

@ -1,7 +1,7 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011-2018 Basis Technology Corp.
* Copyright 2011-2022 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
@ -30,7 +30,6 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import javax.swing.JCheckBox;
import javax.swing.JTable;
import javax.swing.ListSelectionModel;
@ -143,10 +142,7 @@ class DropdownListSearchPanel extends AdHocSearchPanel {
searchAddListener = new ActionListener() {
@Override
public void actionPerformed(ActionEvent e) {
if (ingestRunning) {
IngestSearchRunner.getInstance().addKeywordListsToAllJobs(listsTableModel.getSelectedLists());
logger.log(Level.INFO, "Submitted enqueued lists to ingest"); //NON-NLS
} else {
if (!ingestRunning) {
searchAction(e);
}
}

View File

@ -1,7 +1,7 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2021 Basis Technology Corp.
* Copyright 2022 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
@ -51,9 +51,8 @@ public class ExtractAllTermsReport implements GeneralReportModule {
@NbBundle.Messages({
"ExtractAllTermsReport.error.noOpenCase=No currently open case.",
"# {0} - Keyword search commit frequency",
"ExtractAllTermsReport.search.noFilesInIdxMsg=No files are in index yet. Try again later. Index is updated every {0} minutes.",
"ExtractAllTermsReport.search.noFilesInIdxMsg2=No files are in index yet. Try again later",
"ExtractAllTermsReport.search.noFilesInIdxMsg=No files are in index yet. If Solr keyword search indexing and Solr indexing were enabled, wait for ingest to complete.",
"ExtractAllTermsReport.search.noFilesInIdxMsg2=No files are in index yet. Re-ingest the image with the Keyword Search Module and Solr indexing enabled.",
"ExtractAllTermsReport.search.searchIngestInProgressTitle=Keyword Search Ingest in Progress",
"ExtractAllTermsReport.search.ingestInProgressBody=<html>Keyword Search Ingest is currently running.<br />Not all files have been indexed and unique word extraction might yield incomplete results.<br />Do you want to proceed with unique word extraction anyway?</html>",
"ExtractAllTermsReport.startExport=Starting Unique Word Extraction",
@ -83,7 +82,7 @@ public class ExtractAllTermsReport implements GeneralReportModule {
if (filesIndexed == 0) {
if (isIngestRunning) {
progressPanel.complete(ReportProgressPanel.ReportStatus.ERROR, Bundle.ExtractAllTermsReport_search_noFilesInIdxMsg(KeywordSearchSettings.getUpdateFrequency().getTime()));
progressPanel.complete(ReportProgressPanel.ReportStatus.ERROR, Bundle.ExtractAllTermsReport_search_noFilesInIdxMsg());
} else {
progressPanel.complete(ReportProgressPanel.ReportStatus.ERROR, Bundle.ExtractAllTermsReport_search_noFilesInIdxMsg2());
}

View File

@ -1,705 +0,0 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2014 - 2021 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.keywordsearch;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import javax.annotation.concurrent.GuardedBy;
import javax.swing.SwingUtilities;
import javax.swing.SwingWorker;
import org.netbeans.api.progress.ProgressHandle;
import org.openide.util.Cancellable;
import org.openide.util.NbBundle;
import org.openide.util.NbBundle.Messages;
import org.sleuthkit.autopsy.core.RuntimeProperties;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil;
import org.sleuthkit.autopsy.coreutils.StopWatch;
import org.sleuthkit.autopsy.coreutils.ThreadConfined;
import org.sleuthkit.autopsy.ingest.IngestJobContext;
import org.sleuthkit.autopsy.ingest.IngestMessage;
import org.sleuthkit.autopsy.ingest.IngestServices;
/**
 * Performs periodic and final keyword searches for ingest jobs. Periodic
 * searches are done in background tasks. This represents a careful working
 * around of the contract for IngestModule.process(). Final searches are done
 * synchronously in the calling thread, as required by the contract for
 * IngestModule.shutDown().
 */
final class IngestSearchRunner {

    private static final Logger logger = Logger.getLogger(IngestSearchRunner.class.getName());

    // Singleton instance, lazily created by getInstance().
    private static IngestSearchRunner instance = null;

    private final IngestServices services = IngestServices.getInstance();
    private Ingester ingester = null;

    // Delay between periodic searches; doubled by recalculateUpdateIntervalTime()
    // when a search round takes a large fraction of the current interval.
    private long currentUpdateIntervalMs;
    private volatile boolean periodicSearchTaskRunning;
    private volatile Future<?> periodicSearchTaskHandle;
    private final ScheduledThreadPoolExecutor periodicSearchTaskExecutor;
    private static final int NUM_SEARCH_SCHEDULING_THREADS = 1;
    private static final String SEARCH_SCHEDULER_THREAD_NAME = "periodic-search-scheduling-%d";
    private final Map<Long, SearchJobInfo> jobs = new ConcurrentHashMap<>(); // Ingest job ID to search job info
    private final boolean usingNetBeansGUI = RuntimeProperties.runningWithGUI();

    /*
     * Constructs a singleton object that performs periodic and final keyword
     * searches for ingest jobs. Periodic searches are done in background tasks.
     * This represents a careful working around of the contract for
     * IngestModule.process(). Final searches are done synchronously in the
     * calling thread, as required by the contract for IngestModule.shutDown().
     */
    private IngestSearchRunner() {
        currentUpdateIntervalMs = ((long) KeywordSearchSettings.getUpdateFrequency().getTime()) * 60 * 1000;
        ingester = Ingester.getDefault();
        periodicSearchTaskExecutor = new ScheduledThreadPoolExecutor(NUM_SEARCH_SCHEDULING_THREADS, new ThreadFactoryBuilder().setNameFormat(SEARCH_SCHEDULER_THREAD_NAME).build());
    }

    /**
     * Gets the ingest search runner singleton.
     *
     * @return The ingest search runner.
     */
    public static synchronized IngestSearchRunner getInstance() {
        if (instance == null) {
            instance = new IngestSearchRunner();
        }
        return instance;
    }

    /**
     * Starts the search job for an ingest job.
     *
     * @param jobContext       The ingest job context.
     * @param keywordListNames The names of the keyword search lists for the
     *                         ingest job.
     */
    public synchronized void startJob(IngestJobContext jobContext, List<String> keywordListNames) {
        long jobId = jobContext.getJobId();
        if (jobs.containsKey(jobId) == false) {
            SearchJobInfo jobData = new SearchJobInfo(jobContext, keywordListNames);
            jobs.put(jobId, jobData);
        }

        /*
         * Keep track of the number of keyword search file ingest modules that
         * are doing analysis for the ingest job, i.e., that have called this
         * method. This is needed by endJob().
         */
        jobs.get(jobId).incrementModuleReferenceCount();

        /*
         * Start a periodic search task in the scheduling executor, if one is
         * not already running.
         */
        if ((jobs.size() > 0) && (periodicSearchTaskRunning == false)) {
            currentUpdateIntervalMs = ((long) KeywordSearchSettings.getUpdateFrequency().getTime()) * 60 * 1000;
            periodicSearchTaskHandle = periodicSearchTaskExecutor.schedule(new PeriodicSearchTask(), currentUpdateIntervalMs, MILLISECONDS);
            periodicSearchTaskRunning = true;
        }
    }

    /**
     * Finishes a search job for an ingest job.
     *
     * @param jobId The ingest job ID.
     */
    public synchronized void endJob(long jobId) {
        /*
         * Only complete the job if this is the last keyword search file ingest
         * module doing analysis for this job.
         */
        SearchJobInfo job;
        job = jobs.get(jobId);
        if (job == null) {
            return; // RJCTODO: SEVERE (no job registered for this ID)
        }
        if (job.decrementModuleReferenceCount() != 0) {
            // NOTE(review): removing the job while other modules still hold
            // references (count != 0) looks inverted relative to the comment
            // above — confirm whether this condition should be == 0.
            jobs.remove(jobId);
        }

        /*
         * Commit the index and do the final search. The final search is done in
         * the ingest thread that calls shutDown() on the keyword search file
         * ingest module, per the contract of IngestModule.shutDown().
         */
        logger.log(Level.INFO, "Commiting search index before final search for search job {0}", job.getJobId()); //NON-NLS
        commit();
        logger.log(Level.INFO, "Starting final search for search job {0}", job.getJobId()); //NON-NLS
        doFinalSearch(job);
        logger.log(Level.INFO, "Final search for search job {0} completed", job.getJobId()); //NON-NLS

        if (jobs.isEmpty()) {
            cancelPeriodicSearchSchedulingTask();
        }
    }

    /**
     * Stops the search job for an ingest job.
     *
     * @param jobId The ingest job ID.
     */
    public synchronized void stopJob(long jobId) {
        logger.log(Level.INFO, "Stopping search job {0}", jobId); //NON-NLS
        commit();

        SearchJobInfo job;
        job = jobs.get(jobId);
        if (job == null) {
            return;
        }

        /*
         * Request cancellation of the current keyword search, whether it is a
         * periodic search or a final search.
         */
        IngestSearchRunner.Searcher currentSearcher = job.getCurrentSearcher();
        if ((currentSearcher != null) && (!currentSearcher.isDone())) {
            logger.log(Level.INFO, "Cancelling search job {0}", jobId); //NON-NLS
            currentSearcher.cancel(true);
        }

        jobs.remove(jobId);

        if (jobs.isEmpty()) {
            cancelPeriodicSearchSchedulingTask();
        }
    }

    /**
     * Adds the given keyword list names to the set of keyword lists to be
     * searched by ALL keyword search jobs. This supports adding one or more
     * keyword search lists to ingest jobs already in progress.
     *
     * @param keywordListNames The names of the additional keyword lists.
     */
    public synchronized void addKeywordListsToAllJobs(List<String> keywordListNames) {
        for (String listName : keywordListNames) {
            logger.log(Level.INFO, "Adding keyword list {0} to all jobs", listName); //NON-NLS
            for (SearchJobInfo j : jobs.values()) {
                j.addKeywordListName(listName);
            }
        }
    }

    /**
     * Commits the Solr index for the current case and publishes an event
     * indicating the current number of indexed items (this is no longer just
     * files).
     */
    private void commit() {
        ingester.commit();

        /*
         * Publish an event advertising the number of indexed items. Note that
         * this is no longer the number of indexed files, since the text of many
         * items in addition to files is indexed.
         */
        try {
            final int numIndexedFiles = KeywordSearch.getServer().queryNumIndexedFiles();
            KeywordSearch.fireNumIndexedFilesChange(null, numIndexedFiles);
        } catch (NoOpenCoreException | KeywordSearchModuleException ex) {
            logger.log(Level.SEVERE, "Error executing Solr query for number of indexed files", ex); //NON-NLS
        }
    }

    /**
     * Performs the final keyword search for an ingest job. The search is done
     * synchronously, as required by the contract for IngestModule.shutDown().
     *
     * @param job The keyword search job info.
     */
    private void doFinalSearch(SearchJobInfo job) {
        if (!job.getKeywordListNames().isEmpty()) {
            try {
                /*
                 * Wait for any periodic searches being done in a SwingWorker
                 * pool thread to finish.
                 */
                job.waitForCurrentWorker();

                IngestSearchRunner.Searcher finalSearcher = new IngestSearchRunner.Searcher(job, true);
                job.setCurrentSearcher(finalSearcher);

                /*
                 * Do the final search synchronously on the current ingest
                 * thread, per the contract specified by IngestModule.shutDown().
                 */
                finalSearcher.doInBackground();
            } catch (InterruptedException | CancellationException ex) {
                logger.log(Level.INFO, "Final search for search job {0} interrupted or cancelled", job.getJobId()); //NON-NLS
            } catch (Exception ex) {
                logger.log(Level.SEVERE, String.format("Final search for search job %d failed", job.getJobId()), ex); //NON-NLS
            }
        }
    }

    /**
     * Cancels the current periodic search scheduling task.
     */
    private synchronized void cancelPeriodicSearchSchedulingTask() {
        if (periodicSearchTaskHandle != null) {
            logger.log(Level.INFO, "No more search jobs, stopping periodic search scheduling"); //NON-NLS
            periodicSearchTaskHandle.cancel(true);
            periodicSearchTaskRunning = false;
        }
    }

    /**
     * Task that runs in ScheduledThreadPoolExecutor to periodically start and
     * wait for keyword search tasks for each keyword search job in progress.
     * The keyword search tasks for individual ingest jobs are implemented as
     * SwingWorkers to support legacy APIs.
     */
    private final class PeriodicSearchTask implements Runnable {

        @Override
        public void run() {
            /*
             * If there are no more jobs or this task has been cancelled, exit.
             */
            if (jobs.isEmpty() || periodicSearchTaskHandle.isCancelled()) {
                logger.log(Level.INFO, "Periodic search scheduling task has been cancelled, exiting"); //NON-NLS
                periodicSearchTaskRunning = false;
                return;
            }

            /*
             * Commit the Solr index for the current case before doing the
             * searches.
             */
            commit();

            /*
             * Do a keyword search for each ingest job in progress. When the
             * searches are done, recalculate the "hold off" time between
             * searches to prevent back-to-back periodic searches and schedule
             * the next periodic search task.
             */
            final StopWatch stopWatch = new StopWatch();
            stopWatch.start();
            for (Iterator<Entry<Long, SearchJobInfo>> iterator = jobs.entrySet().iterator(); iterator.hasNext();) {
                SearchJobInfo job = iterator.next().getValue();

                if (periodicSearchTaskHandle.isCancelled()) {
                    logger.log(Level.INFO, "Periodic search scheduling task has been cancelled, exiting"); //NON-NLS
                    periodicSearchTaskRunning = false;
                    return;
                }

                if (!job.getKeywordListNames().isEmpty() && !job.isWorkerRunning()) {
                    logger.log(Level.INFO, "Starting periodic search for search job {0}", job.getJobId());
                    Searcher searcher = new Searcher(job, false);
                    job.setCurrentSearcher(searcher);
                    searcher.execute();
                    job.setWorkerRunning(true);

                    try {
                        // Block until this job's search completes; jobs are
                        // searched one at a time in this scheduling thread.
                        searcher.get();
                    } catch (InterruptedException | ExecutionException ex) {
                        logger.log(Level.SEVERE, String.format("Error performing keyword search for ingest job %d", job.getJobId()), ex); //NON-NLS
                        services.postMessage(IngestMessage.createErrorMessage(
                                KeywordSearchModuleFactory.getModuleName(),
                                NbBundle.getMessage(this.getClass(), "SearchRunner.Searcher.done.err.msg"), ex.getMessage()));
                    } catch (java.util.concurrent.CancellationException ex) {
                        logger.log(Level.SEVERE, String.format("Keyword search for ingest job %d cancelled", job.getJobId()), ex); //NON-NLS
                    }
                }
            }
            stopWatch.stop();
            logger.log(Level.INFO, "Periodic searches for all ingest jobs cumulatively took {0} secs", stopWatch.getElapsedTimeSecs()); //NON-NLS

            recalculateUpdateIntervalTime(stopWatch.getElapsedTimeSecs()); // ELDEBUG

            periodicSearchTaskHandle = periodicSearchTaskExecutor.schedule(new PeriodicSearchTask(), currentUpdateIntervalMs, MILLISECONDS);
        }

        /**
         * Sets the time interval between periodic keyword searches to avoid
         * running back-to-back searches. If the most recent round of searches
         * took longer than 1/4 of the current interval, doubles the interval.
         *
         * @param lastSerchTimeSec The time in seconds used to execute the most
         *                         recent round of keyword searches.
         */
        private void recalculateUpdateIntervalTime(long lastSerchTimeSec) {
            if (lastSerchTimeSec * 1000 < currentUpdateIntervalMs / 4) {
                return;
            }
            currentUpdateIntervalMs *= 2;
            logger.log(Level.WARNING, "Last periodic search took {0} sec. Increasing search interval to {1} sec", new Object[]{lastSerchTimeSec, currentUpdateIntervalMs / 1000});
        }
    }

    /**
     * A data structure to keep track of the keyword lists, current results, and
     * search running status for an ingest job.
     */
    private class SearchJobInfo {

        private final IngestJobContext jobContext;
        private final long jobId;
        private final long dataSourceId;
        private volatile boolean workerRunning;
        @GuardedBy("this")
        private final List<String> keywordListNames;
        @GuardedBy("this")
        private final Map<Keyword, Set<Long>> currentResults; // Keyword to object IDs of items with hits
        private IngestSearchRunner.Searcher currentSearcher;
        private final AtomicLong moduleReferenceCount = new AtomicLong(0);
        // Used by waitForCurrentWorker()/searchNotify() to signal worker completion.
        private final Object finalSearchLock = new Object();

        private SearchJobInfo(IngestJobContext jobContext, List<String> keywordListNames) {
            this.jobContext = jobContext;
            jobId = jobContext.getJobId();
            dataSourceId = jobContext.getDataSource().getId();
            this.keywordListNames = new ArrayList<>(keywordListNames);
            currentResults = new HashMap<>();
            workerRunning = false;
            currentSearcher = null;
        }

        private IngestJobContext getJobContext() {
            return jobContext;
        }

        private long getJobId() {
            return jobId;
        }

        private long getDataSourceId() {
            return dataSourceId;
        }

        private synchronized List<String> getKeywordListNames() {
            // Defensive copy so callers can iterate without holding this lock.
            return new ArrayList<>(keywordListNames);
        }

        private synchronized void addKeywordListName(String keywordListName) {
            if (!keywordListNames.contains(keywordListName)) {
                keywordListNames.add(keywordListName);
            }
        }

        private synchronized Set<Long> currentKeywordResults(Keyword k) {
            return currentResults.get(k);
        }

        private synchronized void addKeywordResults(Keyword k, Set<Long> resultsIDs) {
            currentResults.put(k, resultsIDs);
        }

        private boolean isWorkerRunning() {
            return workerRunning;
        }

        private void setWorkerRunning(boolean flag) {
            workerRunning = flag;
        }

        private synchronized IngestSearchRunner.Searcher getCurrentSearcher() {
            return currentSearcher;
        }

        private synchronized void setCurrentSearcher(IngestSearchRunner.Searcher searchRunner) {
            currentSearcher = searchRunner;
        }

        private void incrementModuleReferenceCount() {
            moduleReferenceCount.incrementAndGet();
        }

        private long decrementModuleReferenceCount() {
            return moduleReferenceCount.decrementAndGet();
        }

        /**
         * Waits for the current search task to complete.
         *
         * @throws InterruptedException
         */
        private void waitForCurrentWorker() throws InterruptedException {
            synchronized (finalSearchLock) {
                while (workerRunning) {
                    logger.log(Level.INFO, String.format("Waiting for previous search task for job %d to finish", jobId)); //NON-NLS
                    finalSearchLock.wait();
                    logger.log(Level.INFO, String.format("Notified previous search task for job %d to finish", jobId)); //NON-NLS
                }
            }
        }

        /**
         * Signals any threads waiting on the current search task to complete.
         */
        private void searchNotify() {
            synchronized (finalSearchLock) {
                workerRunning = false;
                finalSearchLock.notify();
            }
        }
    }

    /*
     * A SwingWorker responsible for searching the Solr index of the current
     * case for the keywords for an ingest job. Keyword hit analysis results are
     * created and posted to the blackboard and notifications are sent to the
     * ingest inbox.
     */
    private final class Searcher extends SwingWorker<Object, Void> {

        /*
         * Searcher has private copies/snapshots of the lists and keywords
         */
        private final SearchJobInfo job;
        private final List<Keyword> keywords; //keywords to search
        private final List<String> keywordListNames; // lists currently being searched
        private final List<KeywordList> keywordLists;
        private final Map<Keyword, KeywordList> keywordToList; //keyword to list name mapping
        @ThreadConfined(type = ThreadConfined.ThreadType.AWT)
        private ProgressHandle progressIndicator;
        private boolean finalRun = false;

        Searcher(SearchJobInfo job, boolean finalRun) {
            this.job = job;
            this.finalRun = finalRun;
            keywordListNames = job.getKeywordListNames();
            keywords = new ArrayList<>();
            keywordToList = new HashMap<>();
            keywordLists = new ArrayList<>();
        }

        @Override
        @Messages("SearchRunner.query.exception.msg=Error performing query:")
        protected Object doInBackground() throws Exception {
            try {
                if (usingNetBeansGUI) {
                    /*
                     * If running in the NetBeans thick client application
                     * version of Autopsy, NetBeans progress handles (i.e.,
                     * progress bars) are used to display search progress in the
                     * lower right hand corner of the main application window.
                     *
                     * A layer of abstraction to allow alternate representations
                     * of progress could be used here, as it is in other places
                     * in the application (see implementations and usage of
                     * org.sleuthkit.autopsy.progress.ProgressIndicator
                     * interface), to better decouple keyword search from the
                     * application's presentation layer.
                     */
                    SwingUtilities.invokeAndWait(() -> {
                        final String displayName = NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.displayName")
                                + (finalRun ? (" - " + NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.finalizeMsg")) : "");
                        progressIndicator = ProgressHandle.createHandle(displayName, new Cancellable() {
                            @Override
                            public boolean cancel() {
                                if (progressIndicator != null) {
                                    progressIndicator.setDisplayName(displayName + " " + NbBundle.getMessage(this.getClass(), "SearchRunner.doInBackGround.cancelMsg"));
                                }
                                logger.log(Level.INFO, "Search cancelled by user"); //NON-NLS
                                // Cancel on a separate thread; cancel(true) may
                                // block and this callback runs on the EDT.
                                new Thread(() -> {
                                    IngestSearchRunner.Searcher.this.cancel(true);
                                }).start();
                                return true;
                            }
                        });
                        progressIndicator.start();
                        progressIndicator.switchToIndeterminate();
                    });
                }

                updateKeywords();
                for (Keyword keyword : keywords) {
                    if (isCancelled() || job.getJobContext().fileIngestIsCancelled()) {
                        logger.log(Level.INFO, "Cancellation requested, exiting before new keyword processed: {0}", keyword.getSearchTerm()); //NON-NLS
                        return null;
                    }

                    KeywordList keywordList = keywordToList.get(keyword);

                    if (usingNetBeansGUI) {
                        // Truncate long search terms for the progress display.
                        String searchTermStr = keyword.getSearchTerm();
                        if (searchTermStr.length() > 50) {
                            searchTermStr = searchTermStr.substring(0, 49) + "...";
                        }
                        final String progressMessage = keywordList.getName() + ": " + searchTermStr;
                        SwingUtilities.invokeLater(() -> {
                            progressIndicator.progress(progressMessage);
                        });
                    }

                    // Filtering
                    //limit search to currently ingested data sources
                    //set up a filter with 1 or more image ids OR'ed
                    KeywordSearchQuery keywordSearchQuery = KeywordSearchUtil.getQueryForKeyword(keyword, keywordList);
                    KeywordQueryFilter dataSourceFilter = new KeywordQueryFilter(KeywordQueryFilter.FilterType.DATA_SOURCE, job.getDataSourceId());
                    keywordSearchQuery.addFilter(dataSourceFilter);

                    // Do the actual search
                    QueryResults queryResults;
                    try {
                        queryResults = keywordSearchQuery.performQuery();
                    } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
                        logger.log(Level.SEVERE, "Error performing query: " + keyword.getSearchTerm(), ex); //NON-NLS
                        if (usingNetBeansGUI) {
                            final String userMessage = Bundle.SearchRunner_query_exception_msg() + keyword.getSearchTerm();
                            SwingUtilities.invokeLater(() -> {
                                MessageNotifyUtil.Notify.error(userMessage, ex.getCause().getMessage());
                            });
                        }
                        //no reason to continue with next query if recovery failed
                        //or wait for recovery to kick in and run again later
                        //likely case has closed and threads are being interrupted
                        return null;
                    } catch (CancellationException e) {
                        logger.log(Level.INFO, "Cancellation requested, exiting during keyword query: {0}", keyword.getSearchTerm()); //NON-NLS
                        return null;
                    }

                    // Reduce the results of the query to only those hits we
                    // have not already seen.
                    QueryResults newResults = filterResults(queryResults);

                    if (!newResults.getKeywords().isEmpty()) {
                        // Create blackboard artifacts
                        newResults.process(this, keywordList.getIngestMessages(), true, job.getJobId());
                    }
                }
            } catch (Exception ex) {
                logger.log(Level.SEVERE, String.format("Error performing keyword search for ingest job %d", job.getJobId()), ex); //NON-NLS
            } finally {
                if (progressIndicator != null) {
                    SwingUtilities.invokeLater(new Runnable() {
                        @Override
                        public void run() {
                            progressIndicator.finish();
                            progressIndicator = null;
                        }
                    });
                }
                // In case a thread is waiting on this worker to be done
                job.searchNotify();
            }
            return null;
        }

        /**
         * Sync-up the updated keywords from the currently used lists in the XML.
         * Rebuilds the keyword, list, and keyword-to-list snapshots from the
         * current XmlKeywordSearchList contents.
         */
        private void updateKeywords() {
            XmlKeywordSearchList loader = XmlKeywordSearchList.getCurrent();

            keywords.clear();
            keywordToList.clear();
            keywordLists.clear();

            for (String name : keywordListNames) {
                KeywordList list = loader.getList(name);
                keywordLists.add(list);
                for (Keyword k : list.getKeywords()) {
                    keywords.add(k);
                    keywordToList.put(k, list);
                }
            }
        }

        /**
         * This method filters out all of the hits found in earlier periodic
         * searches and returns only the results found by the most recent
         * search.
         *
         * This method will only return hits for objects for which we haven't
         * previously seen a hit for the keyword.
         *
         * @param queryResult The results returned by a keyword search.
         *
         * @return A unique set of hits found by the most recent search for
         *         objects that have not previously had a hit. The hits will be
         *         for the lowest numbered chunk associated with the object.
         */
        private QueryResults filterResults(QueryResults queryResult) {

            // Create a new (empty) QueryResults object to hold the most recently
            // found hits.
            QueryResults newResults = new QueryResults(queryResult.getQuery());

            // For each keyword represented in the results.
            for (Keyword keyword : queryResult.getKeywords()) {
                // These are all of the hits across all objects for the most recent search.
                // This may well include duplicates of hits we've seen in earlier periodic searches.
                List<KeywordHit> queryTermResults = queryResult.getResults(keyword);

                // Sort the hits for this keyword so that we are always
                // guaranteed to return the hit for the lowest chunk.
                Collections.sort(queryTermResults);

                // This will be used to build up the hits we haven't seen before
                // for this keyword.
                List<KeywordHit> newUniqueHits = new ArrayList<>();

                // Get the set of object ids seen in the past by this searcher
                // for the given keyword.
                Set<Long> curTermResults = job.currentKeywordResults(keyword);
                if (curTermResults == null) {
                    // We create a new empty set if we haven't seen results for
                    // this keyword before.
                    curTermResults = new HashSet<>();
                }

                // For each hit for this keyword.
                for (KeywordHit hit : queryTermResults) {
                    if (curTermResults.contains(hit.getSolrObjectId())) {
                        // Skip the hit if we've already seen a hit for
                        // this keyword in the object.
                        continue;
                    }

                    // We haven't seen the hit before so add it to list of new
                    // unique hits.
                    newUniqueHits.add(hit);

                    // Add the object id to the results we've seen for this
                    // keyword.
                    curTermResults.add(hit.getSolrObjectId());
                }

                // Update the job with the list of objects for which we have
                // seen hits for the current keyword.
                job.addKeywordResults(keyword, curTermResults);

                // Add the new hits for the current keyword into the results
                // to be returned.
                newResults.addResult(keyword, newUniqueHits);
            }

            return newResults;
        }
    }
}

View File

@ -19,9 +19,14 @@
package org.sleuthkit.autopsy.keywordsearch;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.logging.Level;
@ -29,9 +34,9 @@ import org.apache.commons.lang3.math.NumberUtils;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrInputDocument;
import org.openide.util.NbBundle;
import org.openide.util.io.ReaderInputStream;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.coreutils.TimeZoneUtils;
import org.sleuthkit.autopsy.datamodel.ContentUtils;
import org.sleuthkit.autopsy.healthmonitor.HealthMonitor;
import org.sleuthkit.autopsy.healthmonitor.TimingMetric;
import org.sleuthkit.autopsy.ingest.IngestJobContext;
@ -146,10 +151,10 @@ class Ingester {
* @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException
*/
// TODO (JIRA-3118): Cancelled text indexing does not propagate cancellation to clients
< T extends SleuthkitVisitableItem> boolean indexText(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context) throws Ingester.IngesterException {
boolean doLanguageDetection = true;
return indexText(sourceReader, sourceID, sourceName, source, context, doLanguageDetection);
}
// < T extends SleuthkitVisitableItem> boolean search(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean indexIntoSolr, List<String> keywordListNames) throws Ingester.IngesterException {
// boolean doLanguageDetection = true;
// return search(sourceReader, sourceID, sourceName, source, context, doLanguageDetection, indexIntoSolr, keywordListNames);
// }
/**
* Read and chunk the source text for indexing in Solr. Does NOT perform
@ -170,11 +175,17 @@ class Ingester {
* @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException
*/
// TODO (JIRA-3118): Cancelled text indexing does not propagate cancellation to clients
< T extends SleuthkitVisitableItem> boolean indexStrings(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context) throws Ingester.IngesterException {
// Per JIRA-7100, it was determined that language detection on extracted strings can take a really long time.
boolean doLanguageDetection = false;
return indexText(sourceReader, sourceID, sourceName, source, context, doLanguageDetection);
}
// < T extends SleuthkitVisitableItem> boolean searchStrings(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean indexIntoSolr) throws Ingester.IngesterException {
// // Per JIRA-7100, it was determined that language detection on extracted strings can take a really long time.
// boolean doLanguageDetection = false;
// return search(sourceReader, sourceID, sourceName, source, context, doLanguageDetection, indexIntoSolr, null);
// }
//
// < T extends SleuthkitVisitableItem> boolean searchStrings(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean indexIntoSolr, List<String> keywordListNames) throws Ingester.IngesterException {
// // Per JIRA-7100, it was determined that language detection on extracted strings can take a really long time.
// boolean doLanguageDetection = false;
// return search(sourceReader, sourceID, sourceName, source, context, doLanguageDetection, indexIntoSolr, keywordListNames);
// }
/**
* Read and chunk the source text for indexing in Solr.
@ -195,25 +206,30 @@ class Ingester {
* @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException
*/
// TODO (JIRA-3118): Cancelled text indexing does not propagate cancellation to clients
private < T extends SleuthkitVisitableItem> boolean indexText(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean doLanguageDetection) throws Ingester.IngesterException {
< T extends SleuthkitVisitableItem> void search(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean doLanguageDetection, boolean indexIntoSolr, List<String> keywordListNames) throws Ingester.IngesterException, IOException, TskCoreException, Exception {
int numChunks = 0; //unknown until chunking is done
Map<String, String> contentFields = Collections.unmodifiableMap(getContentFields(source));
Optional<Language> language = Optional.empty();
InlineSearcher searcher = new InlineSearcher(keywordListNames, context);
List<Chunk> activeChunkList = new ArrayList<>();
boolean fileIndexed = false;
//Get a reader for the content of the given source
try (BufferedReader reader = new BufferedReader(sourceReader)) {
Chunker chunker = new Chunker(reader);
String name = sourceName;
if(!(source instanceof BlackboardArtifact)) {
searcher.searchString(name, sourceID, 0);
}
while (chunker.hasNext()) {
if (context != null && context.fileIngestIsCancelled()) {
if ( context.fileIngestIsCancelled()) {
logger.log(Level.INFO, "File ingest cancelled. Cancelling keyword search indexing of {0}", sourceName);
return false;
return;
}
Chunk chunk = chunker.next();
Map<String, Object> fields = new HashMap<>(contentFields);
String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
fields.put(Server.Schema.ID.toString(), chunkId);
fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));
chunk.setChunkId(numChunks+1);
if (doLanguageDetection) {
int size = Math.min(chunk.getBaseChunkLength(), LANGUAGE_DETECTION_STRING_SIZE);
@ -222,15 +238,108 @@ class Ingester {
// only do language detection on the first chunk of the document
doLanguageDetection = false;
}
if(keywordListNames != null) {
boolean hitFoundInChunk = searcher.searchChunk(chunk, sourceID, numChunks);
if(!indexIntoSolr) {
if(!hitFoundInChunk) {
if(!activeChunkList.isEmpty() ) {
if(activeChunkList.get(activeChunkList.size() - 1).hasHit()) {
activeChunkList.add(chunk);
// Write List
for(Chunk c: activeChunkList) {
indexChunk(c, sourceID, sourceName, language, contentFields, chunker.hasNext());
}
activeChunkList.clear();
} else {
activeChunkList.clear();
activeChunkList.add(chunk);
}
} else {
activeChunkList.add(chunk);
}
} else {
fileIndexed = true;
chunk.setHasHit(true);
activeChunkList.add(chunk);
}
} else {
indexChunk(chunk, sourceID, sourceName, language, contentFields, chunker.hasNext());
}
}
numChunks++;
}
if(activeChunkList.size() > 1 || (activeChunkList.size() == 1 && activeChunkList.get(0).hasHit())) {
for(Chunk c: activeChunkList) {
indexChunk(c, sourceID, sourceName, language, contentFields, true);
}
}
if (chunker.hasException()) {
logger.log(Level.WARNING, "Error chunking content from " + sourceID + ": " + sourceName, chunker.getException());
throw chunker.getException();
}
} finally {
if (context.fileIngestIsCancelled()) {
return ;
}
if (fileIndexed) {
Map<String, Object> fields = new HashMap<>(contentFields);
//after all chunks, index just the meta data, including the numChunks, of the parent file
fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
//reset id field to base document id
fields.put(Server.Schema.ID.toString(), Long.toString(sourceID));
//"parent" docs don't have chunk_size
fields.remove(Server.Schema.CHUNK_SIZE.toString());
indexChunk(null, null, sourceName, fields);
}
}
}
< T extends SleuthkitVisitableItem> boolean indexFile(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context, boolean doLanguageDetection) throws Ingester.IngesterException {
int numChunks = 0; //unknown until chunking is done
Map<String, String> contentFields = Collections.unmodifiableMap(getContentFields(source));
Optional<Language> language = Optional.empty();
//Get a reader for the content of the given source
try (BufferedReader reader = new BufferedReader(sourceReader)) {
Chunker chunker = new Chunker(reader);
while (chunker.hasNext()) {
if ( context.fileIngestIsCancelled()) {
logger.log(Level.INFO, "File ingest cancelled. Cancelling keyword search indexing of {0}", sourceName);
return false;
}
Chunk chunk = chunker.next();
if (doLanguageDetection) {
int size = Math.min(chunk.getBaseChunkLength(), LANGUAGE_DETECTION_STRING_SIZE);
language = languageSpecificContentIndexingHelper.detectLanguageIfNeeded(chunk.toString().substring(0, size));
// only do language detection on the first chunk of the document
doLanguageDetection = false;
}
Map<String, Object> fields = new HashMap<>(contentFields);
String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
fields.put(Server.Schema.ID.toString(), chunkId);
fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));
language.ifPresent(lang -> languageSpecificContentIndexingHelper.updateLanguageSpecificFields(fields, chunk, lang));
try {
//add the chunk text to Solr index
indexChunk(chunk.toString(), chunk.geLowerCasedChunk(), sourceName, fields);
indexChunk(chunk.toString(), chunk.getLowerCasedChunk(), sourceName, fields);
// add mini chunk when there's a language specific field
if (chunker.hasNext() && language.isPresent()) {
languageSpecificContentIndexingHelper.indexMiniChunk(chunk, sourceName, new HashMap<>(contentFields), chunkId, language.get());
}
numChunks++;
numChunks++;
} catch (Ingester.IngesterException ingEx) {
logger.log(Level.WARNING, "Ingester had a problem with extracted string from file '" //NON-NLS
+ sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS
@ -242,13 +351,14 @@ class Ingester {
logger.log(Level.WARNING, "Error chunking content from " + sourceID + ": " + sourceName, chunker.getException());
return false;
}
} catch (Exception ex) {
logger.log(Level.WARNING, "Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS
return false;
} finally {
if (context != null && context.fileIngestIsCancelled()) {
if (context.fileIngestIsCancelled()) {
return false;
} else {
} else {
Map<String, Object> fields = new HashMap<>(contentFields);
//after all chunks, index just the meta data, including the numChunks, of the parent file
fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
@ -259,9 +369,35 @@ class Ingester {
indexChunk(null, null, sourceName, fields);
}
}
return true;
}
/**
 * Index a single text chunk into Solr, including any language-specific
 * fields, and add a "mini chunk" when a language was detected.
 *
 * @param chunk         The chunk of extracted text to index.
 * @param sourceID      Object id of the source content the chunk came from.
 * @param sourceName    Display name of the source, used in log messages.
 * @param language      Detected language, if any; empty means no
 *                      language-specific fields are added.
 * @param contentFields Base Solr fields shared by all chunks of this source.
 * @param hasNext       True if more chunks follow; mini chunks are only
 *                      written for non-final chunks.
 *
 * @throws IngesterException If Solr rejects the chunk; rethrown so the
 *                           caller can stop processing this source.
 */
private void indexChunk(Chunk chunk, long sourceID, String sourceName, Optional<Language> language, Map<String, String> contentFields, boolean hasNext) throws IngesterException {
    Map<String, Object> fields = new HashMap<>(contentFields);
    // Chunk ids are "<sourceID>_<chunkNumber>"; see Server.getChunkIdString.
    String chunkId = Server.getChunkIdString(sourceID, chunk.getChunkId());
    fields.put(Server.Schema.ID.toString(), chunkId);
    fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));
    language.ifPresent(lang -> languageSpecificContentIndexingHelper.updateLanguageSpecificFields(fields, chunk, lang));
    try {
        //add the chunk text to Solr index
        indexChunk(chunk.toString(), chunk.getLowerCasedChunk(), sourceName, fields);
        // add mini chunk when there's a language specific field
        if (hasNext && language.isPresent()) {
            languageSpecificContentIndexingHelper.indexMiniChunk(chunk, sourceName, new HashMap<>(contentFields), chunkId, language.get());
        }
    } catch (Ingester.IngesterException ingEx) {
        logger.log(Level.WARNING, "Ingester had a problem with extracted string from file '" //NON-NLS
                + sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS
        throw ingEx; //need to rethrow to signal error and move on
    }
}
/**
* Add one chunk as to the Solr index as a separate Solr document.
*

View File

@ -0,0 +1,614 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2022 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.keywordsearch;
import com.twelvemonkeys.lang.StringUtil;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.logging.Level;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.validator.routines.DomainValidator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.sleuthkit.autopsy.casemodule.Case;
import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.ingest.IngestJobContext;
import org.sleuthkit.autopsy.keywordsearch.Chunker.Chunk;
import static org.sleuthkit.autopsy.keywordsearch.RegexQuery.CREDIT_CARD_NUM_PATTERN;
import org.sleuthkit.datamodel.Blackboard;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.BlackboardAttribute;
import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.SleuthkitCase;
import org.sleuthkit.datamodel.TskCoreException;
import org.sleuthkit.datamodel.TskException;
final class InlineSearcher {
private final List<KeywordList> keywordList;
private static final int MIN_EMAIL_ADDR_LENGTH = 8;
private static final Logger logger = Logger.getLogger(InlineSearcher.class.getName());
private final IngestJobContext context;
static final Map<Long, List<UniqueKeywordHit>> uniqueHitMap = new ConcurrentHashMap<>();
static final Map<Long, Map<Long, Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>>>> uniqueHitMap2 = new ConcurrentHashMap<>();
// Uses mostly native Java and the Lucene API to search a given chunk
// for keywords, creating a unique KeywordHit for each unique hit.
/**
 * Construct a searcher for the given keyword lists.
 *
 * @param keywordListNames Names of the keyword lists to search for; may be
 *                         null, in which case no lists are loaded.
 * @param context          The ingest job context, used for job id and
 *                         cancellation checks.
 */
InlineSearcher(List<String> keywordListNames, IngestJobContext context) {
    this.keywordList = new ArrayList<>();
    this.context = context;
    if (keywordListNames != null) {
        XmlKeywordSearchList loader = XmlKeywordSearchList.getCurrent();
        for (String name : keywordListNames) {
            KeywordList list = loader.getList(name);
            // Guard against an unknown list name: adding a null entry here
            // would cause an NPE later when searchString iterates the lists.
            if (list != null) {
                keywordList.add(list);
            } else {
                logger.log(Level.WARNING, "Keyword list not found, skipping: {0}", name);
            }
        }
    }
}
/**
* Search the chunk for the currently selected keywords.
*
* @param chunk
* @param sourceID
*
* @throws TskCoreException
*/
/**
 * Search the given chunk for the currently selected keywords by delegating
 * to searchString with the chunk's lower-cased text.
 *
 * @param chunk    The chunk of extracted text to search.
 * @param sourceID Object id of the content the chunk came from.
 * @param chunkId  Index of the chunk within its source.
 *
 * @return True if at least one keyword hit was found in the chunk.
 *
 * @throws TskCoreException
 */
boolean searchChunk(Chunk chunk, long sourceID, int chunkId) throws TskCoreException {
    return searchString(chunk.getLowerCasedChunk(), sourceID, chunkId);
}
/**
* Search a string for the currently selected keywords.
*
* @param text
* @param sourceID
*
* @throws TskCoreException
*/
/**
 * Search a string for the currently selected keywords, recording each
 * unique hit in the per-job, per-source hit map.
 *
 * @param text     The text to search.
 * @param sourceID Object id of the content the text came from.
 * @param chunkId  Index of the chunk within its source.
 *
 * @return True if at least one hit was found.
 *
 * @throws TskCoreException
 */
boolean searchString(String text, long sourceID, int chunkId) throws TskCoreException {
    boolean hitFound = false;
    Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>> hitByKeyword = getMap(context.getJobId(), sourceID);
    for (KeywordList list : keywordList) {
        List<Keyword> keywords = list.getKeywords();
        for (Keyword originalKeyword : keywords) {
            // One sub-map per original search term, keyed by the concrete
            // term that actually matched.
            Map<Keyword, List<UniqueKeywordHit>> hitMap
                    = hitByKeyword.computeIfAbsent(originalKeyword, k -> new HashMap<>());
            List<UniqueKeywordHit> keywordHits = new ArrayList<>();
            if (originalKeyword.searchTermIsLiteral()) {
                // Cheap substring pre-check before the expensive hit creation.
                if (StringUtil.containsIgnoreCase(text, originalKeyword.getSearchTerm())) {
                    keywordHits.addAll(createKeywordHits(text, originalKeyword, sourceID, chunkId, list.getName()));
                }
            } else {
                String regex = originalKeyword.getSearchTerm();
                try {
                    // validate the regex
                    Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
                    Matcher matcher = pattern.matcher(text);
                    if (matcher.find()) {
                        keywordHits.addAll(createKeywordHits(text, originalKeyword, sourceID, chunkId, list.getName()));
                    }
                } catch (IllegalArgumentException ex) {
                    // Don't silently swallow a malformed user-supplied regex;
                    // log it and continue with the remaining terms.
                    logger.log(Level.WARNING, String.format("Invalid keyword regex, skipping term: %s", regex), ex);
                }
            }
            if (!keywordHits.isEmpty()) {
                hitFound = true;
                for (UniqueKeywordHit hit : keywordHits) {
                    // Wrap the matched term in a Keyword carrying the original
                    // term's attributes so hits group by what actually matched.
                    Keyword keywordCopy = new Keyword(hit.getHit(),
                            originalKeyword.searchTermIsLiteral(),
                            originalKeyword.searchTermIsWholeWord(),
                            list.getName(),
                            originalKeyword.getOriginalTerm());
                    List<UniqueKeywordHit> mapHitList
                            = hitMap.computeIfAbsent(keywordCopy, k -> new ArrayList<>());
                    if (!mapHitList.contains(hit)) {
                        mapHitList.add(hit);
                    }
                }
            }
            if (context.fileIngestIsCancelled()) {
                return hitFound;
            }
        }
    }
    return hitFound;
}
/**
* This method very similar to RegexQuery createKeywordHits, with the
* knowledge of solr removed.
*
* @param text
* @param originalKeyword
*
* @return A list of KeywordHit objects.
*
* @throws TskCoreException
*/
/**
 * This method is very similar to RegexQuery.createKeywordHits, with the
 * knowledge of Solr removed: it runs the (possibly synthesized) regex for
 * the keyword directly against the chunk text and builds one unique hit
 * per distinct matched term.
 *
 * @param text            The chunk text to search.
 * @param originalKeyword The keyword being searched for.
 * @param sourceID        Object id of the source content.
 * @param chunkId         Index of the chunk within its source.
 * @param keywordListName Name of the list the keyword belongs to.
 *
 * @return A list of UniqueKeywordHit objects.
 *
 * @throws TskCoreException
 */
private List<UniqueKeywordHit> createKeywordHits(String text, Keyword originalKeyword, long sourceID, int chunkId, String keywordListName) throws TskCoreException {
    // Whole-word literal terms go through the Lucene tokenizer path instead.
    if (originalKeyword.searchTermIsLiteral() && originalKeyword.searchTermIsWholeWord()) {
        try {
            return getExactMatchHits(text, originalKeyword, sourceID, chunkId, keywordListName);
        } catch (IOException ex) {
            throw new TskCoreException("Failed to create exactMatch hits", ex);
        }
    }
    // Tracks hit terms already recorded so only one hit is made per term.
    final HashMap<String, String> keywordsFoundInThisDocument = new HashMap<>();
    List<UniqueKeywordHit> hits = new ArrayList<>();
    String keywordString = originalKeyword.getSearchTerm();
    boolean queryStringContainsWildcardSuffix = originalKeyword.getSearchTerm().endsWith(".*");
    String searchPattern;
    if (originalKeyword.searchTermIsLiteral()) {
        /**
         * For substring searches, the following pattern was arrived at
         * through trial and error in an attempt to reproduce the same hits
         * we were getting when we were using the TermComponent approach.
         * This basically looks for zero of more word characters followed
         * optionally by a dot or apostrophe, followed by the quoted
         * lowercase substring following by zero or more word characters
         * followed optionally by a dot or apostrophe. The reason that the
         * dot and apostrophe characters are being handled here is because
         * the old code used to find hits in domain names (e.g. hacks.ie)
         * and possessives (e.g. hacker's). This obviously works for English
         * but is probably not sufficient for other languages.
         */
        searchPattern = "[\\w[\\.']]*" + java.util.regex.Pattern.quote(keywordString.toLowerCase()) + "[\\w[\\.']]*";
    } else {
        searchPattern = keywordString;
    }
    final java.util.regex.Pattern pattern = java.util.regex.Pattern.compile(searchPattern, Pattern.CASE_INSENSITIVE);
    try {
        String content = text;
        Matcher hitMatcher = pattern.matcher(content);
        int offset = 0;
        while (hitMatcher.find(offset)) {
            String hit = hitMatcher.group().toLowerCase();
            /**
             * No need to continue on if the the string is "" nothing to
             * find or do.
             */
            if ("".equals(hit)) {
                break;
            }
            offset = hitMatcher.end();
            final BlackboardAttribute.ATTRIBUTE_TYPE artifactAttributeType = originalKeyword.getArtifactAttributeType();
            // We attempt to reduce false positives for phone numbers and IP address hits
            // by querying Solr for hits delimited by a set of known boundary characters.
            // See KeywordSearchList.PHONE_NUMBER_REGEX for an example.
            // Because of this the hits may contain an extra character at the beginning or end that
            // needs to be chopped off, unless the user has supplied their own wildcard suffix
            // as part of the regex.
            if (!queryStringContainsWildcardSuffix
                    && (artifactAttributeType == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PHONE_NUMBER
                    || artifactAttributeType == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_IP_ADDRESS)) {
                if (artifactAttributeType == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PHONE_NUMBER) {
                    // For phone numbers replace all non numeric characters (except "(") at the start of the hit.
                    hit = hit.replaceAll("^[^0-9\\(]", "");
                } else {
                    // Replace all non numeric characters at the start of the hit.
                    hit = hit.replaceAll("^[^0-9]", "");
                }
                // Replace all non numeric at the end of the hit.
                hit = hit.replaceAll("[^0-9]$", "");
                if (offset > 1) {
                    /*
                     * NOTE: our IP and phone number regex patterns look for
                     * boundary characters immediately before and after the
                     * keyword hit. After a match, the Java pattern matcher
                     * re-starts at the first character not matched by the
                     * previous match. This basically requires two boundary
                     * characters to be present between each pattern match.
                     * To mitigate this we are resetting the offset one
                     * character back.
                     */
                    offset--;
                }
            }
            /**
             * Boundary characters are removed from the start and end of the
             * hit to normalize the hits. This is being done for substring
             * searches only at this point. We don't do it for real regular
             * expression searches because the user may have explicitly
             * included boundary characters in their regular expression.
             */
            if (originalKeyword.searchTermIsLiteral()) {
                hit = hit.replaceAll("^" + KeywordSearchList.BOUNDARY_CHARACTERS + "*", "");
                hit = hit.replaceAll(KeywordSearchList.BOUNDARY_CHARACTERS + "*$", "");
            }
            /**
             * The use of String interning is an optimization to ensure that
             * we reuse the same keyword hit String object across all hits.
             * Even though we benefit from G1GC String deduplication, the
             * overhead associated with creating a new String object for
             * every KeywordHit can be significant when the number of hits
             * gets large.
             */
            hit = hit.intern();
            // We will only create one KeywordHit instance per document for
            // a given hit.
            if (keywordsFoundInThisDocument.containsKey(hit)) {
                continue;
            }
            keywordsFoundInThisDocument.put(hit, hit);
            if (artifactAttributeType == null) {
                hits.add(new UniqueKeywordHit(chunkId, sourceID, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit, keywordListName, originalKeyword.searchTermIsWholeWord(), originalKeyword.searchTermIsLiteral(), originalKeyword.getArtifactAttributeType(), originalKeyword.getSearchTerm()));
            } else {
                switch (artifactAttributeType) {
                    case TSK_EMAIL:
                        /*
                         * Reduce false positives by eliminating email
                         * address hits that are either too short or are not
                         * for valid top level domains.
                         */
                        if (hit.length() >= MIN_EMAIL_ADDR_LENGTH
                                && DomainValidator.getInstance(true).isValidTld(hit.substring(hit.lastIndexOf('.')))) {
                            hits.add(new UniqueKeywordHit(chunkId, sourceID, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit, keywordListName, originalKeyword.searchTermIsWholeWord(), originalKeyword.searchTermIsLiteral(), originalKeyword.getArtifactAttributeType(), originalKeyword.getSearchTerm()));
                        }
                        break;
                    case TSK_CARD_NUMBER:
                        /*
                         * If searching for credit card account numbers, do
                         * extra validation on the term and discard it if it
                         * does not pass.
                         */
                        Matcher ccnMatcher = CREDIT_CARD_NUM_PATTERN.matcher(hit);
                        for (int rLength = hit.length(); rLength >= 12; rLength--) {
                            ccnMatcher.region(0, rLength);
                            if (ccnMatcher.find()) {
                                final String group = ccnMatcher.group("ccn");
                                if (CreditCardValidator.isValidCCN(group)) {
                                    hits.add(new UniqueKeywordHit(chunkId, sourceID, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit, keywordListName, originalKeyword.searchTermIsWholeWord(), originalKeyword.searchTermIsLiteral(), originalKeyword.getArtifactAttributeType(), originalKeyword.getSearchTerm()));
                                }
                            }
                        }
                        break;
                    default:
                        hits.add(new UniqueKeywordHit(chunkId, sourceID, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit, keywordListName, originalKeyword.searchTermIsWholeWord(), originalKeyword.searchTermIsLiteral(), originalKeyword.getArtifactAttributeType(), originalKeyword.getSearchTerm()));
                        break;
                }
            }
        }
    } catch (Throwable error) {
        /*
         * NOTE: Matcher.find() is known to throw StackOverflowError in rare
         * cases (see JIRA-2700). StackOverflowError is an error, not an
         * exception, and therefore needs to be caught as a Throwable. When
         * this occurs we should re-throw the error as TskCoreException so
         * that it is logged by the calling method and move on to the next
         * Solr document.
         */
        throw new TskCoreException("Failed to create keyword hits for chunk due to " + error.getMessage());
    }
    return hits;
}
/**
* Clean up the memory that is being used for the given job.
*
* @param context
*/
/**
 * Clean up the memory that is being used for the given job.
 *
 * @param context The ingest job context whose cached hits should be freed.
 */
static void cleanup(IngestJobContext context) {
    // Remove the job's entry entirely; the previous clear() left an empty
    // map keyed by the finished job id in the static cache, which
    // accumulated one stale entry per completed ingest job.
    Map<Long, Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>>> jobMap = uniqueHitMap2.remove(context.getJobId());
    if (jobMap != null) {
        jobMap.clear();
    }
}
/**
* Generates the artifacts for the found KeywordHits. This method should be
* called once per content object.
*
* @param context
*/
/**
 * Generates the artifacts for the found KeywordHits. This method should be
 * called once per content object.
 *
 * @param context The ingest job context; used for the job id and for
 *                cancellation checks.
 *
 * @throws TskException
 */
static void makeArtifacts(IngestJobContext context) throws TskException {
    Map<Long, Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>>> jobMap = uniqueHitMap2.get(context.getJobId());
    if (jobMap == null) {
        return;
    }
    for (Map.Entry<Long, Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>>> mapBySource : jobMap.entrySet()) {
        Long sourceId = mapBySource.getKey();
        Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>> mapByKeyword = mapBySource.getValue();
        for (Map.Entry<Keyword, Map<Keyword, List<UniqueKeywordHit>>> item : mapByKeyword.entrySet()) {
            Keyword originalKeyword = item.getKey();
            Map<Keyword, List<UniqueKeywordHit>> map = item.getValue();
            List<BlackboardArtifact> hitArtifacts = new ArrayList<>();
            if (!map.isEmpty()) {
                for (Map.Entry<Keyword, List<UniqueKeywordHit>> entry : map.entrySet()) {
                    Keyword hitKeyword = entry.getKey();
                    List<UniqueKeywordHit> hitList = entry.getValue();
                    // Only create one hit for the document.
                    // The first hit in the list should be the first one that
                    // was found.
                    if (!hitList.isEmpty()) {
                        UniqueKeywordHit hit = hitList.get(0);
                        SleuthkitCase tskCase = Case.getCurrentCase().getSleuthkitCase();
                        Content content = tskCase.getContentById(hit.getContentID());
                        BlackboardArtifact artifact = RegexQuery.createKeywordHitArtifact(content, originalKeyword, hitKeyword, hit, hit.getSnippet(), hitKeyword.getListName(), sourceId);
                        // createKeywordHitArtifact has the potential to return null
                        // when a CCN account is created.
                        if (artifact != null) {
                            hitArtifacts.add(artifact);
                        }
                    }
                }
                if (!hitArtifacts.isEmpty()) {
                    try {
                        // NOTE(review): getCurrentCaseThrows() is used here while
                        // getCurrentCase() is used above — presumably intentional,
                        // but worth confirming for consistency.
                        SleuthkitCase tskCase = Case.getCurrentCaseThrows().getSleuthkitCase();
                        Blackboard blackboard = tskCase.getBlackboard();
                        blackboard.postArtifacts(hitArtifacts, "KeywordSearch", context.getJobId());
                        hitArtifacts.clear();
                    } catch (NoCurrentCaseException | Blackboard.BlackboardException ex) {
                        logger.log(Level.SEVERE, "Failed to post KWH artifact to blackboard.", ex); //NON-NLS
                    }
                }
                if (context.fileIngestIsCancelled()) {
                    return;
                }
            }
        }
    }
}
/**
* Searches the chunk for exact matches and creates the appropriate keyword
* hits.
*
* @param text
* @param originalKeyword
* @param sourceID
*
* @return
*
* @throws IOException
*/
/**
 * Searches the chunk for exact (whole-word literal) matches and creates
 * the appropriate keyword hits, using the Lucene StandardAnalyzer to
 * tokenize both the keyword and the text so multi-word terms match
 * token-by-token.
 *
 * @param text            The chunk text to search.
 * @param originalKeyword The whole-word literal keyword being searched for.
 * @param sourceID        Object id of the source content.
 * @param chunkId         Index of the chunk within its source.
 * @param keywordListName Name of the list the keyword belongs to.
 *
 * @return A list of UniqueKeywordHit objects, one per distinct matched term.
 *
 * @throws IOException If the token streams cannot be read.
 */
public List<UniqueKeywordHit> getExactMatchHits(String text, Keyword originalKeyword, long sourceID, int chunkId, String keywordListName) throws IOException {
    final HashMap<String, String> keywordsFoundInThisDocument = new HashMap<>();
    List<UniqueKeywordHit> hits = new ArrayList<>();
    Analyzer analyzer = new StandardAnalyzer();
    //Get the tokens of the keyword
    List<String> keywordTokens = new ArrayList<>();
    try (TokenStream keywordstream = analyzer.tokenStream("field", originalKeyword.getSearchTerm())) {
        CharTermAttribute attr = keywordstream.addAttribute(CharTermAttribute.class);
        keywordstream.reset();
        while (keywordstream.incrementToken()) {
            keywordTokens.add(attr.toString());
        }
    }
    // A search term made entirely of punctuation/stop characters produces no
    // tokens; without this guard keywordTokens.get(0) below would throw
    // IndexOutOfBoundsException.
    if (keywordTokens.isEmpty()) {
        return hits;
    }
    try (TokenStream stream = analyzer.tokenStream("field", text)) {
        CharTermAttribute attr = stream.addAttribute(CharTermAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            if (!attr.toString().equals(keywordTokens.get(0))) {
                continue;
            }
            int startOffset = offset.startOffset();
            int endOffset = offset.endOffset();
            boolean match = true;
            for (int index = 1; index < keywordTokens.size(); index++) {
                if (stream.incrementToken()) {
                    if (!attr.toString().equals(keywordTokens.get(index))) {
                        match = false;
                        break;
                    } else {
                        endOffset = offset.endOffset();
                    }
                } else {
                    // The text ended before every keyword token was matched.
                    // The original code left match == true here, producing a
                    // false-positive partial match at the end of the chunk.
                    match = false;
                    break;
                }
            }
            if (match) {
                String hit = text.subSequence(startOffset, endOffset).toString();
                // We will only create one KeywordHit instance per document for
                // a given hit.
                if (keywordsFoundInThisDocument.containsKey(hit)) {
                    continue;
                }
                keywordsFoundInThisDocument.put(hit, hit);
                hits.add(new UniqueKeywordHit(chunkId, sourceID, KeywordSearchUtil.makeSnippet(text, startOffset, endOffset, hit), hit, keywordListName, originalKeyword.searchTermIsWholeWord(), originalKeyword.searchTermIsLiteral(), originalKeyword.getArtifactAttributeType(), originalKeyword.getOriginalTerm()));
            }
        }
    }
    return hits;
}
/**
* Get the keyword map for the given job and source.
*
* @param jobId
* @param sourceID
*
* @return
*/
/**
 * Get (creating if necessary) the keyword hit map for the given job and
 * source.
 *
 * @param jobId    Ingest job id.
 * @param sourceID Object id of the source content.
 *
 * @return The per-source map of original keyword to matched-term hits.
 */
static private Map<Keyword, Map<Keyword, List<UniqueKeywordHit>>> getMap(long jobId, long sourceID) {
    // computeIfAbsent is atomic on ConcurrentHashMap; the original
    // get-then-put sequence was a check-then-act race that could let two
    // ingest threads create and overwrite the same entry, losing hits.
    return uniqueHitMap2
            .computeIfAbsent(jobId, id -> new ConcurrentHashMap<>())
            .computeIfAbsent(sourceID, id -> new ConcurrentHashMap<>());
}
// KeywordHit is not unique enough for duplicate detection, so this class
// extends KeywordHit to make truly unique hits.
/**
 * A KeywordHit extended with the list name, term attributes, and original
 * search term so that hits can be de-duplicated precisely. equals() compares
 * the string fields case-insensitively; hashCode() and compareTo() are kept
 * consistent with that contract.
 */
static class UniqueKeywordHit extends KeywordHit {

    private final String listName;
    private final boolean isLiteral;
    private final boolean isWholeWord;
    private final BlackboardAttribute.ATTRIBUTE_TYPE artifactAtrributeType;
    private final String originalSearchTerm;

    UniqueKeywordHit(int chunkId, long sourceID, String snippet, String hit, String listName, boolean isWholeWord, boolean isLiteral, BlackboardAttribute.ATTRIBUTE_TYPE artifactAtrributeType, String originalSearchTerm) {
        super(chunkId, sourceID, snippet, hit);
        this.listName = listName;
        this.isWholeWord = isWholeWord;
        this.isLiteral = isLiteral;
        this.artifactAtrributeType = artifactAtrributeType;
        this.originalSearchTerm = originalSearchTerm;
    }

    @Override
    public int compareTo(KeywordHit other) {
        // NOTE(review): assumes every hit compared against is a
        // UniqueKeywordHit; a plain KeywordHit would throw
        // ClassCastException here — confirm callers never mix the two.
        return compare((UniqueKeywordHit) other);
    }

    private int compare(UniqueKeywordHit other) {
        // artifactAtrributeType may legitimately be null (see equals below);
        // the original comparator would NPE on a null key, so nulls are
        // ordered first explicitly.
        return Comparator.comparing(UniqueKeywordHit::getSolrObjectId)
                .thenComparing(UniqueKeywordHit::getChunkId)
                .thenComparing(UniqueKeywordHit::getHit)
                .thenComparing(UniqueKeywordHit::getSnippet)
                .thenComparing(UniqueKeywordHit::isWholeWord)
                .thenComparing(UniqueKeywordHit::isLiteral)
                .thenComparing(UniqueKeywordHit::getArtifactAtrributeType,
                        Comparator.nullsFirst(Comparator.naturalOrder()))
                .thenComparing(UniqueKeywordHit::getOriginalSearchTerm)
                .thenComparing(UniqueKeywordHit::getListName)
                .compare(this, other);
    }

    @Override
    public boolean equals(Object obj) {
        if (null == obj) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        final UniqueKeywordHit other = (UniqueKeywordHit) obj;
        return getSnippet().equalsIgnoreCase(other.getSnippet())
                && getSolrObjectId().equals(other.getSolrObjectId())
                && getChunkId().equals(other.getChunkId())
                && getHit().equalsIgnoreCase(other.getHit())
                && listName.equalsIgnoreCase(other.getListName())
                && isLiteral == other.isLiteral()
                && isWholeWord == other.isWholeWord()
                && originalSearchTerm.equalsIgnoreCase(other.getOriginalSearchTerm())
                && (artifactAtrributeType != null ? artifactAtrributeType.equals(other.getArtifactAtrributeType()) : true);
    }

    @Override
    public int hashCode() {
        // equals() compares the string fields case-insensitively, so the
        // hash must be computed from case-normalized values. The original
        // mixed case-sensitive hashes (including super.hashCode()) with a
        // case-insensitive equals, violating the equals/hashCode contract
        // and breaking hash-based de-duplication for case-variant hits.
        int hash = 3;
        hash = 67 * hash + Objects.hashCode(getSolrObjectId());
        hash = 67 * hash + Objects.hashCode(getChunkId());
        hash = 67 * hash + (getHit() == null ? 0 : getHit().toLowerCase().hashCode());
        hash = 67 * hash + (getSnippet() == null ? 0 : getSnippet().toLowerCase().hashCode());
        hash = 67 * hash + (listName == null ? 0 : listName.toLowerCase().hashCode());
        hash = 67 * hash + (this.isLiteral ? 1 : 0);
        hash = 67 * hash + (this.isWholeWord ? 1 : 0);
        hash = 67 * hash + Objects.hashCode(this.artifactAtrributeType);
        hash = 67 * hash + (originalSearchTerm == null ? 0 : originalSearchTerm.toLowerCase().hashCode());
        return hash;
    }

    String getListName() {
        return listName;
    }

    Boolean isLiteral() {
        return isLiteral;
    }

    Boolean isWholeWord() {
        return isWholeWord;
    }

    BlackboardAttribute.ATTRIBUTE_TYPE getArtifactAtrributeType() {
        return artifactAtrributeType;
    }

    String getOriginalSearchTerm() {
        return originalSearchTerm;
    }
}
}

View File

@ -211,8 +211,10 @@ public class Keyword {
public int hashCode() {
int hash = 7;
hash = 17 * hash + this.searchTerm.hashCode();
hash = 17 * hash + this.listName.hashCode();
hash = 17 * hash + (this.isLiteral ? 1 : 0);
hash = 17 * hash + (this.isWholeWord ? 1 : 0);
hash = 17 * hash + this.originalTerm.hashCode();
return hash;
}

View File

@ -21,10 +21,12 @@ package org.sleuthkit.autopsy.keywordsearch;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Comparator;
import java.util.Objects;
import java.util.Optional;
import org.apache.commons.lang3.StringUtils;
import org.sleuthkit.autopsy.casemodule.Case;
import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
import org.sleuthkit.datamodel.BlackboardAttribute;
import org.sleuthkit.datamodel.SleuthkitCase;
import org.sleuthkit.datamodel.TskCoreException;
@ -68,17 +70,30 @@ class KeywordHit implements Comparable<KeywordHit> {
* documents. One contains object metadata (chunk #1) and the second and
* subsequent documents contain chunks of the text.
*/
String[] split = solrDocumentId.split(Server.CHUNK_ID_SEPARATOR);
if (split.length == 1) {
//chunk 0 has only the bare document id without the chunk id.
this.solrObjectId = Long.parseLong(solrDocumentId);
this.chunkId = 0;
if(!solrDocumentId.isEmpty()) {
String[] split = solrDocumentId.split(Server.CHUNK_ID_SEPARATOR);
if (split.length == 1) {
//chunk 0 has only the bare document id without the chunk id.
this.solrObjectId = Long.parseLong(solrDocumentId);
this.chunkId = 0;
} else {
this.solrObjectId = Long.parseLong(split[0]);
this.chunkId = Integer.parseInt(split[1]);
}
} else {
this.solrObjectId = Long.parseLong(split[0]);
this.chunkId = Integer.parseInt(split[1]);
this.solrObjectId = 0;
this.chunkId = 0;
}
}
KeywordHit(int chunkId, long sourceID, String snippet, String hit) {
this.snippet = StringUtils.stripToEmpty(snippet);
this.hit = hit;
this.chunkId = chunkId;
this.solrObjectId = sourceID;
}
String getHit() {
return hit;
}
@ -87,11 +102,11 @@ class KeywordHit implements Comparable<KeywordHit> {
return Long.toString(solrObjectId) + Server.CHUNK_ID_SEPARATOR + Long.toString(chunkId);
}
long getSolrObjectId() {
Long getSolrObjectId() {
return this.solrObjectId;
}
int getChunkId() {
Integer getChunkId() {
return this.chunkId;
}
@ -172,20 +187,25 @@ class KeywordHit implements Comparable<KeywordHit> {
return false;
}
final KeywordHit other = (KeywordHit) obj;
return this.compareTo(other) == 0;
return compareTo(other) == 0;
}
@Override
public int hashCode() {
int hash = 3;
hash = 41 * hash + (int) this.solrObjectId + this.chunkId;
int hash = 7;
hash = 37 * hash + (int) (this.solrObjectId ^ (this.solrObjectId >>> 32));
hash = 37 * hash + this.chunkId;
hash = 37 * hash + Objects.hashCode(this.snippet);
hash = 37 * hash + Objects.hashCode(this.hit);
return hash;
}
@Override
public int compareTo(KeywordHit o) {
public int compareTo(KeywordHit other) {
return Comparator.comparing(KeywordHit::getSolrObjectId)
.thenComparing(KeywordHit::getChunkId)
.compare(this, o);
.thenComparing(KeywordHit::getHit)
.thenComparing(KeywordHit::getSnippet)
.compare(this, other);
}
}

View File

@ -1,10 +1,6 @@
<?xml version="1.0" encoding="UTF-8" ?>
<Form version="1.5" maxVersion="1.7" type="org.netbeans.modules.form.forminfo.JPanelFormInfo">
<NonVisualComponents>
<Component class="javax.swing.ButtonGroup" name="timeGroup">
</Component>
</NonVisualComponents>
<AuxValues>
<AuxValue name="FormSettings_autoResourcing" type="java.lang.Integer" value="1"/>
<AuxValue name="FormSettings_autoSetComponentName" type="java.lang.Boolean" value="false"/>
@ -47,23 +43,13 @@
<EmptySpace type="separate" max="-2" attributes="0"/>
<Component id="filesIndexedValue" min="-2" max="-2" attributes="0"/>
</Group>
<Component id="frequencyLabel" alignment="0" min="-2" max="-2" attributes="0"/>
<Group type="102" alignment="0" attributes="0">
<Component id="chunksLabel" linkSize="1" min="-2" max="-2" attributes="0"/>
<EmptySpace type="separate" max="-2" attributes="0"/>
<Component id="chunksValLabel" min="-2" max="-2" attributes="0"/>
</Group>
<Group type="102" alignment="0" attributes="0">
<EmptySpace min="16" pref="16" max="-2" attributes="0"/>
<Group type="103" groupAlignment="0" attributes="0">
<Component id="timeRadioButton2" min="-2" max="-2" attributes="0"/>
<Component id="timeRadioButton1" min="-2" max="-2" attributes="0"/>
<Component id="timeRadioButton3" alignment="0" min="-2" max="-2" attributes="0"/>
<Component id="timeRadioButton4" alignment="0" min="-2" max="-2" attributes="0"/>
<Component id="timeRadioButton5" alignment="0" min="-2" max="-2" attributes="0"/>
</Group>
</Group>
</Group>
<EmptySpace min="-2" pref="132" max="-2" attributes="0"/>
</Group>
</Group>
<EmptySpace max="32767" attributes="0"/>
@ -90,19 +76,7 @@
<Component id="skipNSRLCheckBox" min="-2" max="-2" attributes="0"/>
<EmptySpace max="-2" attributes="0"/>
<Component id="showSnippetsCB" min="-2" max="-2" attributes="0"/>
<EmptySpace max="-2" attributes="0"/>
<Component id="frequencyLabel" min="-2" max="-2" attributes="0"/>
<EmptySpace max="-2" attributes="0"/>
<Component id="timeRadioButton1" min="-2" max="-2" attributes="0"/>
<EmptySpace max="-2" attributes="0"/>
<Component id="timeRadioButton2" min="-2" max="-2" attributes="0"/>
<EmptySpace max="-2" attributes="0"/>
<Component id="timeRadioButton3" min="-2" max="-2" attributes="0"/>
<EmptySpace max="-2" attributes="0"/>
<Component id="timeRadioButton4" min="-2" max="-2" attributes="0"/>
<EmptySpace max="-2" attributes="0"/>
<Component id="timeRadioButton5" min="-2" max="-2" attributes="0"/>
<EmptySpace max="-2" attributes="0"/>
<EmptySpace type="unrelated" max="-2" attributes="0"/>
<Group type="103" groupAlignment="1" attributes="0">
<Component id="informationLabel" min="-2" max="-2" attributes="0"/>
<Component id="informationSeparator" min="-2" pref="7" max="-2" attributes="0"/>
@ -119,7 +93,7 @@
</Group>
<EmptySpace type="unrelated" max="-2" attributes="0"/>
<Component id="ingestWarningLabel" min="-2" max="-2" attributes="0"/>
<EmptySpace max="32767" attributes="0"/>
<EmptySpace pref="151" max="32767" attributes="0"/>
</Group>
</Group>
</DimensionLayout>
@ -184,65 +158,6 @@
</Component>
<Component class="javax.swing.JSeparator" name="informationSeparator">
</Component>
<Component class="javax.swing.JLabel" name="frequencyLabel">
<Properties>
<Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor">
<ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.frequencyLabel.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, &quot;{key}&quot;)"/>
</Property>
</Properties>
</Component>
<Component class="javax.swing.JRadioButton" name="timeRadioButton1">
<Properties>
<Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor">
<ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, &quot;{key}&quot;)"/>
</Property>
<Property name="toolTipText" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor">
<ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.toolTipText" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, &quot;{key}&quot;)"/>
</Property>
</Properties>
<Events>
<EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="timeRadioButton1ActionPerformed"/>
</Events>
</Component>
<Component class="javax.swing.JRadioButton" name="timeRadioButton2">
<Properties>
<Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor">
<ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, &quot;{key}&quot;)"/>
</Property>
<Property name="toolTipText" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor">
<ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.toolTipText" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, &quot;{key}&quot;)"/>
</Property>
</Properties>
<Events>
<EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="timeRadioButton2ActionPerformed"/>
</Events>
</Component>
<Component class="javax.swing.JRadioButton" name="timeRadioButton3">
<Properties>
<Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor">
<ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, &quot;{key}&quot;)"/>
</Property>
<Property name="toolTipText" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor">
<ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.toolTipText" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, &quot;{key}&quot;)"/>
</Property>
</Properties>
<Events>
<EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="timeRadioButton3ActionPerformed"/>
</Events>
</Component>
<Component class="javax.swing.JRadioButton" name="timeRadioButton4">
<Properties>
<Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor">
<ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.text_1" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, &quot;{key}&quot;)"/>
</Property>
<Property name="toolTipText" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor">
<ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.toolTipText" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, &quot;{key}&quot;)"/>
</Property>
</Properties>
<Events>
<EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="timeRadioButton4ActionPerformed"/>
</Events>
</Component>
<Component class="javax.swing.JCheckBox" name="showSnippetsCB">
<Properties>
<Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor">
@ -253,19 +168,6 @@
<EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="showSnippetsCBActionPerformed"/>
</Events>
</Component>
<Component class="javax.swing.JRadioButton" name="timeRadioButton5">
<Properties>
<Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor">
<ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, &quot;{key}&quot;)"/>
</Property>
<Property name="toolTipText" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor">
<ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.toolTipText" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, &quot;{key}&quot;)"/>
</Property>
</Properties>
<Events>
<EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="timeRadioButton5ActionPerformed"/>
</Events>
</Component>
<Component class="javax.swing.JLabel" name="ingestWarningLabel">
<Properties>
<Property name="icon" type="javax.swing.Icon" editor="org.netbeans.modules.form.editors2.IconEditor">

View File

@ -1,7 +1,7 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2012-2018 Basis Technology Corp.
* Copyright 2012-2022 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
@ -26,9 +26,7 @@ import org.netbeans.spi.options.OptionsPanelController;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.corecomponents.OptionsPanel;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.coreutils.PlatformUtil;
import org.sleuthkit.autopsy.ingest.IngestManager;
import org.sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.UpdateFrequency;
/**
* General, not per list, keyword search configuration and status display widget
@ -53,31 +51,6 @@ class KeywordSearchGlobalSearchSettingsPanel extends javax.swing.JPanel implemen
boolean ingestRunning = IngestManager.getInstance().isIngestRunning();
ingestWarningLabel.setVisible(ingestRunning);
skipNSRLCheckBox.setEnabled(!ingestRunning);
setTimeSettingEnabled(!ingestRunning);
final UpdateFrequency curFreq = KeywordSearchSettings.getUpdateFrequency();
switch (curFreq) {
case FAST:
timeRadioButton1.setSelected(true);
break;
case AVG:
timeRadioButton2.setSelected(true);
break;
case SLOW:
timeRadioButton3.setSelected(true);
break;
case SLOWEST:
timeRadioButton4.setSelected(true);
break;
case NONE:
timeRadioButton5.setSelected(true);
break;
case DEFAULT:
default:
// default value
timeRadioButton3.setSelected(true);
break;
}
}
/**
@ -89,7 +62,6 @@ class KeywordSearchGlobalSearchSettingsPanel extends javax.swing.JPanel implemen
// <editor-fold defaultstate="collapsed" desc="Generated Code">//GEN-BEGIN:initComponents
private void initComponents() {
timeGroup = new javax.swing.ButtonGroup();
skipNSRLCheckBox = new javax.swing.JCheckBox();
filesIndexedLabel = new javax.swing.JLabel();
filesIndexedValue = new javax.swing.JLabel();
@ -99,13 +71,7 @@ class KeywordSearchGlobalSearchSettingsPanel extends javax.swing.JPanel implemen
informationLabel = new javax.swing.JLabel();
settingsSeparator = new javax.swing.JSeparator();
informationSeparator = new javax.swing.JSeparator();
frequencyLabel = new javax.swing.JLabel();
timeRadioButton1 = new javax.swing.JRadioButton();
timeRadioButton2 = new javax.swing.JRadioButton();
timeRadioButton3 = new javax.swing.JRadioButton();
timeRadioButton4 = new javax.swing.JRadioButton();
showSnippetsCB = new javax.swing.JCheckBox();
timeRadioButton5 = new javax.swing.JRadioButton();
ingestWarningLabel = new javax.swing.JLabel();
skipNSRLCheckBox.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.skipNSRLCheckBox.text")); // NOI18N
@ -128,40 +94,6 @@ class KeywordSearchGlobalSearchSettingsPanel extends javax.swing.JPanel implemen
informationLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.informationLabel.text")); // NOI18N
frequencyLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.frequencyLabel.text")); // NOI18N
timeRadioButton1.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.text")); // NOI18N
timeRadioButton1.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton1.toolTipText")); // NOI18N
timeRadioButton1.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
timeRadioButton1ActionPerformed(evt);
}
});
timeRadioButton2.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.text")); // NOI18N
timeRadioButton2.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton2.toolTipText")); // NOI18N
timeRadioButton2.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
timeRadioButton2ActionPerformed(evt);
}
});
timeRadioButton3.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.text")); // NOI18N
timeRadioButton3.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.toolTipText")); // NOI18N
timeRadioButton3.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
timeRadioButton3ActionPerformed(evt);
}
});
timeRadioButton4.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.text_1")); // NOI18N
timeRadioButton4.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.toolTipText")); // NOI18N
timeRadioButton4.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
timeRadioButton4ActionPerformed(evt);
}
});
showSnippetsCB.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.showSnippetsCB.text")); // NOI18N
showSnippetsCB.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
@ -169,14 +101,6 @@ class KeywordSearchGlobalSearchSettingsPanel extends javax.swing.JPanel implemen
}
});
timeRadioButton5.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.text")); // NOI18N
timeRadioButton5.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.toolTipText")); // NOI18N
timeRadioButton5.addActionListener(new java.awt.event.ActionListener() {
public void actionPerformed(java.awt.event.ActionEvent evt) {
timeRadioButton5ActionPerformed(evt);
}
});
ingestWarningLabel.setIcon(new javax.swing.ImageIcon(getClass().getResource("/org/sleuthkit/autopsy/modules/hashdatabase/warning16.png"))); // NOI18N
ingestWarningLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchGlobalSearchSettingsPanel.class, "KeywordSearchGlobalSearchSettingsPanel.ingestWarningLabel.text")); // NOI18N
@ -207,19 +131,11 @@ class KeywordSearchGlobalSearchSettingsPanel extends javax.swing.JPanel implemen
.addComponent(filesIndexedLabel)
.addGap(18, 18, 18)
.addComponent(filesIndexedValue))
.addComponent(frequencyLabel)
.addGroup(layout.createSequentialGroup()
.addComponent(chunksLabel)
.addGap(18, 18, 18)
.addComponent(chunksValLabel))
.addGroup(layout.createSequentialGroup()
.addGap(16, 16, 16)
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.LEADING)
.addComponent(timeRadioButton2)
.addComponent(timeRadioButton1)
.addComponent(timeRadioButton3)
.addComponent(timeRadioButton4)
.addComponent(timeRadioButton5))))))
.addComponent(chunksValLabel)))
.addGap(132, 132, 132)))
.addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE))
.addGroup(layout.createSequentialGroup()
.addComponent(settingsLabel)
@ -241,19 +157,7 @@ class KeywordSearchGlobalSearchSettingsPanel extends javax.swing.JPanel implemen
.addComponent(skipNSRLCheckBox)
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
.addComponent(showSnippetsCB)
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
.addComponent(frequencyLabel)
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
.addComponent(timeRadioButton1)
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
.addComponent(timeRadioButton2)
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
.addComponent(timeRadioButton3)
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
.addComponent(timeRadioButton4)
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
.addComponent(timeRadioButton5)
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.RELATED)
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
.addGroup(layout.createParallelGroup(javax.swing.GroupLayout.Alignment.TRAILING)
.addComponent(informationLabel)
.addComponent(informationSeparator, javax.swing.GroupLayout.PREFERRED_SIZE, 7, javax.swing.GroupLayout.PREFERRED_SIZE))
@ -267,14 +171,10 @@ class KeywordSearchGlobalSearchSettingsPanel extends javax.swing.JPanel implemen
.addComponent(chunksValLabel))
.addPreferredGap(javax.swing.LayoutStyle.ComponentPlacement.UNRELATED)
.addComponent(ingestWarningLabel)
.addContainerGap(javax.swing.GroupLayout.DEFAULT_SIZE, Short.MAX_VALUE))
.addContainerGap(151, Short.MAX_VALUE))
);
}// </editor-fold>//GEN-END:initComponents
private void timeRadioButton5ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_timeRadioButton5ActionPerformed
firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null);
}//GEN-LAST:event_timeRadioButton5ActionPerformed
private void skipNSRLCheckBoxActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_skipNSRLCheckBoxActionPerformed
firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null);
}//GEN-LAST:event_skipNSRLCheckBoxActionPerformed
@ -283,28 +183,11 @@ class KeywordSearchGlobalSearchSettingsPanel extends javax.swing.JPanel implemen
firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null);
}//GEN-LAST:event_showSnippetsCBActionPerformed
private void timeRadioButton1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_timeRadioButton1ActionPerformed
firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null);
}//GEN-LAST:event_timeRadioButton1ActionPerformed
private void timeRadioButton2ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_timeRadioButton2ActionPerformed
firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null);
}//GEN-LAST:event_timeRadioButton2ActionPerformed
private void timeRadioButton3ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_timeRadioButton3ActionPerformed
firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null);
}//GEN-LAST:event_timeRadioButton3ActionPerformed
private void timeRadioButton4ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_timeRadioButton4ActionPerformed
firePropertyChange(OptionsPanelController.PROP_CHANGED, null, null);
}//GEN-LAST:event_timeRadioButton4ActionPerformed
// Variables declaration - do not modify//GEN-BEGIN:variables
private javax.swing.JLabel chunksLabel;
private javax.swing.JLabel chunksValLabel;
private javax.swing.JLabel filesIndexedLabel;
private javax.swing.JLabel filesIndexedValue;
private javax.swing.JLabel frequencyLabel;
private javax.swing.JLabel informationLabel;
private javax.swing.JSeparator informationSeparator;
private javax.swing.JLabel ingestWarningLabel;
@ -312,18 +195,11 @@ class KeywordSearchGlobalSearchSettingsPanel extends javax.swing.JPanel implemen
private javax.swing.JSeparator settingsSeparator;
private javax.swing.JCheckBox showSnippetsCB;
private javax.swing.JCheckBox skipNSRLCheckBox;
private javax.swing.ButtonGroup timeGroup;
private javax.swing.JRadioButton timeRadioButton1;
private javax.swing.JRadioButton timeRadioButton2;
private javax.swing.JRadioButton timeRadioButton3;
private javax.swing.JRadioButton timeRadioButton4;
private javax.swing.JRadioButton timeRadioButton5;
// End of variables declaration//GEN-END:variables
@Override
public void store() {
KeywordSearchSettings.setSkipKnown(skipNSRLCheckBox.isSelected());
KeywordSearchSettings.setUpdateFrequency(getSelectedTimeValue());
KeywordSearchSettings.setShowSnippets(showSnippetsCB.isSelected());
}
@ -332,40 +208,10 @@ class KeywordSearchGlobalSearchSettingsPanel extends javax.swing.JPanel implemen
activateWidgets();
}
private void setTimeSettingEnabled(boolean enabled) {
timeRadioButton1.setEnabled(enabled);
timeRadioButton2.setEnabled(enabled);
timeRadioButton3.setEnabled(enabled);
timeRadioButton4.setEnabled(enabled);
timeRadioButton5.setEnabled(enabled);
frequencyLabel.setEnabled(enabled);
}
private UpdateFrequency getSelectedTimeValue() {
if (timeRadioButton1.isSelected()) {
return UpdateFrequency.FAST;
} else if (timeRadioButton2.isSelected()) {
return UpdateFrequency.AVG;
} else if (timeRadioButton3.isSelected()) {
return UpdateFrequency.SLOW;
} else if (timeRadioButton4.isSelected()) {
return UpdateFrequency.SLOWEST;
} else if (timeRadioButton5.isSelected()) {
return UpdateFrequency.NONE;
}
return UpdateFrequency.DEFAULT;
}
@NbBundle.Messages({"KeywordSearchGlobalSearchSettingsPanel.customizeComponents.windowsOCR=Enable Optical Character Recognition (OCR) (Requires Windows 64-bit)",
"KeywordSearchGlobalSearchSettingsPanel.customizeComponents.windowsLimitedOCR=Only process images which are over 100KB in size or extracted from a document. (Beta) (Requires Windows 64-bit)"})
private void customizeComponents() {
timeGroup.add(timeRadioButton1);
timeGroup.add(timeRadioButton2);
timeGroup.add(timeRadioButton3);
timeGroup.add(timeRadioButton4);
timeGroup.add(timeRadioButton5);
this.skipNSRLCheckBox.setSelected(KeywordSearchSettings.getSkipKnown());
try {

View File

@ -1,7 +1,7 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011-2021 Basis Technology Corp.
* Copyright 2011-2023 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
@ -38,6 +38,7 @@ import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.stream.Collectors;
import org.apache.tika.mime.MimeTypes;
import org.openide.util.Exceptions;
import org.openide.util.Lookup;
import org.openide.util.NbBundle;
import org.openide.util.NbBundle.Messages;
@ -69,6 +70,7 @@ import org.sleuthkit.datamodel.BlackboardAttribute;
import org.sleuthkit.datamodel.TskCoreException;
import org.sleuthkit.datamodel.TskData;
import org.sleuthkit.datamodel.TskData.FileKnown;
import org.sleuthkit.datamodel.TskException;
/**
* An ingest module on a file level Performs indexing of allocated and Solr
@ -169,33 +171,13 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
EXTRACT_UTF8, ///< extract UTF8 text, true/false
};
enum UpdateFrequency {
FAST(20),
AVG(10),
SLOW(5),
SLOWEST(1),
NONE(Integer.MAX_VALUE),
DEFAULT(5);
private final int time;
UpdateFrequency(int time) {
this.time = time;
}
int getTime() {
return time;
}
};
private static final Logger logger = Logger.getLogger(KeywordSearchIngestModule.class.getName());
private final IngestServices services = IngestServices.getInstance();
private Ingester ingester = null;
private Indexer indexer;
private FileTypeDetector fileTypeDetector;
//only search images from current ingest, not images previously ingested/indexed
//accessed read-only by searcher thread
private boolean startedSearching = false;
private Lookup stringsExtractionContext;
private final KeywordSearchJobSettings settings;
private boolean initialized = false;
@ -257,18 +239,21 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
initialized = false;
jobId = context.getJobId();
Server server = KeywordSearch.getServer();
if (server.coreIsOpen() == false) {
throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startUp_noOpenCore_msg());
}
try {
Index indexInfo = server.getIndexInfo();
if (!indexInfo.isCompatible(IndexFinder.getCurrentSchemaVersion())) {
throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupException_indexSchemaNotSupported(indexInfo.getSchemaVersion()));
Server server = null;
if (settings.isIndexToSolrEnabled()) {
server = KeywordSearch.getServer();
if (server.coreIsOpen() == false) {
throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startUp_noOpenCore_msg());
}
try {
Index indexInfo = server.getIndexInfo();
if (!indexInfo.isCompatible(IndexFinder.getCurrentSchemaVersion())) {
throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupException_indexSchemaNotSupported(indexInfo.getSchemaVersion()));
}
} catch (NoOpenCoreException ex) {
throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupMessage_failedToGetIndexSchema(), ex);
}
} catch (NoOpenCoreException ex) {
throw new IngestModuleException(Bundle.KeywordSearchIngestModule_startupMessage_failedToGetIndexSchema(), ex);
}
try {
@ -307,22 +292,24 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
}
} else {
// for single-user cases need to verify connection to local SOLR service
try {
if (!server.isLocalSolrRunning()) {
throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg()));
// server will be null if indexing is disabled
if (server != null) {
try {
if (!server.isLocalSolrRunning()) {
throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg()));
}
} catch (KeywordSearchModuleException ex) {
//this means Solr is not properly initialized
throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg()), ex);
}
try {
// make an actual query to verify that server is responding
// we had cases where getStatus was OK, but the connection resulted in a 404
server.queryNumIndexedDocuments();
} catch (KeywordSearchModuleException | NoOpenCoreException ex) {
throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_exception_errConnToSolr_msg(ex.getMessage()), ex);
}
} catch (KeywordSearchModuleException ex) {
//this means Solr is not properly initialized
throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_tryStopSolrMsg(Bundle.KeywordSearchIngestModule_init_badInitMsg()), ex);
}
try {
// make an actual query to verify that server is responding
// we had cases where getStatus was OK, but the connection resulted in a 404
server.queryNumIndexedDocuments();
} catch (KeywordSearchModuleException | NoOpenCoreException ex) {
throw new IngestModuleException(Bundle.KeywordSearchIngestModule_init_exception_errConnToSolr_msg(ex.getMessage()), ex);
}
// check if this job has any searchable keywords
List<KeywordList> keywordLists = XmlKeywordSearchList.getCurrent().getListsL();
boolean hasKeywordsForSearch = false;
@ -347,7 +334,6 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
stringsExtractionContext = Lookups.fixed(stringsConfig);
indexer = new Indexer();
initialized = true;
}
@ -389,7 +375,7 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
if (context.fileIngestIsCancelled()) {
return ProcessResult.OK;
}
indexer.indexFile(extractorOpt, abstractFile, mimeType, false);
searchFile(extractorOpt, abstractFile, mimeType, false);
return ProcessResult.OK;
}
@ -397,17 +383,7 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
if (context.fileIngestIsCancelled()) {
return ProcessResult.OK;
}
indexer.indexFile(extractorOpt, abstractFile, mimeType, true);
// Start searching if it hasn't started already
if (!startedSearching) {
if (context.fileIngestIsCancelled()) {
return ProcessResult.OK;
}
List<String> keywordListNames = settings.getNamesOfEnabledKeyWordLists();
IngestSearchRunner.getInstance().startJob(context, keywordListNames);
startedSearching = true;
}
searchFile(extractorOpt, abstractFile, mimeType, true);
return ProcessResult.OK;
}
@ -425,17 +401,22 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
}
if (context.fileIngestIsCancelled()) {
logger.log(Level.INFO, "Keyword search ingest module instance {0} stopping search job due to ingest cancellation", instanceNum); //NON-NLS
IngestSearchRunner.getInstance().stopJob(jobId);
logger.log(Level.INFO, "Keyword search ingest module instance {0} stopping due to ingest cancellation", instanceNum); //NON-NLS
cleanup();
return;
}
// Remove from the search list and trigger final commit and final search
IngestSearchRunner.getInstance().endJob(jobId);
// We only need to post the summary msg from the last module per job
if (refCounter.decrementAndGet(jobId) == 0) {
try {
InlineSearcher.makeArtifacts(context);
InlineSearcher.cleanup(context);
Ingester.getDefault().commit();
} catch (TskException ex) {
logger.log(Level.SEVERE, String.format("Failed to create search ingest artifacts for job %d", context.getJobId()), ex);
}
try {
final int numIndexedFiles = KeywordSearch.getServer().queryNumIndexedFiles();
logger.log(Level.INFO, "Indexed files count: {0}", numIndexedFiles); //NON-NLS
@ -478,7 +459,7 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
if (mimeType.startsWith(IMAGE_MIME_TYPE_PREFIX)) {
return aFile.getSize() > LIMITED_OCR_SIZE_MIN
|| aFile.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.DERIVED;
|| aFile.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.DERIVED;
}
return false;
@ -562,317 +543,319 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
* File indexer, processes and indexes known/allocated files,
* unknown/unallocated files and directories accordingly
*/
private class Indexer {
private final Logger logger = Logger.getLogger(Indexer.class.getName());
/**
* Extract text with Tika or other text extraction modules (by
* streaming) from the file Divide the file into chunks and index the
* chunks
*
* @param extractorOptional The textExtractor to use with this file or
* empty.
* @param aFile file to extract strings from, divide into
* chunks and index
* @param extractedMetadata Map that will be populated with the file's
* metadata.
*
* @return true if the file was text_ingested, false otherwise
*
* @throws IngesterException exception thrown if indexing failed
*/
private boolean extractTextAndIndex(Optional<TextExtractor> extractorOptional, AbstractFile aFile,
/**
* Extract text with Tika or other text extraction modules (by streaming)
* from the file Divide the file into chunks and index the chunks
*
* @param extractorOptional The textExtractor to use with this file or
* empty.
* @param aFile file to extract strings from, divide into chunks
* and index
* @param extractedMetadata Map that will be populated with the file's
* metadata.
*
* @return true if the file was text_ingested, false otherwise
*
* @throws IngesterException exception thrown if indexing failed
*/
private boolean extractTextAndSearch(Optional<TextExtractor> extractorOptional, AbstractFile aFile,
Map<String, String> extractedMetadata) throws IngesterException {
try {
if (!extractorOptional.isPresent()) {
return false;
}
TextExtractor extractor = extractorOptional.get();
Reader fileText = extractor.getReader();
Reader finalReader;
try {
Map<String, String> metadata = extractor.getMetadata();
if (!metadata.isEmpty()) {
// Creating the metadata artifact here causes occasional problems
// when indexing the text, so we save the metadata map to
// use after this method is complete.
extractedMetadata.putAll(metadata);
}
CharSource formattedMetadata = getMetaDataCharSource(metadata);
//Append the metadata to end of the file text
finalReader = CharSource.concat(new CharSource() {
//Wrap fileText reader for concatenation
@Override
public Reader openStream() throws IOException {
return fileText;
}
}, formattedMetadata).openStream();
} catch (IOException ex) {
logger.log(Level.WARNING, String.format("Could not format extracted metadata for file %s [id=%d]",
try {
if (!extractorOptional.isPresent()) {
return false;
}
//divide into chunks and index
Ingester.getDefault().search(getTikaOrTextExtractor(extractorOptional, aFile, extractedMetadata), aFile.getId(), aFile.getName(), aFile, context, true,settings.isIndexToSolrEnabled(), settings.getNamesOfEnabledKeyWordLists());
} catch (TextExtractor.InitReaderException ex) {
return false;
} catch(Exception ex) {
logger.log(Level.WARNING, String.format("Failed to search file %s [id=%d]",
aFile.getName(), aFile.getId()), ex);
//Just send file text.
finalReader = fileText;
}
//divide into chunks and index
return Ingester.getDefault().indexText(finalReader, aFile.getId(), aFile.getName(), aFile, context);
} catch (TextExtractor.InitReaderException ex) {
// Text extractor could not be initialized. No text will be extracted.
return false;
}
return false;
}
private void createMetadataArtifact(AbstractFile aFile, Map<String, String> metadata) {
return true;
}
String moduleName = KeywordSearchIngestModule.class.getName();
private Reader getTikaOrTextExtractor(Optional<TextExtractor> extractorOptional, AbstractFile aFile,
Map<String, String> extractedMetadata) throws TextExtractor.InitReaderException {
Collection<BlackboardAttribute> attributes = new ArrayList<>();
Collection<BlackboardArtifact> bbartifacts = new ArrayList<>();
for (Map.Entry<String, String> entry : metadata.entrySet()) {
if (METADATA_TYPES_MAP.containsKey(entry.getKey())) {
BlackboardAttribute bba = checkAttribute(entry.getKey(), entry.getValue());
if (bba != null) {
attributes.add(bba);
}
}
}
if (!attributes.isEmpty()) {
try {
BlackboardArtifact bbart = aFile.newDataArtifact(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_METADATA), attributes);
bbartifacts.add(bbart);
} catch (TskCoreException ex) {
// Log error and return to continue processing
logger.log(Level.WARNING, String.format("Error creating or adding metadata artifact for file %s.", aFile.getParentPath() + aFile.getName()), ex); //NON-NLS
return;
}
if (!bbartifacts.isEmpty()) {
try {
Case.getCurrentCaseThrows().getSleuthkitCase().getBlackboard().postArtifacts(bbartifacts, moduleName, jobId);
} catch (NoCurrentCaseException | Blackboard.BlackboardException ex) {
// Log error and return to continue processing
logger.log(Level.WARNING, String.format("Unable to post blackboard artifacts for file $s.", aFile.getParentPath() + aFile.getName()), ex); //NON-NLS
return;
}
}
}
}
private BlackboardAttribute checkAttribute(String key, String value) {
String moduleName = KeywordSearchIngestModule.class.getName();
if (!value.isEmpty() && value.charAt(0) != ' ') {
if (METADATA_DATE_TYPES.contains(key)) {
SimpleDateFormat metadataDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", US);
Long metadataDateTime = Long.valueOf(0);
try {
String metadataDate = value.replaceAll("T", " ").replaceAll("Z", "");
Date usedDate = metadataDateFormat.parse(metadataDate);
metadataDateTime = usedDate.getTime() / 1000;
return new BlackboardAttribute(METADATA_TYPES_MAP.get(key), moduleName, metadataDateTime);
} catch (ParseException ex) {
// catching error and displaying date that could not be parsed then will continue on.
logger.log(Level.WARNING, String.format("Failed to parse date/time %s for metadata attribute %s.", value, key), ex); //NON-NLS
return null;
}
} else {
return new BlackboardAttribute(METADATA_TYPES_MAP.get(key), moduleName, value);
}
}
return null;
}
/**
* Pretty print the text extractor metadata.
*
* @param metadata The Metadata map to wrap as a CharSource
*
* @return A CharSource for the given Metadata
*/
@NbBundle.Messages({
"KeywordSearchIngestModule.metadataTitle=METADATA"
})
private CharSource getMetaDataCharSource(Map<String, String> metadata) {
return CharSource.wrap(new StringBuilder(
String.format("\n\n------------------------------%s------------------------------\n\n",
Bundle.KeywordSearchIngestModule_metadataTitle()))
.append(metadata.entrySet().stream().sorted(Map.Entry.comparingByKey())
.map(entry -> entry.getKey() + ": " + entry.getValue())
.collect(Collectors.joining("\n"))
));
}
/**
* Extract strings using heuristics from the file and add to index.
*
* @param aFile file to extract strings from, divide into chunks and
* index
*
* @return true if the file was text_ingested, false otherwise
*/
private boolean extractStringsAndIndex(AbstractFile aFile) {
TextExtractor extractor = extractorOptional.get();
Reader fileText = extractor.getReader();
Reader finalReader;
try {
if (context.fileIngestIsCancelled()) {
return true;
Map<String, String> metadata = extractor.getMetadata();
if (!metadata.isEmpty()) {
// Creating the metadata artifact here causes occasional problems
// when indexing the text, so we save the metadata map to
// use after this method is complete.
extractedMetadata.putAll(metadata);
}
TextExtractor stringsExtractor = TextExtractorFactory.getStringsExtractor(aFile, stringsExtractionContext);
Reader extractedTextReader = stringsExtractor.getReader();
if (Ingester.getDefault().indexStrings(extractedTextReader, aFile.getId(), aFile.getName(), aFile, KeywordSearchIngestModule.this.context)) {
putIngestStatus(jobId, aFile.getId(), IngestStatus.STRINGS_INGESTED);
return true;
} else {
logger.log(Level.WARNING, "Failed to extract strings and ingest, file ''{0}'' (id: {1}).", new Object[]{aFile.getName(), aFile.getId()}); //NON-NLS
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
return false;
CharSource formattedMetadata = getMetaDataCharSource(metadata);
//Append the metadata to end of the file text
finalReader = CharSource.concat(new CharSource() {
//Wrap fileText reader for concatenation
@Override
public Reader openStream() throws IOException {
return fileText;
}
}, formattedMetadata).openStream();
} catch (IOException ex) {
logger.log(Level.WARNING, String.format("Could not format extracted metadata for file %s [id=%d]",
aFile.getName(), aFile.getId()), ex);
//Just send file text.
finalReader = fileText;
}
//divide into chunks and index
return finalReader;
}
private void createMetadataArtifact(AbstractFile aFile, Map<String, String> metadata) {
String moduleName = KeywordSearchIngestModule.class.getName();
Collection<BlackboardAttribute> attributes = new ArrayList<>();
Collection<BlackboardArtifact> bbartifacts = new ArrayList<>();
for (Map.Entry<String, String> entry : metadata.entrySet()) {
if (METADATA_TYPES_MAP.containsKey(entry.getKey())) {
BlackboardAttribute bba = checkAttribute(entry.getKey(), entry.getValue());
if (bba != null) {
attributes.add(bba);
}
} catch (IngesterException | TextExtractor.InitReaderException ex) {
logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").", ex); //NON-NLS
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
return false;
}
}
if (!attributes.isEmpty()) {
try {
BlackboardArtifact bbart = aFile.newDataArtifact(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_METADATA), attributes);
bbartifacts.add(bbart);
} catch (TskCoreException ex) {
// Log error and return to continue processing
logger.log(Level.WARNING, String.format("Error creating or adding metadata artifact for file %s.", aFile.getParentPath() + aFile.getName()), ex); //NON-NLS
return;
}
if (!bbartifacts.isEmpty()) {
try {
Case.getCurrentCaseThrows().getSleuthkitCase().getBlackboard().postArtifacts(bbartifacts, moduleName, jobId);
} catch (NoCurrentCaseException | Blackboard.BlackboardException ex) {
// Log error and return to continue processing
logger.log(Level.WARNING, String.format("Unable to post blackboard artifacts for file $s.", aFile.getParentPath() + aFile.getName()), ex); //NON-NLS
return;
}
}
}
}
private BlackboardAttribute checkAttribute(String key, String value) {
String moduleName = KeywordSearchIngestModule.class.getName();
if (!value.isEmpty() && value.charAt(0) != ' ') {
if (METADATA_DATE_TYPES.contains(key)) {
SimpleDateFormat metadataDateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", US);
Long metadataDateTime = Long.valueOf(0);
try {
String metadataDate = value.replaceAll("T", " ").replaceAll("Z", "");
Date usedDate = metadataDateFormat.parse(metadataDate);
metadataDateTime = usedDate.getTime() / 1000;
return new BlackboardAttribute(METADATA_TYPES_MAP.get(key), moduleName, metadataDateTime);
} catch (ParseException ex) {
// catching error and displaying date that could not be parsed then will continue on.
logger.log(Level.WARNING, String.format("Failed to parse date/time %s for metadata attribute %s.", value, key), ex); //NON-NLS
return null;
}
} else {
return new BlackboardAttribute(METADATA_TYPES_MAP.get(key), moduleName, value);
}
}
return null;
}
/**
* Pretty print the text extractor metadata.
*
* @param metadata The Metadata map to wrap as a CharSource
*
* @return A CharSource for the given Metadata
*/
@NbBundle.Messages({
"KeywordSearchIngestModule.metadataTitle=METADATA"
})
private CharSource getMetaDataCharSource(Map<String, String> metadata) {
return CharSource.wrap(new StringBuilder(
String.format("\n\n------------------------------%s------------------------------\n\n",
Bundle.KeywordSearchIngestModule_metadataTitle()))
.append(metadata.entrySet().stream().sorted(Map.Entry.comparingByKey())
.map(entry -> entry.getKey() + ": " + entry.getValue())
.collect(Collectors.joining("\n"))
));
}
/**
* Extract strings using heuristics from the file and add to index.
*
* @param aFile file to extract strings from, divide into chunks and index
*
* @return true if the file was text_ingested, false otherwise
*/
private boolean extractStringsAndIndex(AbstractFile aFile) {
try {
if (context.fileIngestIsCancelled()) {
return true;
}
Reader extractedTextReader = KeywordSearchUtil.getReader(aFile, stringsExtractionContext);
Ingester.getDefault().search(extractedTextReader, aFile.getId(), aFile.getName(), aFile, KeywordSearchIngestModule.this.context, false, settings.isIndexToSolrEnabled(), settings.getNamesOfEnabledKeyWordLists());
putIngestStatus(jobId, aFile.getId(), IngestStatus.STRINGS_INGESTED);
} catch (Exception ex) {
logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").", ex); //NON-NLS
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
return false;
}
return true;
}
/**
* Adds the file to the index. Detects file type, calls extractors, etc.
*
* @param extractor The textExtractor to use with this file or empty if
* no extractor found.
* @param aFile File to analyze.
* @param mimeType The file mime type.
* @param indexContent False if only metadata should be text_ingested. True
* if content and metadata should be index.
*/
private void searchFile(Optional<TextExtractor> extractor, AbstractFile aFile, String mimeType, boolean indexContent) {
//logger.log(Level.INFO, "Processing AbstractFile: " + abstractFile.getName());
TskData.TSK_DB_FILES_TYPE_ENUM aType = aFile.getType();
/**
* Adds the file to the index. Detects file type, calls extractors, etc.
*
* @param extractor The textExtractor to use with this file or empty
* if no extractor found.
* @param aFile File to analyze.
* @param mimeType The file mime type.
* @param indexContent False if only metadata should be text_ingested.
* True if content and metadata should be index.
* Extract unicode strings from unallocated and unused blocks and carved
* text files. The reason for performing string extraction on these is
* because they all may contain multiple encodings which can cause text
* to be missed by the more specialized text extractors used below.
*/
private void indexFile(Optional<TextExtractor> extractor, AbstractFile aFile, String mimeType, boolean indexContent) {
//logger.log(Level.INFO, "Processing AbstractFile: " + abstractFile.getName());
TskData.TSK_DB_FILES_TYPE_ENUM aType = aFile.getType();
/**
* Extract unicode strings from unallocated and unused blocks and
* carved text files. The reason for performing string extraction on
* these is because they all may contain multiple encodings which
* can cause text to be missed by the more specialized text
* extractors used below.
*/
if ((aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)
|| aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS))
|| (aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED) && aFile.getNameExtension().equalsIgnoreCase("txt"))) {
if (context.fileIngestIsCancelled()) {
return;
}
extractStringsAndIndex(aFile);
return;
}
final long size = aFile.getSize();
//if not to index content, or a dir, or 0 content, index meta data only
if ((indexContent == false || aFile.isDir() || size == 0)) {
try {
if (context.fileIngestIsCancelled()) {
return;
}
ingester.indexMetaDataOnly(aFile);
putIngestStatus(jobId, aFile.getId(), IngestStatus.METADATA_INGESTED);
} catch (IngesterException ex) {
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex); //NON-NLS
}
return;
}
if ((aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)
|| aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS))
|| (aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED) && aFile.getNameExtension().equalsIgnoreCase("txt"))) {
if (context.fileIngestIsCancelled()) {
return;
}
extractStringsAndIndex(aFile);
return;
}
// we skip archive formats that are opened by the archive module.
// @@@ We could have a check here to see if the archive module was enabled though...
if (ARCHIVE_MIME_TYPES.contains(mimeType)) {
try {
if (context.fileIngestIsCancelled()) {
return;
}
ingester.indexMetaDataOnly(aFile);
putIngestStatus(jobId, aFile.getId(), IngestStatus.METADATA_INGESTED);
} catch (IngesterException ex) {
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex); //NON-NLS
}
return;
}
final long size = aFile.getSize();
//if not to index content, or a dir, or 0 content, index meta data only
boolean wasTextAdded = false;
Map<String, String> extractedMetadata = new HashMap<>();
//extract text with one of the extractors, divide into chunks and index with Solr
if ((indexContent == false || aFile.isDir() || size == 0)) {
try {
//logger.log(Level.INFO, "indexing: " + aFile.getName());
if (context.fileIngestIsCancelled()) {
return;
}
if (MimeTypes.OCTET_STREAM.equals(mimeType)) {
extractStringsAndIndex(aFile);
ingester.indexMetaDataOnly(aFile);
putIngestStatus(jobId, aFile.getId(), IngestStatus.METADATA_INGESTED);
} catch (IngesterException ex) {
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex); //NON-NLS
}
return;
}
if (context.fileIngestIsCancelled()) {
return;
}
// we skip archive formats that are opened by the archive module.
// @@@ We could have a check here to see if the archive module was enabled though...
if (ARCHIVE_MIME_TYPES.contains(mimeType)) {
try {
if (context.fileIngestIsCancelled()) {
return;
}
if (!extractTextAndIndex(extractor, aFile, extractedMetadata)) {
// Text extractor not found for file. Extract string only.
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
} else {
putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED);
wasTextAdded = true;
}
} catch (IngesterException e) {
logger.log(Level.INFO, "Could not extract text with Tika, " + aFile.getId() + ", " //NON-NLS
+ aFile.getName(), e);
ingester.indexMetaDataOnly(aFile);
putIngestStatus(jobId, aFile.getId(), IngestStatus.METADATA_INGESTED);
} catch (IngesterException ex) {
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
} catch (Exception e) {
logger.log(Level.WARNING, "Error extracting text with Tika, " + aFile.getId() + ", " //NON-NLS
+ aFile.getName(), e);
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
}
if ((wasTextAdded == false) && (aFile.getNameExtension().equalsIgnoreCase("txt") && !(aFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED)))) {
//Carved Files should be the only type of unallocated files capable of a txt extension and
//should be ignored by the TextFileExtractor because they may contain more than one text encoding
wasTextAdded = indexTextFile(aFile);
}
// if it wasn't supported or had an error, default to strings
if (wasTextAdded == false) {
extractStringsAndIndex(aFile);
}
// Now that the indexing is complete, create the metadata artifact (if applicable).
// It is unclear why calling this from extractTextAndIndex() generates
// errors.
if (!extractedMetadata.isEmpty()) {
createMetadataArtifact(aFile, extractedMetadata);
logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex); //NON-NLS
}
return;
}
/**
* Adds the text file to the index given an encoding. Returns true if
* indexing was successful and false otherwise.
*
* @param aFile Text file to analyze
*/
private boolean indexTextFile(AbstractFile aFile) {
try {
TextFileExtractor textFileExtractor = new TextFileExtractor(aFile);
Reader textReader = textFileExtractor.getReader();
if (textReader == null) {
logger.log(Level.INFO, "Unable to extract with TextFileExtractor, Reader was null for file: {0}", aFile.getName());
} else if (Ingester.getDefault().indexText(textReader, aFile.getId(), aFile.getName(), aFile, context)) {
textReader.close();
putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED);
return true;
}
} catch (IngesterException | IOException | TextExtractor.InitReaderException ex) {
logger.log(Level.WARNING, "Unable to index " + aFile.getName(), ex);
boolean wasTextAdded = false;
Map<String, String> extractedMetadata = new HashMap<>();
//extract text with one of the extractors, divide into chunks and index with Solr
try {
//logger.log(Level.INFO, "indexing: " + aFile.getName());
if (context.fileIngestIsCancelled()) {
return;
}
return false;
if (MimeTypes.OCTET_STREAM.equals(mimeType)) {
extractStringsAndIndex(aFile);
return;
}
if (!extractTextAndSearch(extractor, aFile, extractedMetadata)) {
// Text extractor not found for file. Extract string only.
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
} else {
putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED);
wasTextAdded = true;
}
} catch (IngesterException e) {
logger.log(Level.INFO, "Could not extract text with Tika, " + aFile.getId() + ", " //NON-NLS
+ aFile.getName(), e);
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
} catch (Exception e) {
logger.log(Level.WARNING, "Error extracting text with Tika, " + aFile.getId() + ", " //NON-NLS
+ aFile.getName(), e);
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
}
if ((wasTextAdded == false) && (aFile.getNameExtension().equalsIgnoreCase("txt") && !(aFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED)))) {
//Carved Files should be the only type of unallocated files capable of a txt extension and
//should be ignored by the TextFileExtractor because they may contain more than one text encoding
wasTextAdded = searchTextFile(aFile);
}
// if it wasn't supported or had an error, default to strings
if (wasTextAdded == false) {
extractStringsAndIndex(aFile);
}
// Now that the indexing is complete, create the metadata artifact (if applicable).
// It is unclear why calling this from extractTextAndIndex() generates
// errors.
if (!extractedMetadata.isEmpty()) {
createMetadataArtifact(aFile, extractedMetadata);
}
}
/**
* Adds the text file to the index given an encoding. Returns true if
* indexing was successful and false otherwise.
*
* @param aFile Text file to analyze
*/
private boolean searchTextFile(AbstractFile aFile) {
try {
TextFileExtractor textFileExtractor = new TextFileExtractor(aFile);
Reader textReader = textFileExtractor.getReader();
if (textReader == null) {
logger.log(Level.INFO, "Unable to extract with TextFileExtractor, Reader was null for file: {0}", aFile.getName());
} else {
Ingester.getDefault().search(textReader, aFile.getId(), aFile.getName(), aFile, context, true, settings.isIndexToSolrEnabled(), settings.getNamesOfEnabledKeyWordLists());
textReader.close();
putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED);
return true;
}
} catch (Exception ex) {
logger.log(Level.WARNING, "Unable to index " + aFile.getName(), ex);
}
return false;
}
}

View File

@ -30,7 +30,7 @@ public final class KeywordSearchJobSettings implements IngestModuleIngestJobSett
private static final long serialVersionUID = 1L;
private HashSet<String> namesOfEnabledKeywordLists;
private final HashSet<String> namesOfEnabledKeywordLists;
private HashSet<String> namesOfDisabledKeywordLists; // Added in version 1.1
/**
@ -42,6 +42,8 @@ public final class KeywordSearchJobSettings implements IngestModuleIngestJobSett
private boolean ocrOnly;
private boolean indexToSolr;
/**
* Constructs ingest job settings for the keywords search module.
*
@ -55,6 +57,7 @@ public final class KeywordSearchJobSettings implements IngestModuleIngestJobSett
this.ocrEnabled = null;
this.limitedOCREnabled = null;
this.ocrOnly = false;
this.indexToSolr = true;
}
/**
@ -69,12 +72,13 @@ public final class KeywordSearchJobSettings implements IngestModuleIngestJobSett
* @param ocrOnly True if keyword search ingest should
* be solely limited to OCR.
*/
KeywordSearchJobSettings(List<String> namesOfEnabledKeywordLists, List<String> namesOfDisabledKeywordLists, boolean ocrEnabled, boolean limitedOCREnabled, boolean ocrOnly) {
KeywordSearchJobSettings(List<String> namesOfEnabledKeywordLists, List<String> namesOfDisabledKeywordLists, boolean ocrEnabled, boolean limitedOCREnabled, boolean ocrOnly, boolean indexToSolr) {
this.namesOfEnabledKeywordLists = new HashSet<>(namesOfEnabledKeywordLists);
this.namesOfDisabledKeywordLists = new HashSet<>(namesOfDisabledKeywordLists);
this.ocrEnabled = ocrEnabled;
this.limitedOCREnabled = limitedOCREnabled;
this.ocrOnly = ocrOnly;
this.indexToSolr = indexToSolr;
}
/**
@ -197,4 +201,12 @@ public final class KeywordSearchJobSettings implements IngestModuleIngestJobSett
}
}
boolean isIndexToSolrEnabled() {
return indexToSolr;
}
void setIndexToSolrEnabled(boolean enabled){
indexToSolr = enabled;
}
}

View File

@ -16,12 +16,10 @@
<AuxValue name="FormSettings_listenerGenerationStyle" type="java.lang.Integer" value="0"/>
<AuxValue name="FormSettings_variablesLocal" type="java.lang.Boolean" value="false"/>
<AuxValue name="FormSettings_variablesModifier" type="java.lang.Integer" value="2"/>
<AuxValue name="designerSize" type="java.awt.Dimension" value="-84,-19,0,5,115,114,0,18,106,97,118,97,46,97,119,116,46,68,105,109,101,110,115,105,111,110,65,-114,-39,-41,-84,95,68,20,2,0,2,73,0,6,104,101,105,103,104,116,73,0,5,119,105,100,116,104,120,112,0,0,1,108,0,0,1,71"/>
<AuxValue name="designerSize" type="java.awt.Dimension" value="-84,-19,0,5,115,114,0,18,106,97,118,97,46,97,119,116,46,68,105,109,101,110,115,105,111,110,65,-114,-39,-41,-84,95,68,20,2,0,2,73,0,6,104,101,105,103,104,116,73,0,5,119,105,100,116,104,120,112,0,0,0,-56,0,0,1,73"/>
</AuxValues>
<Layout class="org.netbeans.modules.form.compat2.layouts.DesignBoxLayout">
<Property name="axis" type="int" value="1"/>
</Layout>
<Layout class="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout"/>
<SubComponents>
<Component class="javax.swing.JLabel" name="titleLabel">
<Properties>
@ -51,6 +49,11 @@
<AuxValues>
<AuxValue name="autoScrollPane" type="java.lang.Boolean" value="true"/>
</AuxValues>
<Constraints>
<Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription">
<GridBagConstraints gridX="0" gridY="1" gridWidth="7" gridHeight="1" fill="1" ipadX="284" ipadY="-71" insetsTop="6" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="1.0" weightY="1.0"/>
</Constraint>
</Constraints>
<Layout class="org.netbeans.modules.form.compat2.layouts.support.JScrollPaneSupportLayout"/>
<SubComponents>
@ -79,6 +82,21 @@
</Component>
</SubComponents>
</Container>
<<<<<<< HEAD
=======
<Component class="javax.swing.JLabel" name="titleLabel">
<Properties>
<Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor">
<ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchJobSettingsPanel.titleLabel.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, &quot;{key}&quot;)"/>
</Property>
</Properties>
<Constraints>
<Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription">
<GridBagConstraints gridX="0" gridY="0" gridWidth="2" gridHeight="1" fill="0" ipadX="0" ipadY="0" insetsTop="7" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/>
</Constraint>
</Constraints>
</Component>
>>>>>>> aae905c78496f2405c2af41a79e6fbb3a4c00e1c
<Component class="javax.swing.JLabel" name="languagesLabel">
<Properties>
<Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor">
@ -92,6 +110,11 @@
</Property>
<Property name="verticalTextPosition" type="int" value="3"/>
</Properties>
<Constraints>
<Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription">
<GridBagConstraints gridX="0" gridY="2" gridWidth="8" gridHeight="1" fill="0" ipadX="25" ipadY="-22" insetsTop="6" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/>
</Constraint>
</Constraints>
</Component>
<Component class="javax.swing.JLabel" name="languagesValLabel">
<Properties>
@ -107,6 +130,11 @@
</Border>
</Property>
</Properties>
<Constraints>
<Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription">
<GridBagConstraints gridX="0" gridY="3" gridWidth="6" gridHeight="1" fill="0" ipadX="270" ipadY="0" insetsTop="6" insetsLeft="20" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/>
</Constraint>
</Constraints>
</Component>
<Component class="javax.swing.JLabel" name="encodingsLabel">
<Properties>
@ -119,6 +147,11 @@
</Border>
</Property>
</Properties>
<Constraints>
<Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription">
<GridBagConstraints gridX="0" gridY="4" gridWidth="1" gridHeight="1" fill="0" ipadX="0" ipadY="0" insetsTop="11" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/>
</Constraint>
</Constraints>
</Component>
<Component class="javax.swing.JLabel" name="keywordSearchEncodings">
<Properties>
@ -131,6 +164,11 @@
</Border>
</Property>
</Properties>
<Constraints>
<Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription">
<GridBagConstraints gridX="1" gridY="4" gridWidth="1" gridHeight="1" fill="0" ipadX="0" ipadY="0" insetsTop="11" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/>
</Constraint>
</Constraints>
</Component>
<Component class="javax.swing.JCheckBox" name="ocrCheckBox">
<Properties>
@ -141,6 +179,11 @@
<Events>
<EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="ocrCheckBoxActionPerformed"/>
</Events>
<Constraints>
<Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription">
<GridBagConstraints gridX="0" gridY="5" gridWidth="2" gridHeight="1" fill="0" ipadX="0" ipadY="0" insetsTop="7" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/>
</Constraint>
</Constraints>
</Component>
<Component class="javax.swing.JCheckBox" name="limitedOcrCheckbox">
<Properties>
@ -157,6 +200,11 @@
<Events>
<EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="limitedOcrCheckboxActionPerformed"/>
</Events>
<Constraints>
<Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription">
<GridBagConstraints gridX="0" gridY="7" gridWidth="2" gridHeight="1" fill="0" ipadX="216" ipadY="0" insetsTop="0" insetsLeft="31" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/>
</Constraint>
</Constraints>
</Component>
<Component class="javax.swing.JCheckBox" name="ocrOnlyCheckbox">
<Properties>
@ -172,6 +220,24 @@
<Events>
<EventHandler event="actionPerformed" listener="java.awt.event.ActionListener" parameters="java.awt.event.ActionEvent" handler="ocrOnlyCheckboxActionPerformed"/>
</Events>
<Constraints>
<Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription">
<GridBagConstraints gridX="0" gridY="6" gridWidth="2" gridHeight="1" fill="0" ipadX="0" ipadY="0" insetsTop="0" insetsLeft="31" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/>
</Constraint>
</Constraints>
</Component>
<Component class="javax.swing.JCheckBox" name="solrCheckbox">
<Properties>
<Property name="selected" type="boolean" value="true"/>
<Property name="text" type="java.lang.String" editor="org.netbeans.modules.i18n.form.FormI18nStringEditor">
<ResourceString bundle="org/sleuthkit/autopsy/keywordsearch/Bundle.properties" key="KeywordSearchJobSettingsPanel.solrCheckbox.text" replaceFormat="org.openide.util.NbBundle.getMessage({sourceFileName}.class, &quot;{key}&quot;)"/>
</Property>
</Properties>
<Constraints>
<Constraint layoutClass="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout" value="org.netbeans.modules.form.compat2.layouts.DesignGridBagLayout$GridBagConstraintsDescription">
<GridBagConstraints gridX="0" gridY="8" gridWidth="2" gridHeight="1" fill="0" ipadX="0" ipadY="0" insetsTop="7" insetsLeft="10" insetsBottom="0" insetsRight="0" anchor="18" weightX="0.0" weightY="0.0"/>
</Constraint>
</Constraints>
</Component>
</SubComponents>
</Form>

View File

@ -40,6 +40,8 @@ import org.sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.StringsExtr
*/
@SuppressWarnings("PMD.SingularField") // UI widgets cause lots of false positives
public final class KeywordSearchJobSettingsPanel extends IngestModuleIngestJobSettingsPanel implements PropertyChangeListener {
private static final long serialVersionUID = 1L;
private final KeywordListsTableModel tableModel = new KeywordListsTableModel();
private final List<String> keywordListNames = new ArrayList<>();
private final Map<String, Boolean> keywordListStates = new HashMap<>();
@ -65,6 +67,7 @@ public final class KeywordSearchJobSettingsPanel extends IngestModuleIngestJobSe
ocrCheckBox.setSelected(settings.isOCREnabled());
limitedOcrCheckbox.setSelected(settings.isLimitedOCREnabled());
ocrOnlyCheckbox.setSelected(settings.isOCROnly());
solrCheckbox.setSelected(settings.isIndexToSolrEnabled());
handleOcrEnabled(settings.isOCREnabled());
}
@ -194,7 +197,7 @@ public final class KeywordSearchJobSettingsPanel extends IngestModuleIngestJobSe
}
}
return new KeywordSearchJobSettings(enabledListNames, disabledListNames,
this.ocrCheckBox.isSelected(), this.limitedOcrCheckbox.isSelected(), this.ocrOnlyCheckbox.isSelected());
this.ocrCheckBox.isSelected(), this.limitedOcrCheckbox.isSelected(), this.ocrOnlyCheckbox.isSelected(), this.solrCheckbox.isSelected());
}
void reset(KeywordSearchJobSettings newSettings) {
@ -253,6 +256,7 @@ public final class KeywordSearchJobSettingsPanel extends IngestModuleIngestJobSe
@SuppressWarnings("unchecked")
// <editor-fold defaultstate="collapsed" desc="Generated Code">//GEN-BEGIN:initComponents
private void initComponents() {
java.awt.GridBagConstraints gridBagConstraints;
titleLabel = new javax.swing.JLabel();
listsScrollPane = new javax.swing.JScrollPane();
@ -264,12 +268,17 @@ public final class KeywordSearchJobSettingsPanel extends IngestModuleIngestJobSe
ocrCheckBox = new javax.swing.JCheckBox();
limitedOcrCheckbox = new javax.swing.JCheckBox();
ocrOnlyCheckbox = new javax.swing.JCheckBox();
solrCheckbox = new javax.swing.JCheckBox();
setPreferredSize(new java.awt.Dimension(300, 170));
<<<<<<< HEAD
setLayout(new javax.swing.BoxLayout(this, javax.swing.BoxLayout.Y_AXIS));
titleLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.titleLabel.text")); // NOI18N
add(titleLabel);
=======
setLayout(new java.awt.GridBagLayout());
>>>>>>> aae905c78496f2405c2af41a79e6fbb3a4c00e1c
listsScrollPane.setBorder(javax.swing.BorderFactory.createEtchedBorder());
listsScrollPane.setAlignmentX(0.0F);
@ -293,12 +302,37 @@ public final class KeywordSearchJobSettingsPanel extends IngestModuleIngestJobSe
listsScrollPane.setViewportView(listsTable);
listsTable.setDefaultRenderer(String.class, new SimpleTableCellRenderer());
<<<<<<< HEAD
add(listsScrollPane);
=======
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 0;
gridBagConstraints.gridy = 1;
gridBagConstraints.gridwidth = 7;
gridBagConstraints.fill = java.awt.GridBagConstraints.BOTH;
gridBagConstraints.ipadx = 284;
gridBagConstraints.ipady = -71;
gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST;
gridBagConstraints.weightx = 1.0;
gridBagConstraints.weighty = 1.0;
gridBagConstraints.insets = new java.awt.Insets(6, 10, 0, 0);
add(listsScrollPane, gridBagConstraints);
titleLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.titleLabel.text")); // NOI18N
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 0;
gridBagConstraints.gridy = 0;
gridBagConstraints.gridwidth = 2;
gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST;
gridBagConstraints.insets = new java.awt.Insets(7, 10, 0, 0);
add(titleLabel, gridBagConstraints);
>>>>>>> aae905c78496f2405c2af41a79e6fbb3a4c00e1c
languagesLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.languagesLabel.text")); // NOI18N
languagesLabel.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.languagesLabel.toolTipText")); // NOI18N
languagesLabel.setPreferredSize(new java.awt.Dimension(294, 35));
languagesLabel.setVerticalTextPosition(javax.swing.SwingConstants.BOTTOM);
<<<<<<< HEAD
add(languagesLabel);
languagesValLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.languagesValLabel.text")); // NOI18N
@ -313,6 +347,44 @@ public final class KeywordSearchJobSettingsPanel extends IngestModuleIngestJobSe
keywordSearchEncodings.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.keywordSearchEncodings.text")); // NOI18N
keywordSearchEncodings.setBorder(javax.swing.BorderFactory.createEmptyBorder(5, 1, 5, 1));
add(keywordSearchEncodings);
=======
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 0;
gridBagConstraints.gridy = 2;
gridBagConstraints.gridwidth = 8;
gridBagConstraints.ipadx = 25;
gridBagConstraints.ipady = -22;
gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST;
gridBagConstraints.insets = new java.awt.Insets(6, 10, 0, 0);
add(languagesLabel, gridBagConstraints);
languagesValLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.languagesValLabel.text")); // NOI18N
languagesValLabel.setToolTipText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.languagesValLabel.toolTipText")); // NOI18N
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 0;
gridBagConstraints.gridy = 3;
gridBagConstraints.gridwidth = 6;
gridBagConstraints.ipadx = 270;
gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST;
gridBagConstraints.insets = new java.awt.Insets(6, 20, 0, 0);
add(languagesValLabel, gridBagConstraints);
encodingsLabel.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.encodingsLabel.text")); // NOI18N
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 0;
gridBagConstraints.gridy = 4;
gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST;
gridBagConstraints.insets = new java.awt.Insets(11, 10, 0, 0);
add(encodingsLabel, gridBagConstraints);
keywordSearchEncodings.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.keywordSearchEncodings.text")); // NOI18N
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 1;
gridBagConstraints.gridy = 4;
gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST;
gridBagConstraints.insets = new java.awt.Insets(11, 10, 0, 0);
add(keywordSearchEncodings, gridBagConstraints);
>>>>>>> aae905c78496f2405c2af41a79e6fbb3a4c00e1c
ocrCheckBox.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.ocrCheckBox.text")); // NOI18N
ocrCheckBox.addActionListener(new java.awt.event.ActionListener() {
@ -320,7 +392,17 @@ public final class KeywordSearchJobSettingsPanel extends IngestModuleIngestJobSe
ocrCheckBoxActionPerformed(evt);
}
});
<<<<<<< HEAD
add(ocrCheckBox);
=======
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 0;
gridBagConstraints.gridy = 5;
gridBagConstraints.gridwidth = 2;
gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST;
gridBagConstraints.insets = new java.awt.Insets(7, 10, 0, 0);
add(ocrCheckBox, gridBagConstraints);
>>>>>>> aae905c78496f2405c2af41a79e6fbb3a4c00e1c
limitedOcrCheckbox.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.limitedOcrCheckbox.text")); // NOI18N
limitedOcrCheckbox.setBorder(javax.swing.BorderFactory.createEmptyBorder(1, 20, 1, 1));
@ -330,7 +412,18 @@ public final class KeywordSearchJobSettingsPanel extends IngestModuleIngestJobSe
limitedOcrCheckboxActionPerformed(evt);
}
});
<<<<<<< HEAD
add(limitedOcrCheckbox);
=======
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 0;
gridBagConstraints.gridy = 7;
gridBagConstraints.gridwidth = 2;
gridBagConstraints.ipadx = 216;
gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST;
gridBagConstraints.insets = new java.awt.Insets(0, 31, 0, 0);
add(limitedOcrCheckbox, gridBagConstraints);
>>>>>>> aae905c78496f2405c2af41a79e6fbb3a4c00e1c
ocrOnlyCheckbox.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.ocrOnlyCheckbox.text")); // NOI18N
ocrOnlyCheckbox.setBorder(javax.swing.BorderFactory.createEmptyBorder(1, 20, 1, 1));
@ -339,7 +432,27 @@ public final class KeywordSearchJobSettingsPanel extends IngestModuleIngestJobSe
ocrOnlyCheckboxActionPerformed(evt);
}
});
<<<<<<< HEAD
add(ocrOnlyCheckbox);
=======
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 0;
gridBagConstraints.gridy = 6;
gridBagConstraints.gridwidth = 2;
gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST;
gridBagConstraints.insets = new java.awt.Insets(0, 31, 0, 0);
add(ocrOnlyCheckbox, gridBagConstraints);
solrCheckbox.setSelected(true);
solrCheckbox.setText(org.openide.util.NbBundle.getMessage(KeywordSearchJobSettingsPanel.class, "KeywordSearchJobSettingsPanel.solrCheckbox.text")); // NOI18N
gridBagConstraints = new java.awt.GridBagConstraints();
gridBagConstraints.gridx = 0;
gridBagConstraints.gridy = 8;
gridBagConstraints.gridwidth = 2;
gridBagConstraints.anchor = java.awt.GridBagConstraints.NORTHWEST;
gridBagConstraints.insets = new java.awt.Insets(7, 10, 0, 0);
add(solrCheckbox, gridBagConstraints);
>>>>>>> aae905c78496f2405c2af41a79e6fbb3a4c00e1c
}// </editor-fold>//GEN-END:initComponents
private void ocrCheckBoxActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_ocrCheckBoxActionPerformed
@ -365,6 +478,7 @@ public final class KeywordSearchJobSettingsPanel extends IngestModuleIngestJobSe
private javax.swing.JTable listsTable;
private javax.swing.JCheckBox ocrCheckBox;
private javax.swing.JCheckBox ocrOnlyCheckbox;
private javax.swing.JCheckBox solrCheckbox;
private javax.swing.JLabel titleLabel;
// End of variables declaration//GEN-END:variables
}

View File

@ -130,7 +130,12 @@ public class KeywordSearchModuleFactory implements IngestModuleFactory {
@Override
public DataArtifactIngestModule createDataArtifactIngestModule(IngestModuleIngestJobSettings settings) {
return new KwsDataArtifactIngestModule();
if (!(settings instanceof KeywordSearchJobSettings)) {
throw new IllegalArgumentException(NbBundle.getMessage(this.getClass(),
"KeywordSearchModuleFactory.createFileIngestModule.exception.msg"));
}
return new KwsDataArtifactIngestModule((KeywordSearchJobSettings) settings);
}
@Override
@ -140,7 +145,12 @@ public class KeywordSearchModuleFactory implements IngestModuleFactory {
@Override
public AnalysisResultIngestModule createAnalysisResultIngestModule(IngestModuleIngestJobSettings settings) {
return new KwsAnalysisResultIngestModule();
if (!(settings instanceof KeywordSearchJobSettings)) {
throw new IllegalArgumentException(NbBundle.getMessage(this.getClass(),
"KeywordSearchModuleFactory.createFileIngestModule.exception.msg"));
}
return new KwsAnalysisResultIngestModule((KeywordSearchJobSettings) settings);
}
}

View File

@ -29,7 +29,6 @@ import org.sleuthkit.autopsy.coreutils.ModuleSettings;
import org.sleuthkit.autopsy.coreutils.StringExtract;
import org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT;
import org.sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.StringsExtractOptions;
import org.sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.UpdateFrequency;
//This file contains constants and settings for KeywordSearch
class KeywordSearchSettings {
@ -46,34 +45,9 @@ class KeywordSearchSettings {
static final boolean LIMITED_OCR_ENABLED_DEFAULT = false;
private static boolean skipKnown = true;
private static final Logger logger = Logger.getLogger(KeywordSearchSettings.class.getName());
private static UpdateFrequency UpdateFreq = UpdateFrequency.DEFAULT;
private static List<StringExtract.StringExtractUnicodeTable.SCRIPT> stringExtractScripts = new ArrayList<>();
private static Map<String, String> stringExtractOptions = new HashMap<>();
/**
* Gets the update Frequency from KeywordSearch_Options.properties
*
* @return KeywordSearchIngestModule's update frequency
*/
static UpdateFrequency getUpdateFrequency() {
if (ModuleSettings.getConfigSetting(PROPERTIES_OPTIONS, "UpdateFrequency") != null) { //NON-NLS
return UpdateFrequency.valueOf(ModuleSettings.getConfigSetting(PROPERTIES_OPTIONS, "UpdateFrequency")); //NON-NLS
}
//if it failed, return the default/last known value
logger.log(Level.WARNING, "Could not read property for UpdateFrequency, returning backup value."); //NON-NLS
return UpdateFrequency.DEFAULT;
}
/**
* Sets the update frequency and writes to KeywordSearch_Options.properties
*
* @param freq Sets KeywordSearchIngestModule to this value.
*/
static void setUpdateFrequency(UpdateFrequency freq) {
ModuleSettings.setConfigSetting(PROPERTIES_OPTIONS, "UpdateFrequency", freq.name()); //NON-NLS
UpdateFreq = freq;
}
/**
* Sets whether or not to skip adding known good files to the search during
* index.
@ -243,11 +217,6 @@ class KeywordSearchSettings {
logger.log(Level.INFO, "No configuration for NSRL found, generating default..."); //NON-NLS
KeywordSearchSettings.setSkipKnown(true);
}
//setting default Update Frequency
if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_OPTIONS, "UpdateFrequency")) { //NON-NLS
logger.log(Level.INFO, "No configuration for Update Frequency found, generating default..."); //NON-NLS
KeywordSearchSettings.setUpdateFrequency(UpdateFrequency.DEFAULT);
}
//setting default Extract UTF8
if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_OPTIONS, StringsExtractOptions.EXTRACT_UTF8.toString())) {
logger.log(Level.INFO, "No configuration for UTF8 found, generating default..."); //NON-NLS

View File

@ -20,12 +20,23 @@ package org.sleuthkit.autopsy.keywordsearch;
import java.awt.Component;
import java.io.File;
import java.io.Reader;
import java.util.regex.Matcher;
import org.sleuthkit.autopsy.coreutils.Logger;
import javax.swing.JOptionPane;
import org.openide.util.Lookup;
import org.openide.windows.WindowManager;
import org.sleuthkit.autopsy.textextractors.TextExtractor;
import org.sleuthkit.autopsy.textextractors.TextExtractorFactory;
import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.TskCoreException;
class KeywordSearchUtil {
private static final String SNIPPET_DELIMITER = String.valueOf(Character.toChars(171));
public enum DIALOG_MESSAGE_TYPE {
ERROR, WARN, INFO
@ -143,6 +154,36 @@ class KeywordSearchUtil {
return query;
}
/**
* Make a snippet from the given content that has the given hit plus some
* surrounding context.
*
* @param content The content to extract the snippet from.
*
* @param hitMatcher The Matcher that has the start/end info for where the
* hit is in the content.
* @param hit The actual hit in the content.
*
* @return A snippet extracted from content that contains hit plus some
* surrounding context.
*/
static String makeSnippet(String content, Matcher hitMatcher, String hit) {
// Get the snippet from the document.
final int end = hitMatcher.end();
final int start = hitMatcher.start();
return makeSnippet(content, start, end, hit);
}
static String makeSnippet(String content, int startOffset, int endOffset, String hit) {
// Get the snippet from the document.
int maxIndex = content.length() - 1;
return content.substring(Integer.max(0, startOffset - 20), Integer.max(0, startOffset))
+ SNIPPET_DELIMITER + hit + SNIPPET_DELIMITER
+ content.substring(Integer.min(maxIndex, endOffset), Integer.min(maxIndex, endOffset + 20));
}
/**
* Is the Keyword Search list at absPath an XML list?
*
@ -154,4 +195,40 @@ class KeywordSearchUtil {
//TODO: make this more robust, if necessary
return new File(absPath).getName().endsWith(".xml"); //NON-NLS
}
static Reader getReader(Content content) throws TextExtractorFactory.NoTextExtractorFound, TextExtractor.InitReaderException{
return getReader(content, null);
}
static Reader getReader(Content content, Lookup stringsExtractionContext) throws TextExtractorFactory.NoTextExtractorFound, TextExtractor.InitReaderException{
Reader reader = null;
if (content instanceof BlackboardArtifact) {
BlackboardArtifact artifact = (BlackboardArtifact) content;
if (artifact.getArtifactID() > 0) {
/*
* Artifact indexing is only supported for artifacts that use
* negative artifact ids to avoid overlapping with the object
* ids of other types of Content.
*/
return null;
}
TextExtractor blackboardExtractor = TextExtractorFactory.getExtractor(content, null);
reader = blackboardExtractor.getReader();
} else if (content instanceof AbstractFile) {
TextExtractor stringsExtractor = TextExtractorFactory.getStringsExtractor( content, stringsExtractionContext);
reader = stringsExtractor.getReader();
} else {
try {
TextExtractor contentExtractor = TextExtractorFactory.getExtractor(content, null);
reader = contentExtractor.getReader();
} catch (TextExtractorFactory.NoTextExtractorFound | TextExtractor.InitReaderException ex) {
// Try the StringsTextExtractor if Tika extractions fails.
TextExtractor stringsExtractor = TextExtractorFactory.getStringsExtractor(content, null);
reader = stringsExtractor.getReader();
}
}
return reader;
}
}

View File

@ -18,16 +18,14 @@
*/
package org.sleuthkit.autopsy.keywordsearch;
import java.io.Reader;
import java.util.logging.Level;
import org.openide.util.Lookup;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.ingest.AnalysisResultIngestModule;
import org.sleuthkit.autopsy.ingest.IngestJobContext;
import org.sleuthkit.autopsy.ingest.IngestModule;
import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService;
import org.sleuthkit.datamodel.AnalysisResult;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.TskCoreException;
/**
* An analysis result ingest module that indexes text for keyword search. All
@ -40,21 +38,33 @@ public class KwsAnalysisResultIngestModule implements AnalysisResultIngestModule
private static final Logger LOGGER = Logger.getLogger(KeywordSearchIngestModule.class.getName());
private static final int TSK_KEYWORD_HIT_TYPE_ID = BlackboardArtifact.Type.TSK_KEYWORD_HIT.getTypeID();
private IngestJobContext context;
private KeywordSearchService searchService;
private final KeywordSearchJobSettings settings;
KwsAnalysisResultIngestModule(KeywordSearchJobSettings settings) {
this.settings = settings;
}
@Override
public void startUp(IngestJobContext context) throws IngestModule.IngestModuleException {
this.context = context;
searchService = Lookup.getDefault().lookup(KeywordSearchService.class);
}
@Override
public IngestModule.ProcessResult process(AnalysisResult result) {
try {
if (result.getType().getTypeID() != TSK_KEYWORD_HIT_TYPE_ID) {
searchService.index(result);
Ingester ingester = Ingester.getDefault();
Reader blackboardExtractedTextReader = KeywordSearchUtil.getReader(result);
String sourceName = result.getDisplayName() + "_" + result.getArtifactID();
ingester.indexMetaDataOnly(result, sourceName);
ingester.search(blackboardExtractedTextReader,
result.getArtifactID(),
sourceName, result,
context, true,
settings.isIndexToSolrEnabled(),
settings.getNamesOfEnabledKeyWordLists());
}
} catch (TskCoreException ex) {
} catch (Exception ex) {
LOGGER.log(Level.SEVERE, String.format("Error indexing analysis result '%s' (job ID=%d)", result, context.getJobId()), ex); //NON-NLS
return IngestModule.ProcessResult.ERROR;
}

View File

@ -18,12 +18,13 @@
*/
package org.sleuthkit.autopsy.keywordsearch;
import java.io.Reader;
import java.util.logging.Level;
import org.openide.util.Lookup;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.ingest.DataArtifactIngestModule;
import org.sleuthkit.autopsy.ingest.IngestJobContext;
import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService;
import org.sleuthkit.autopsy.textextractors.TextExtractor;
import org.sleuthkit.autopsy.textextractors.TextExtractorFactory;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.DataArtifact;
import org.sleuthkit.datamodel.TskCoreException;
@ -39,21 +40,33 @@ public class KwsDataArtifactIngestModule implements DataArtifactIngestModule {
private static final Logger LOGGER = Logger.getLogger(KeywordSearchIngestModule.class.getName());
private static final int TSK_ASSOCIATED_OBJECT_TYPE_ID = BlackboardArtifact.Type.TSK_ASSOCIATED_OBJECT.getTypeID();
private IngestJobContext context;
private KeywordSearchService searchService;
private final KeywordSearchJobSettings settings;
KwsDataArtifactIngestModule(KeywordSearchJobSettings settings) {
this.settings = settings;
}
@Override
public void startUp(IngestJobContext context) throws IngestModuleException {
this.context = context;
searchService = Lookup.getDefault().lookup(KeywordSearchService.class);
}
@Override
public ProcessResult process(DataArtifact artifact) {
try {
if (artifact.getType().getTypeID() != TSK_ASSOCIATED_OBJECT_TYPE_ID) {
searchService.index(artifact);
Ingester ingester = Ingester.getDefault();
Reader blackboardExtractedTextReader = KeywordSearchUtil.getReader(artifact);
String sourceName = artifact.getDisplayName() + "_" + artifact.getArtifactID();
ingester.indexMetaDataOnly(artifact, sourceName);
ingester.search(blackboardExtractedTextReader,
artifact.getArtifactID(),
sourceName, artifact,
context, true,
settings.isIndexToSolrEnabled(),
settings.getNamesOfEnabledKeyWordLists());
}
} catch (TskCoreException ex) {
} catch (Exception ex) {
LOGGER.log(Level.SEVERE, String.format("Error indexing data artifact '%s' (job ID=%d)", artifact, context.getJobId()), ex); //NON-NLS
return ProcessResult.ERROR;
}

View File

@ -46,7 +46,6 @@ import org.sleuthkit.autopsy.datamodel.CreditCards;
import static org.sleuthkit.autopsy.keywordsearch.KeywordSearchSettings.MODULE_NAME;
import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.Account;
import org.sleuthkit.datamodel.AccountFileInstance;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.BlackboardAttribute;
import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
@ -431,7 +430,7 @@ final class RegexQuery implements KeywordSearchQuery {
keywordsFoundInThisDocument.put(hit, hit);
if (artifactAttributeType == null) {
hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit));
hits.add(new KeywordHit(docId, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit));
} else {
switch (artifactAttributeType) {
case TSK_EMAIL:
@ -442,7 +441,7 @@ final class RegexQuery implements KeywordSearchQuery {
*/
if (hit.length() >= MIN_EMAIL_ADDR_LENGTH
&& DomainValidator.getInstance(true).isValidTld(hit.substring(hit.lastIndexOf('.')))) {
hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit));
hits.add(new KeywordHit(docId, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit));
}
break;
@ -459,14 +458,14 @@ final class RegexQuery implements KeywordSearchQuery {
if (ccnMatcher.find()) {
final String group = ccnMatcher.group("ccn");
if (CreditCardValidator.isValidCCN(group)) {
hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit));
hits.add(new KeywordHit(docId, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit));
}
}
}
break;
default:
hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit));
hits.add(new KeywordHit(docId, KeywordSearchUtil.makeSnippet(content, hitMatcher, hit), hit));
break;
}
}
@ -486,30 +485,6 @@ final class RegexQuery implements KeywordSearchQuery {
return hits;
}
/**
* Make a snippet from the given content that has the given hit plus some
* surrounding context.
*
* @param content The content to extract the snippet from.
*
* @param hitMatcher The Matcher that has the start/end info for where the
* hit is in the content.
* @param hit The actual hit in the content.
*
* @return A snippet extracted from content that contains hit plus some
* surrounding context.
*/
private String makeSnippet(String content, Matcher hitMatcher, String hit) {
// Get the snippet from the document.
int maxIndex = content.length() - 1;
final int end = hitMatcher.end();
final int start = hitMatcher.start();
return content.substring(Integer.max(0, start - 20), Integer.max(0, start))
+ SNIPPET_DELIMITER + hit + SNIPPET_DELIMITER
+ content.substring(Integer.min(maxIndex, end), Integer.min(maxIndex, end + 20));
}
@Override
public void addFilter(KeywordQueryFilter filter) {
this.filters.add(filter);
@ -573,6 +548,11 @@ final class RegexQuery implements KeywordSearchQuery {
*/
@Override
public BlackboardArtifact createKeywordHitArtifact(Content content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) {
return createKeywordHitArtifact(content, originalKeyword, foundKeyword, hit, snippet, listName, ingestJobId);
}
public static BlackboardArtifact createKeywordHitArtifact(Content content, Keyword originalKW, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) {
final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
if (content == null) {
@ -583,8 +563,8 @@ final class RegexQuery implements KeywordSearchQuery {
/*
* Credit Card number hits are handled differently
*/
if (originalKeyword.getArtifactAttributeType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
createCCNAccount(content, foundKeyword, hit, snippet, listName, ingestJobId);
if (originalKW.getArtifactAttributeType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
createCCNAccount(content, originalKW, foundKeyword, hit, snippet, listName, ingestJobId);
return null;
}
@ -594,8 +574,10 @@ final class RegexQuery implements KeywordSearchQuery {
*/
Collection<BlackboardAttribute> attributes = new ArrayList<>();
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm()));
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, getQueryString()));
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, foundKeyword.getSearchTerm().toLowerCase()));
if(!originalKW.searchTermIsWholeWord() || !originalKW.searchTermIsLiteral()) {
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, originalKW.getSearchTerm()));
}
if (StringUtils.isNotBlank(listName)) {
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName));
@ -608,8 +590,12 @@ final class RegexQuery implements KeywordSearchQuery {
-> attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, artifactID))
);
if (originalKeyword.searchTermIsLiteral()) {
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
if (originalKW.searchTermIsLiteral()) {
if(!originalKW.searchTermIsWholeWord()) {
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.SUBSTRING.ordinal()));
} else {
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.LITERAL.ordinal()));
}
} else {
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE, MODULE_NAME, KeywordSearch.QueryType.REGEX.ordinal()));
}
@ -625,11 +611,11 @@ final class RegexQuery implements KeywordSearchQuery {
}
}
private void createCCNAccount(Content content, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) {
private static void createCCNAccount(Content content, Keyword originalKW, Keyword foundKeyword, KeywordHit hit, String snippet, String listName, Long ingestJobId) {
final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
if (originalKeyword.getArtifactAttributeType() != ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
if (originalKW.getArtifactAttributeType() != ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
LOGGER.log(Level.SEVERE, "Keyword hit is not a credit card number"); //NON-NLS
return;
}

View File

@ -93,28 +93,22 @@ public class SolrSearchService implements KeywordSearchService, AutopsyService {
return;
}
try {
TextExtractor blackboardExtractor = TextExtractorFactory.getExtractor(content, null);
Reader blackboardExtractedTextReader = blackboardExtractor.getReader();
Reader blackboardExtractedTextReader = KeywordSearchUtil.getReader(content);
String sourceName = artifact.getDisplayName() + "_" + artifact.getArtifactID();
ingester.indexMetaDataOnly(artifact, sourceName);
ingester.indexText(blackboardExtractedTextReader, artifact.getArtifactID(), sourceName, content, null);
} catch (Ingester.IngesterException | TextExtractorFactory.NoTextExtractorFound | TextExtractor.InitReaderException ex) {
// Will not cause an inline search becauce the keyword list is null
ingester.search(blackboardExtractedTextReader, artifact.getArtifactID(), sourceName, content, null, true, true, null);
} catch (Exception ex) {
throw new TskCoreException("Error indexing artifact", ex);
}
} else {
try {
TextExtractor contentExtractor = TextExtractorFactory.getExtractor(content, null);
Reader contentExtractedTextReader = contentExtractor.getReader();
ingester.indexText(contentExtractedTextReader, content.getId(), content.getName(), content, null);
} catch (TextExtractorFactory.NoTextExtractorFound | Ingester.IngesterException | TextExtractor.InitReaderException ex) {
try {
// Try the StringsTextExtractor if Tika extractions fails.
TextExtractor stringsExtractor = TextExtractorFactory.getStringsExtractor(content, null);
Reader stringsExtractedTextReader = stringsExtractor.getReader();
ingester.indexStrings(stringsExtractedTextReader, content.getId(), content.getName(), content, null);
} catch (Ingester.IngesterException | TextExtractor.InitReaderException ex1) {
throw new TskCoreException("Error indexing content", ex1);
}
Reader reader = KeywordSearchUtil.getReader(content);
// Will not cause an inline search becauce the keyword list is null
ingester.search(reader, content.getId(), content.getName(), content, null, true, true, null);
} catch (Exception ex) {
throw new TskCoreException("Error indexing content", ex);
}
// only do a Solr commit if ingest is not running. If ingest is running, the changes will
// be committed via a periodic commit or via final commit after the ingest job has finished.
@ -421,11 +415,11 @@ public class SolrSearchService implements KeywordSearchService, AutopsyService {
try {
String sourceName = artifact.getDisplayName() + "_" + artifact.getArtifactID();
TextExtractor blackboardExtractor = TextExtractorFactory.getExtractor((Content) artifact, null);
TextExtractor blackboardExtractor = TextExtractorFactory.getExtractor(artifact, null);
Reader blackboardExtractedTextReader = blackboardExtractor.getReader();
ingester.indexMetaDataOnly(artifact, sourceName);
ingester.indexText(blackboardExtractedTextReader, artifact.getId(), sourceName, artifact, null);
} catch (Ingester.IngesterException | TextExtractorFactory.NoTextExtractorFound | TextExtractor.InitReaderException ex) {
ingester.search(blackboardExtractedTextReader, artifact.getId(), sourceName, artifact, null, true, true, null);
} catch (Exception ex) {
throw new TskCoreException(ex.getCause().getMessage(), ex);
}
}

View File

@ -420,15 +420,6 @@ class ExtractRegistry extends Extract {
Report report = currentCase.addReport(regOutputFiles.fullPlugins,
NbBundle.getMessage(this.getClass(), "ExtractRegistry.parentModuleName.noSpace"),
"RegRipper " + regFile.getUniquePath(), regFile); //NON-NLS
// Index the report content so that it will be available for keyword search.
KeywordSearchService searchService = Lookup.getDefault().lookup(KeywordSearchService.class);
if (null == searchService) {
logger.log(Level.WARNING, "Keyword search service not found. Report will not be indexed");
} else {
searchService.index(report);
report.close();
}
} catch (TskCoreException e) {
this.addErrorMessage("Error adding regripper output as Autopsy report: " + e.getLocalizedMessage()); //NON-NLS
}