Merge pull request #7789 from eugene7646/indexed_text_tab_8505

Indexed text tab (8505)
This commit is contained in:
eugene7646 2023-06-10 10:24:56 -04:00 committed by GitHub
commit 2d0601a26a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 538 additions and 135 deletions

View File

@ -24,7 +24,6 @@ import org.openide.util.NbBundle.Messages;
import org.openide.util.lookup.ServiceProvider; import org.openide.util.lookup.ServiceProvider;
import org.sleuthkit.autopsy.corecomponentinterfaces.DataContentViewer; import org.sleuthkit.autopsy.corecomponentinterfaces.DataContentViewer;
import org.sleuthkit.datamodel.AbstractFile; import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.BlackboardArtifact;
/** /**
* A DataContentViewer that displays text with the TextViewers available. * A DataContentViewer that displays text with the TextViewers available.

View File

@ -99,7 +99,7 @@ public class TextContentViewerPanel extends javax.swing.JPanel implements DataCo
/** /**
* Determine the isPreferred score for the content viewer which is * Determine the isPreferred score for the content viewer which is
* displaying this panel. Score is depenedent on the score of the supported * displaying this panel. Score is dependent on the score of the supported
* TextViewers which exist. * TextViewers which exist.
* *
* @param node * @param node

View File

@ -50,7 +50,7 @@ import org.sleuthkit.datamodel.TskCoreException;
* and HighlightedText are very similar and could probably use some refactoring * and HighlightedText are very similar and could probably use some refactoring
* to reduce code duplication. * to reduce code duplication.
*/ */
class AccountsText implements IndexedText { class AccountsText implements ExtractedText {
private static final Logger logger = Logger.getLogger(AccountsText.class.getName()); private static final Logger logger = Logger.getLogger(AccountsText.class.getName());
private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT); private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
@ -312,7 +312,7 @@ class AccountsText implements IndexedText {
return "<html><pre>" + highlightedText + "</pre></html>"; //NON-NLS return "<html><pre>" + highlightedText + "</pre></html>"; //NON-NLS
} catch (Exception ex) { } catch (Exception ex) {
logger.log(Level.SEVERE, "Error getting highlighted text for Solr doc id " + this.solrObjectId + ", chunkID " + this.currentPage, ex); //NON-NLS logger.log(Level.SEVERE, "Error getting highlighted text for Solr doc id " + this.solrObjectId + ", chunkID " + this.currentPage, ex); //NON-NLS
return Bundle.IndexedText_errorMessage_errorGettingText(); return Bundle.ExtractedText_errorMessage_errorGettingText();
} }
} }

View File

@ -42,7 +42,7 @@ AbstractKeywordSearchPerformer.search.emptyKeywordErrorBody=Keyword list is empt
AbstractKeywordSearchPerformer.search.noFilesInIdxMsg=<html>No files are in index yet. <br />If Solr keyword search indexing was enabled, wait for ingest to complete</html> AbstractKeywordSearchPerformer.search.noFilesInIdxMsg=<html>No files are in index yet. <br />If Solr keyword search indexing was enabled, wait for ingest to complete</html>
AbstractKeywordSearchPerformer.search.noFilesIdxdMsg=<html>No files were indexed.<br />Re-ingest the image with the Keyword Search Module and Solr indexing enabled. </html> AbstractKeywordSearchPerformer.search.noFilesIdxdMsg=<html>No files were indexed.<br />Re-ingest the image with the Keyword Search Module and Solr indexing enabled. </html>
ExtractedContentViewer.toolTip=Displays extracted text from files and keyword-search results. Requires Keyword Search ingest to be run on a file to activate this viewer. ExtractedContentViewer.toolTip=Displays extracted text from files and keyword-search results. Requires Keyword Search ingest to be run on a file to activate this viewer.
ExtractedContentViewer.getTitle=Indexed Text ExtractedContentViewer.getTitle=Extracted Text
HighlightedMatchesSource.toString=Search Results HighlightedMatchesSource.toString=Search Results
Installer.reportPortError=Indexing server port {0} is not available. Check if your security software does not block {1} and consider changing {2} in {3} property file in the application user folder. Then try rebooting your system if another process was causing the conflict. Installer.reportPortError=Indexing server port {0} is not available. Check if your security software does not block {1} and consider changing {2} in {3} property file in the application user folder. Then try rebooting your system if another process was causing the conflict.
Installer.reportStopPortError=Indexing server stop port {0} is not available. Consider changing {1} in {2} property file in the application user folder. Installer.reportStopPortError=Indexing server stop port {0} is not available. Consider changing {1} in {2} property file in the application user folder.

View File

@ -15,6 +15,7 @@ ExtractAllTermsReport.error.noOpenCase=No currently open case.
ExtractAllTermsReport.export.error=Error During Unique Word Extraction ExtractAllTermsReport.export.error=Error During Unique Word Extraction
ExtractAllTermsReport.exportComplete=Unique Word Extraction Complete ExtractAllTermsReport.exportComplete=Unique Word Extraction Complete
ExtractAllTermsReport.getName.text=Extract Unique Words ExtractAllTermsReport.getName.text=Extract Unique Words
# {0} - Number of extracted terms
ExtractAllTermsReport.numberExtractedTerms=Extracted {0} terms... ExtractAllTermsReport.numberExtractedTerms=Extracted {0} terms...
ExtractAllTermsReport.search.ingestInProgressBody=<html>Keyword Search Ingest is currently running.<br />Not all files have been indexed and unique word extraction might yield incomplete results.<br />Do you want to proceed with unique word extraction anyway?</html> ExtractAllTermsReport.search.ingestInProgressBody=<html>Keyword Search Ingest is currently running.<br />Not all files have been indexed and unique word extraction might yield incomplete results.<br />Do you want to proceed with unique word extraction anyway?</html>
ExtractAllTermsReport.search.noFilesInIdxMsg=No files are in index yet. If Solr keyword search indexing and Solr indexing were enabled, wait for ingest to complete. ExtractAllTermsReport.search.noFilesInIdxMsg=No files are in index yet. If Solr keyword search indexing and Solr indexing were enabled, wait for ingest to complete.
@ -22,13 +23,15 @@ ExtractAllTermsReport.search.noFilesInIdxMsg2=No files are in index yet. Re-inge
ExtractAllTermsReport.search.searchIngestInProgressTitle=Keyword Search Ingest in Progress ExtractAllTermsReport.search.searchIngestInProgressTitle=Keyword Search Ingest in Progress
ExtractAllTermsReport.startExport=Starting Unique Word Extraction ExtractAllTermsReport.startExport=Starting Unique Word Extraction
ExtractedContentPanel.setMarkup.panelTxt=<span style='font-style:italic'>Loading text... Please wait</span> ExtractedContentPanel.setMarkup.panelTxt=<span style='font-style:italic'>Loading text... Please wait</span>
# {0} - Content name
ExtractedContentPanel.SetMarkup.progress.loading=Loading text for {0} ExtractedContentPanel.SetMarkup.progress.loading=Loading text for {0}
ExtractedText.errorMessage.errorGettingText=<span style='font-style:italic'>Error retrieving text.</span>
ExtractedText.FileText=File Text
ExtractedText.warningMessage.knownFile=<span style='font-style:italic'>This file is a known file (based on MD5 hash) and does not have indexed text.</span>
ExtractedText.warningMessage.noTextAvailable=<span style='font-style:italic'>No text available for this file.</span>
GlobalEditListPanel.editKeyword.title=Edit Keyword GlobalEditListPanel.editKeyword.title=Edit Keyword
GlobalEditListPanel.warning.text=Boundary characters ^ and $ do not match word boundaries. Consider\nreplacing with an explicit list of boundary characters, such as [ \\.,] GlobalEditListPanel.warning.text=Boundary characters ^ and $ do not match word boundaries. Consider\nreplacing with an explicit list of boundary characters, such as [ \\.,]
GlobalEditListPanel.warning.title=Warning GlobalEditListPanel.warning.title=Warning
IndexedText.errorMessage.errorGettingText=<span style='font-style:italic'>Error retrieving indexed text.</span>
IndexedText.warningMessage.knownFile=<span style='font-style:italic'>This file is a known file (based on MD5 hash) and does not have indexed text.</span>
IndexedText.warningMessage.noTextAvailable=<span style='font-style:italic'>No indexed text for this file.</span>
KeywordSearchGlobalSearchSettingsPanel.customizeComponents.windowsLimitedOCR=Only process images which are over 100KB in size or extracted from a document. (Beta) (Requires Windows 64-bit) KeywordSearchGlobalSearchSettingsPanel.customizeComponents.windowsLimitedOCR=Only process images which are over 100KB in size or extracted from a document. (Beta) (Requires Windows 64-bit)
KeywordSearchGlobalSearchSettingsPanel.customizeComponents.windowsOCR=Enable Optical Character Recognition (OCR) (Requires Windows 64-bit) KeywordSearchGlobalSearchSettingsPanel.customizeComponents.windowsOCR=Enable Optical Character Recognition (OCR) (Requires Windows 64-bit)
KeywordSearchGlobalSettingsPanel.Title=Global Keyword Search Settings KeywordSearchGlobalSettingsPanel.Title=Global Keyword Search Settings
@ -49,7 +52,7 @@ KeywordSearchResultFactory.createNodeForKey.noResultsFound.text=No results found
KeywordSearchResultFactory.query.exception.msg=Could not perform the query KeywordSearchResultFactory.query.exception.msg=Could not perform the query
OpenIDE-Module-Display-Category=Ingest Module OpenIDE-Module-Display-Category=Ingest Module
OpenIDE-Module-Long-Description=Keyword Search ingest module.\n\nThe module indexes files found in the disk image at ingest time.\nIt then periodically runs the search on the indexed files using one or more keyword lists (containing pure words and/or regular expressions) and posts results.\n\nThe module also contains additional tools integrated in the main GUI, such as keyword list configuration, keyword search bar in the top-right corner, extracted text viewer and search results viewer showing highlighted keywords found. OpenIDE-Module-Long-Description=Keyword Search ingest module.\n\nThe module indexes files found in the disk image at ingest time.\nIt then periodically runs the search on the indexed files using one or more keyword lists (containing pure words and/or regular expressions) and posts results.\n\n\The module also contains additional tools integrated in the main GUI, such as keyword list configuration, keyword search bar in the top-right corner, extracted text viewer and search results viewer showing highlighted keywords found.
OpenIDE-Module-Name=KeywordSearch OpenIDE-Module-Name=KeywordSearch
OptionsCategory_Name_KeywordSearchOptions=Keyword Search OptionsCategory_Name_KeywordSearchOptions=Keyword Search
OptionsCategory_Keywords_KeywordSearchOptions=Keyword Search OptionsCategory_Keywords_KeywordSearchOptions=Keyword Search
@ -91,7 +94,7 @@ AbstractKeywordSearchPerformer.search.emptyKeywordErrorBody=Keyword list is empt
AbstractKeywordSearchPerformer.search.noFilesInIdxMsg=<html>No files are in index yet. <br />If Solr keyword search indexing was enabled, wait for ingest to complete</html> AbstractKeywordSearchPerformer.search.noFilesInIdxMsg=<html>No files are in index yet. <br />If Solr keyword search indexing was enabled, wait for ingest to complete</html>
AbstractKeywordSearchPerformer.search.noFilesIdxdMsg=<html>No files were indexed.<br />Re-ingest the image with the Keyword Search Module and Solr indexing enabled. </html> AbstractKeywordSearchPerformer.search.noFilesIdxdMsg=<html>No files were indexed.<br />Re-ingest the image with the Keyword Search Module and Solr indexing enabled. </html>
ExtractedContentViewer.toolTip=Displays extracted text from files and keyword-search results. Requires Keyword Search ingest to be run on a file to activate this viewer. ExtractedContentViewer.toolTip=Displays extracted text from files and keyword-search results. Requires Keyword Search ingest to be run on a file to activate this viewer.
ExtractedContentViewer.getTitle=Indexed Text ExtractedContentViewer.getTitle=Extracted Text
HighlightedMatchesSource.toString=Search Results HighlightedMatchesSource.toString=Search Results
Installer.reportPortError=Indexing server port {0} is not available. Check if your security software does not block {1} and consider changing {2} in {3} property file in the application user folder. Then try rebooting your system if another process was causing the conflict. Installer.reportPortError=Indexing server port {0} is not available. Check if your security software does not block {1} and consider changing {2} in {3} property file in the application user folder. Then try rebooting your system if another process was causing the conflict.
Installer.reportStopPortError=Indexing server stop port {0} is not available. Consider changing {1} in {2} property file in the application user folder. Installer.reportStopPortError=Indexing server stop port {0} is not available. Consider changing {1} in {2} property file in the application user folder.
@ -137,8 +140,6 @@ KeywordSearchIngestModule.init.onlyIdxKwSkipMsg=Only indexing will be done and k
KeywordSearchIngestModule.doInBackGround.displayName=Periodic Keyword Search KeywordSearchIngestModule.doInBackGround.displayName=Periodic Keyword Search
KeywordSearchIngestModule.doInBackGround.finalizeMsg=Finalizing KeywordSearchIngestModule.doInBackGround.finalizeMsg=Finalizing
KeywordSearchIngestModule.doInBackGround.pendingMsg=(Pending) KeywordSearchIngestModule.doInBackGround.pendingMsg=(Pending)
RawText.FileText=File Text
RawText.ResultText=Result Text
SearchRunner.doInBackGround.cancelMsg=(Cancelling...) SearchRunner.doInBackGround.cancelMsg=(Cancelling...)
KeywordSearchIngestModule.postIndexSummary.knowFileHeaderLbl=Files with known types KeywordSearchIngestModule.postIndexSummary.knowFileHeaderLbl=Files with known types
KeywordSearchIngestModule.postIndexSummary.fileGenStringsHead=Files with general strings extracted KeywordSearchIngestModule.postIndexSummary.fileGenStringsHead=Files with general strings extracted
@ -224,6 +225,7 @@ KeywordSearchSettings.properties_options.text={0}_Options
KeywordSearchSettings.propertiesNSRL.text={0}_NSRL KeywordSearchSettings.propertiesNSRL.text={0}_NSRL
KeywordSearchSettings.propertiesScripts.text={0}_Scripts KeywordSearchSettings.propertiesScripts.text={0}_Scripts
NoOpenCoreException.err.noOpenSorlCore.msg=No currently open Solr core. NoOpenCoreException.err.noOpenSorlCore.msg=No currently open Solr core.
# {0} - collection name
Server.deleteCore.exception.msg=Failed to delete Solr colelction {0} Server.deleteCore.exception.msg=Failed to delete Solr colelction {0}
Server.exceptionMessage.unableToBackupCollection=Unable to backup Solr collection Server.exceptionMessage.unableToBackupCollection=Unable to backup Solr collection
Server.exceptionMessage.unableToCreateCollection=Unable to create Solr collection Server.exceptionMessage.unableToCreateCollection=Unable to create Solr collection
@ -336,6 +338,8 @@ GlobalListsManagementPanel.copyListButton.text=Copy List
GlobalListsManagementPanel.renameListButton.text=Edit List Name GlobalListsManagementPanel.renameListButton.text=Edit List Name
GlobalEditListPanel.editWordButton.text=Edit Keyword GlobalEditListPanel.editWordButton.text=Edit Keyword
SolrConnectionCheck.Port=Invalid port number. SolrConnectionCheck.Port=Invalid port number.
SolrIndexedText.FileText=File Text
SolrIndexedText.ResultText=Result Text
SolrSearch.checkingForLatestIndex.msg=Looking for text index with latest Solr and schema version SolrSearch.checkingForLatestIndex.msg=Looking for text index with latest Solr and schema version
SolrSearch.complete.msg=Text index successfully opened SolrSearch.complete.msg=Text index successfully opened
SolrSearch.creatingNewIndex.msg=Creating new text index SolrSearch.creatingNewIndex.msg=Creating new text index

View File

@ -565,7 +565,7 @@
<Component class="javax.swing.JComboBox" name="sourceComboBox"> <Component class="javax.swing.JComboBox" name="sourceComboBox">
<Properties> <Properties>
<Property name="model" type="javax.swing.ComboBoxModel" editor="org.netbeans.modules.form.RADConnectionPropertyEditor"> <Property name="model" type="javax.swing.ComboBoxModel" editor="org.netbeans.modules.form.RADConnectionPropertyEditor">
<Connection code="new javax.swing.DefaultComboBoxModel&lt;org.sleuthkit.autopsy.keywordsearch.IndexedText&gt;()" type="code"/> <Connection code="new javax.swing.DefaultComboBoxModel&lt;org.sleuthkit.autopsy.keywordsearch.ExtractedText&gt;()" type="code"/>
</Property> </Property>
<Property name="maximumSize" type="java.awt.Dimension" editor="org.netbeans.beaninfo.editors.DimensionEditor"> <Property name="maximumSize" type="java.awt.Dimension" editor="org.netbeans.beaninfo.editors.DimensionEditor">
<Dimension value="[150, 32767]"/> <Dimension value="[150, 32767]"/>
@ -579,7 +579,7 @@
</Properties> </Properties>
<AuxValues> <AuxValues>
<AuxValue name="JavaCodeGenerator_CreateCodeCustom" type="java.lang.String" value="new javax.swing.JComboBox&lt;&gt;()"/> <AuxValue name="JavaCodeGenerator_CreateCodeCustom" type="java.lang.String" value="new javax.swing.JComboBox&lt;&gt;()"/>
<AuxValue name="JavaCodeGenerator_TypeParameters" type="java.lang.String" value="&lt;org.sleuthkit.autopsy.keywordsearch.IndexedText&gt;"/> <AuxValue name="JavaCodeGenerator_TypeParameters" type="java.lang.String" value="&lt;org.sleuthkit.autopsy.keywordsearch.ExtractedText&gt;"/>
</AuxValues> </AuxValues>
</Component> </Component>
</SubComponents> </SubComponents>

View File

@ -1,7 +1,7 @@
/* /*
* Autopsy Forensic Browser * Autopsy Forensic Browser
* *
* Copyright 2011-2021 Basis Technology Corp. * Copyright 2011-2023 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org * Contact: carrier <at> sleuthkit <dot> org
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
@ -396,7 +396,7 @@ class ExtractedContentPanel extends javax.swing.JPanel implements ResizableTextP
textSourcePanel.add(jLabel1); textSourcePanel.add(jLabel1);
textSourcePanel.add(fillerSmall12); textSourcePanel.add(fillerSmall12);
sourceComboBox.setModel(new javax.swing.DefaultComboBoxModel<org.sleuthkit.autopsy.keywordsearch.IndexedText>()); sourceComboBox.setModel(new javax.swing.DefaultComboBoxModel<org.sleuthkit.autopsy.keywordsearch.ExtractedText>());
sourceComboBox.setMaximumSize(new java.awt.Dimension(150, 32767)); sourceComboBox.setMaximumSize(new java.awt.Dimension(150, 32767));
sourceComboBox.setMinimumSize(new java.awt.Dimension(150, 25)); sourceComboBox.setMinimumSize(new java.awt.Dimension(150, 25));
sourceComboBox.setPreferredSize(new java.awt.Dimension(150, 25)); sourceComboBox.setPreferredSize(new java.awt.Dimension(150, 25));
@ -443,7 +443,7 @@ class ExtractedContentPanel extends javax.swing.JPanel implements ResizableTextP
private javax.swing.JLabel pagesLabel; private javax.swing.JLabel pagesLabel;
private javax.swing.JPopupMenu rightClickMenu; private javax.swing.JPopupMenu rightClickMenu;
private javax.swing.JMenuItem selectAllMenuItem; private javax.swing.JMenuItem selectAllMenuItem;
private javax.swing.JComboBox<org.sleuthkit.autopsy.keywordsearch.IndexedText> sourceComboBox; private javax.swing.JComboBox<org.sleuthkit.autopsy.keywordsearch.ExtractedText> sourceComboBox;
private javax.swing.JPanel textSourcePanel; private javax.swing.JPanel textSourcePanel;
private javax.swing.JPanel zoomPanel; private javax.swing.JPanel zoomPanel;
// End of variables declaration//GEN-END:variables // End of variables declaration//GEN-END:variables
@ -457,10 +457,10 @@ class ExtractedContentPanel extends javax.swing.JPanel implements ResizableTextP
* default) * default)
* *
* @param contentName The name of the content to be displayed * @param contentName The name of the content to be displayed
* @param sources A list of IndexedText that have different 'views' of * @param sources A list of ExtractedText that have different 'views' of
* the content. the content.
*/ */
final void setSources(String contentName, List<IndexedText> sources) { final void setSources(String contentName, List<ExtractedText> sources) {
this.lastKnownAnchor = null; this.lastKnownAnchor = null;
this.contentName = contentName; this.contentName = contentName;
setPanelText(null, false); setPanelText(null, false);
@ -480,8 +480,8 @@ class ExtractedContentPanel extends javax.swing.JPanel implements ResizableTextP
* *
* @return currently selected Source * @return currently selected Source
*/ */
public IndexedText getSelectedSource() { public ExtractedText getSelectedSource() {
return (IndexedText) sourceComboBox.getSelectedItem(); return (ExtractedText) sourceComboBox.getSelectedItem();
} }
private void setPanelText(String text, boolean detectDirection) { private void setPanelText(String text, boolean detectDirection) {
@ -556,7 +556,11 @@ class ExtractedContentPanel extends javax.swing.JPanel implements ResizableTextP
* @param total total number of pages to update the display with * @param total total number of pages to update the display with
*/ */
void updateTotalPagesDisplay(int total) { void updateTotalPagesDisplay(int total) {
pageTotalLabel.setText(Integer.toString(total)); if (total >= 0) {
pageTotalLabel.setText(Integer.toString(total));
} else {
pageTotalLabel.setText("-");
}
} }
/** /**
@ -632,7 +636,7 @@ class ExtractedContentPanel extends javax.swing.JPanel implements ResizableTextP
* *
* @param source the selected source * @param source the selected source
*/ */
void updateControls(IndexedText source) { void updateControls(ExtractedText source) {
updatePageControls(source); updatePageControls(source);
updateSearchControls(source); updateSearchControls(source);
} }
@ -642,7 +646,7 @@ class ExtractedContentPanel extends javax.swing.JPanel implements ResizableTextP
* *
* @param source selected source * @param source selected source
*/ */
void updatePageControls(IndexedText source) { void updatePageControls(ExtractedText source) {
if (source == null) { if (source == null) {
enableNextPageControl(false); enableNextPageControl(false);
enablePrevPageControl(false); enablePrevPageControl(false);
@ -655,13 +659,8 @@ class ExtractedContentPanel extends javax.swing.JPanel implements ResizableTextP
int totalPages = source.getNumberPages(); int totalPages = source.getNumberPages();
updateTotalPagesDisplay(totalPages); updateTotalPagesDisplay(totalPages);
if (totalPages < 2) { enableNextPageControl(source.hasNextPage());
enableNextPageControl(false); enablePrevPageControl(source.hasPreviousPage());
enablePrevPageControl(false);
} else {
enableNextPageControl(source.hasNextPage());
enablePrevPageControl(source.hasPreviousPage());
}
} }
/** /**
@ -669,7 +668,7 @@ class ExtractedContentPanel extends javax.swing.JPanel implements ResizableTextP
* *
* @param source selected source * @param source selected source
*/ */
void updateSearchControls(IndexedText source) { void updateSearchControls(ExtractedText source) {
//setup search controls //setup search controls
if (source != null && source.isSearchable()) { if (source != null && source.isSearchable()) {
updateCurrentMatchDisplay(source.currentItem()); updateCurrentMatchDisplay(source.currentItem());
@ -689,7 +688,7 @@ class ExtractedContentPanel extends javax.swing.JPanel implements ResizableTextP
* *
* @param source * @param source
*/ */
private void scrollToCurrentHit(final IndexedText source) { private void scrollToCurrentHit(final ExtractedText source) {
if (source == null || !source.isSearchable()) { if (source == null || !source.isSearchable()) {
return; return;
} }
@ -705,7 +704,7 @@ class ExtractedContentPanel extends javax.swing.JPanel implements ResizableTextP
* be invoked from GUI thread only. * be invoked from GUI thread only.
*/ */
@NbBundle.Messages("ExtractedContentPanel.setMarkup.panelTxt=<span style='font-style:italic'>Loading text... Please wait</span>") @NbBundle.Messages("ExtractedContentPanel.setMarkup.panelTxt=<span style='font-style:italic'>Loading text... Please wait</span>")
private void setMarkup(IndexedText source) { private void setMarkup(ExtractedText source) {
setPanelText(Bundle.ExtractedContentPanel_setMarkup_panelTxt(), false); setPanelText(Bundle.ExtractedContentPanel_setMarkup_panelTxt(), false);
new SetMarkupWorker(contentName, source).execute(); new SetMarkupWorker(contentName, source).execute();
} }
@ -719,11 +718,11 @@ class ExtractedContentPanel extends javax.swing.JPanel implements ResizableTextP
private final String contentName; private final String contentName;
private final IndexedText source; private final ExtractedText source;
private ProgressHandle progress; private ProgressHandle progress;
SetMarkupWorker(String contentName, IndexedText source) { SetMarkupWorker(String contentName, ExtractedText source) {
this.contentName = contentName; this.contentName = contentName;
this.source = source; this.source = source;
} }
@ -754,7 +753,7 @@ class ExtractedContentPanel extends javax.swing.JPanel implements ResizableTextP
} }
} catch (InterruptedException | CancellationException | ExecutionException ex) { } catch (InterruptedException | CancellationException | ExecutionException ex) {
logger.log(Level.SEVERE, "Error getting marked up text", ex); //NON-NLS logger.log(Level.SEVERE, "Error getting marked up text", ex); //NON-NLS
setPanelText(Bundle.IndexedText_errorMessage_errorGettingText(), true); setPanelText(Bundle.ExtractedText_errorMessage_errorGettingText(), true);
} }
updateControls(source); updateControls(source);

View File

@ -1,7 +1,7 @@
/* /*
* Autopsy Forensic Browser * Autopsy Forensic Browser
* *
* Copyright 2011-2018 Basis Technology Corp. * Copyright 2011-2023 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org * Contact: carrier <at> sleuthkit <dot> org
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
@ -24,14 +24,15 @@ import org.openide.util.NbBundle;
* Interface to provide HTML text to display in ExtractedContentViewer. There is * Interface to provide HTML text to display in ExtractedContentViewer. There is
* a SOLR implementation of this that interfaces with SOLR to highlight the * a SOLR implementation of this that interfaces with SOLR to highlight the
* keyword hits and a version that does not do markup so that you can simply * keyword hits and a version that does not do markup so that you can simply
* view the stored text. * view the stored text. There is also an implementation that extracts text from
* a file using one of the TextExtractors.
*/ */
@NbBundle.Messages({ @NbBundle.Messages({
"IndexedText.errorMessage.errorGettingText=<span style='font-style:italic'>Error retrieving indexed text.</span>", "ExtractedText.errorMessage.errorGettingText=<span style='font-style:italic'>Error retrieving text.</span>",
"IndexedText.warningMessage.knownFile=<span style='font-style:italic'>This file is a known file (based on MD5 hash) and does not have indexed text.</span>", "ExtractedText.warningMessage.knownFile=<span style='font-style:italic'>This file is a known file (based on MD5 hash) and does not have indexed text.</span>",
"IndexedText.warningMessage.noTextAvailable=<span style='font-style:italic'>No indexed text for this file.</span>" "ExtractedText.warningMessage.noTextAvailable=<span style='font-style:italic'>No text available for this file.</span>"
}) })
interface IndexedText { interface ExtractedText {
/** /**
* @return text optionally marked up with the subset of HTML that Swing * @return text optionally marked up with the subset of HTML that Swing

View File

@ -1,7 +1,7 @@
/* /*
* Autopsy Forensic Browser * Autopsy Forensic Browser
* *
* Copyright 2011-2019 Basis Technology Corp. * Copyright 2011-2023 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org * Contact: carrier <at> sleuthkit <dot> org
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
@ -22,10 +22,15 @@ import java.awt.Component;
import java.awt.Cursor; import java.awt.Cursor;
import java.awt.event.ActionEvent; import java.awt.event.ActionEvent;
import java.awt.event.ActionListener; import java.awt.event.ActionListener;
import java.beans.PropertyChangeEvent;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.EnumSet;
import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.logging.Level; import java.util.logging.Level;
import org.apache.tika.mime.MimeTypes;
import org.openide.nodes.Node; import org.openide.nodes.Node;
import org.openide.util.Lookup; import org.openide.util.Lookup;
import org.openide.util.NbBundle; import org.openide.util.NbBundle;
@ -35,6 +40,9 @@ import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
import org.sleuthkit.autopsy.corecomponentinterfaces.TextViewer; import org.sleuthkit.autopsy.corecomponentinterfaces.TextViewer;
import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.keywordsearch.AdHocSearchChildFactory.AdHocQueryResult; import org.sleuthkit.autopsy.keywordsearch.AdHocSearchChildFactory.AdHocQueryResult;
import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector;
import org.sleuthkit.autopsy.textextractors.TextExtractor;
import org.sleuthkit.autopsy.textextractors.TextExtractorFactory;
import org.sleuthkit.datamodel.AbstractFile; import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.Account; import org.sleuthkit.datamodel.Account;
import org.sleuthkit.datamodel.BlackboardArtifact; import org.sleuthkit.datamodel.BlackboardArtifact;
@ -45,6 +53,7 @@ import static org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE.TSK_ASS
import org.sleuthkit.datamodel.Content; import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.Report; import org.sleuthkit.datamodel.Report;
import org.sleuthkit.datamodel.TskCoreException; import org.sleuthkit.datamodel.TskCoreException;
import org.sleuthkit.datamodel.TskData;
/** /**
* A text viewer that displays the indexed text associated with a file or an * A text viewer that displays the indexed text associated with a file or an
@ -60,15 +69,30 @@ public class ExtractedTextViewer implements TextViewer {
private ExtractedContentPanel panel; private ExtractedContentPanel panel;
private volatile Node currentNode = null; private volatile Node currentNode = null;
private IndexedText currentSource = null; private ExtractedText currentSource = null;
private FileTypeDetector fileTypeDetector = null;
// cache of last 10 solrHasFullyIndexedContent() requests sent to Solr.
private SolrIsFullyIndexedCache solrCache = null;
/** /**
* Constructs a text viewer that displays the indexed text associated with a * Constructs a text viewer that displays the indexed text associated with a
* file or an artifact, possibly marked up with HTML to highlight keyword * file or an artifact, possibly marked up with HTML to highlight keyword
* hits. * hits. If text for the Content has not been fully indexed by Solr then
* attempt to extract text using one of the text extractors.
*/ */
public ExtractedTextViewer() { public ExtractedTextViewer() {
// This constructor is intentionally empty. try {
fileTypeDetector = new FileTypeDetector();
} catch (FileTypeDetector.FileTypeDetectorInitException ex) {
logger.log(Level.SEVERE, "Failed to initialize FileTypeDetector", ex); //NON-NLS
}
solrCache = new SolrIsFullyIndexedCache();
// clear the cache when case opens or closes
Case.addEventTypeSubscriber(EnumSet.of(Case.Events.CURRENT_CASE), (PropertyChangeEvent evt) -> {
solrCache.clearCache();
});
} }
/** /**
@ -99,7 +123,7 @@ public class ExtractedTextViewer implements TextViewer {
* Assemble a collection of all of the indexed text "sources" for the * Assemble a collection of all of the indexed text "sources" for the
* node. * node.
*/ */
List<IndexedText> sources = new ArrayList<>(); List<ExtractedText> sources = new ArrayList<>();
Lookup nodeLookup = node.getLookup(); Lookup nodeLookup = node.getLookup();
/** /**
@ -115,7 +139,7 @@ public class ExtractedTextViewer implements TextViewer {
* First, get text with highlighted hits if this node is for a search * First, get text with highlighted hits if this node is for a search
* result. * result.
*/ */
IndexedText highlightedHitText = null; ExtractedText highlightedHitText = null;
if (adHocQueryResult != null) { if (adHocQueryResult != null) {
/* /*
* The node is an ad hoc search result node. * The node is an ad hoc search result node.
@ -153,10 +177,25 @@ public class ExtractedTextViewer implements TextViewer {
* Next, add the "raw" (not highlighted) text, if any, for any file * Next, add the "raw" (not highlighted) text, if any, for any file
* associated with the node. * associated with the node.
*/ */
IndexedText rawContentText = null; ExtractedText rawContentText = null;
if (file != null) { if (file != null) {
rawContentText = new RawText(file, file.getId());
sources.add(rawContentText); // see if Solr has fully indexed this file
if (solrHasFullyIndexedContent(file.getId())) {
rawContentText = new SolrIndexedText(file, file.getId());
sources.add(rawContentText);
} else {
// Solr does not have fully indexed content.
// see if it's a file type for which we can extract text
if (ableToExtractTextFromFile(file)) {
try {
rawContentText = new FileReaderExtractedText(file);
sources.add(rawContentText);
} catch (TextExtractorFactory.NoTextExtractorFound | TextExtractor.InitReaderException ex) {
// do nothing
}
}
}
} }
/* /*
@ -164,15 +203,18 @@ public class ExtractedTextViewer implements TextViewer {
* associated with the node. * associated with the node.
*/ */
if (report != null) { if (report != null) {
rawContentText = new RawText(report, report.getId()); // see if Solr has fully indexed this file
sources.add(rawContentText); if (solrHasFullyIndexedContent(report.getId())) {
rawContentText = new SolrIndexedText(report, report.getId());
sources.add(rawContentText);
}
} }
/* /*
* Finally, add the "raw" (not highlighted) text, if any, for any * Finally, add the "raw" (not highlighted) text, if any, for any
* artifact associated with the node. * artifact associated with the node.
*/ */
IndexedText rawArtifactText = null; ExtractedText rawArtifactText = null;
try { try {
rawArtifactText = getRawArtifactText(artifact); rawArtifactText = getRawArtifactText(artifact);
if (rawArtifactText != null) { if (rawArtifactText != null) {
@ -192,7 +234,7 @@ public class ExtractedTextViewer implements TextViewer {
} }
// Push the text sources into the panel. // Push the text sources into the panel.
for (IndexedText source : sources) { for (ExtractedText source : sources) {
int currentPage = source.getCurrentPage(); int currentPage = source.getCurrentPage();
if (currentPage == 0 && source.hasNextPage()) { if (currentPage == 0 && source.hasNextPage()) {
source.nextPage(); source.nextPage();
@ -208,8 +250,8 @@ public class ExtractedTextViewer implements TextViewer {
} }
static private IndexedText getRawArtifactText(BlackboardArtifact artifact) throws TskCoreException, NoCurrentCaseException { private ExtractedText getRawArtifactText(BlackboardArtifact artifact) throws TskCoreException, NoCurrentCaseException {
IndexedText rawArtifactText = null; ExtractedText rawArtifactText = null;
if (null != artifact) { if (null != artifact) {
/* /*
* For keyword hit artifacts, add the text of the artifact that hit, * For keyword hit artifacts, add the text of the artifact that hit,
@ -222,18 +264,21 @@ public class ExtractedTextViewer implements TextViewer {
if (attribute != null) { if (attribute != null) {
long artifactId = attribute.getValueLong(); long artifactId = attribute.getValueLong();
BlackboardArtifact associatedArtifact = Case.getCurrentCaseThrows().getSleuthkitCase().getBlackboardArtifact(artifactId); BlackboardArtifact associatedArtifact = Case.getCurrentCaseThrows().getSleuthkitCase().getBlackboardArtifact(artifactId);
rawArtifactText = new RawText(associatedArtifact, associatedArtifact.getArtifactID()); if (solrHasFullyIndexedContent(associatedArtifact.getArtifactID())) {
rawArtifactText = new SolrIndexedText(associatedArtifact, associatedArtifact.getArtifactID());
}
} }
} else { } else {
rawArtifactText = new RawText(artifact, artifact.getArtifactID()); if (solrHasFullyIndexedContent(artifact.getArtifactID())) {
rawArtifactText = new SolrIndexedText(artifact, artifact.getArtifactID());
}
} }
} }
return rawArtifactText; return rawArtifactText;
} }
static private IndexedText getAccountsText(Content content, Lookup nodeLookup) throws TskCoreException { static private ExtractedText getAccountsText(Content content, Lookup nodeLookup) throws TskCoreException {
/* /*
* get all the credit card artifacts * get all the credit card artifacts
*/ */
@ -247,7 +292,7 @@ public class ExtractedTextViewer implements TextViewer {
} }
private void scrollToCurrentHit() { private void scrollToCurrentHit() {
final IndexedText source = panel.getSelectedSource(); final ExtractedText source = panel.getSelectedSource();
if (source == null || !source.isSearchable()) { if (source == null || !source.isSearchable()) {
return; return;
} }
@ -340,8 +385,18 @@ public class ExtractedTextViewer implements TextViewer {
* data source instead of a file. * data source instead of a file.
*/ */
AbstractFile file = node.getLookup().lookup(AbstractFile.class); AbstractFile file = node.getLookup().lookup(AbstractFile.class);
if (file != null && solrHasContent(file.getId())) { if (file != null) {
return true;
// see if Solr has fully indexed this file
if (solrHasFullyIndexedContent(file.getId())) {
return true;
}
// Solr does not have fully indexed content.
// see if it's a file type for which we can extract text
if (ableToExtractTextFromFile(file)) {
return true;
}
} }
/* /*
@ -351,7 +406,7 @@ public class ExtractedTextViewer implements TextViewer {
* indexed text for the artifact. * indexed text for the artifact.
*/ */
if (artifact != null) { if (artifact != null) {
return solrHasContent(artifact.getArtifactID()); return solrHasFullyIndexedContent(artifact.getArtifactID());
} }
/* /*
@ -361,7 +416,7 @@ public class ExtractedTextViewer implements TextViewer {
*/ */
Report report = node.getLookup().lookup(Report.class); Report report = node.getLookup().lookup(Report.class);
if (report != null) { if (report != null) {
return solrHasContent(report.getId()); return solrHasFullyIndexedContent(report.getId());
} }
/* /*
@ -381,36 +436,102 @@ public class ExtractedTextViewer implements TextViewer {
* panel hasn't been created yet) * panel hasn't been created yet)
* *
* @param contentName The name of the content to be displayed * @param contentName The name of the content to be displayed
* @param sources A list of IndexedText that have different 'views' of * @param sources A list of ExtractedText that have different 'views' of
* the content. the content.
*/ */
private void setPanel(String contentName, List<IndexedText> sources) { private void setPanel(String contentName, List<ExtractedText> sources) {
if (panel != null) { if (panel != null) {
panel.setSources(contentName, sources); panel.setSources(contentName, sources);
} }
} }
/** /**
* Check if Solr has extracted content for a given node * Check if Solr has indexed ALL of the content for a given node. Note that
* in some situations Solr only indexes parts of a file. This happens when
* an in-line KWS finds a KW hit in the file - only the chunks with the KW
* hit (+/- 1 chunk) get indexed by Solr. That is not enough for the
* purposes of this text viewer as we need to display all of the text in the
* file.
* *
* @param objectId * @param objectId
* *
* @return true if Solr has content, else false * @return true if Solr has content, else false
*/ */
private boolean solrHasContent(Long objectId) { private boolean solrHasFullyIndexedContent(Long objectId) {
// check if we have cached this decision
if (solrCache.containsKey(objectId)) {
return solrCache.getCombination(objectId);
}
final Server solrServer = KeywordSearch.getServer(); final Server solrServer = KeywordSearch.getServer();
if (solrServer.coreIsOpen() == false) { if (solrServer.coreIsOpen() == false) {
solrCache.putCombination(objectId, false);
return false; return false;
} }
// verify that all of the chunks in the file have been indexed.
try { try {
return solrServer.queryIsIndexed(objectId); boolean isFullyIndexed = solrServer.queryIsFullyIndexed(objectId);
solrCache.putCombination(objectId, isFullyIndexed);
return isFullyIndexed;
} catch (NoOpenCoreException | KeywordSearchModuleException ex) { } catch (NoOpenCoreException | KeywordSearchModuleException ex) {
logger.log(Level.SEVERE, "Error querying Solr server", ex); //NON-NLS logger.log(Level.SEVERE, "Error querying Solr server", ex); //NON-NLS
solrCache.putCombination(objectId, false);
return false; return false;
} }
} }
/**
 * Check if we can extract text for this file type using one of our text
 * extractors. NOTE: the logic in this method should be similar and based on
 * the logic of how KeywordSearchIngestModule decides which files to index.
 *
 * @param file Abstract File
 *
 * @return true if text can be extracted from file, else false
 */
private boolean ableToExtractTextFromFile(AbstractFile file) {
    // Virtual directories, unallocated/unused space and carved files are
    // never candidates for text extraction.
    switch (file.getType()) {
        case VIRTUAL_DIR:
        case UNALLOC_BLOCKS:
        case UNUSED_BLOCKS:
        case CARVED:
            return false;
        default:
            break;
    }

    // Directories and empty files have no text to extract.
    if (file.isDir() || file.getSize() == 0) {
        return false;
    }

    // The detector stays null when its initialization failed in the
    // constructor; without it the MIME type cannot be determined.
    if (fileTypeDetector == null) {
        return false;
    }

    // MIME types are ASCII identifiers, so normalize the case independently
    // of the default locale.
    String mimeType = fileTypeDetector.getMIMEType(file).trim().toLowerCase(java.util.Locale.ROOT);

    // Archives and unidentified binary data are not handled by the text
    // extractors.
    if (KeywordSearchIngestModule.ARCHIVE_MIME_TYPES.contains(mimeType)
            || MimeTypes.OCTET_STREAM.equals(mimeType)) {
        return false;
    }

    // Often times there is an exception when trying to initialize a reader,
    // thus making that specific file "unsupported". The only way to identify
    // this situation is to initialize the reader.
    // NOTE(review): the reader opened by this probe is never closed here -
    // confirm whether the probe instance could be reused or released.
    try {
        new FileReaderExtractedText(file);
    } catch (TextExtractorFactory.NoTextExtractorFound | TextExtractor.InitReaderException ex) {
        return false;
    }

    return true;
}
/** /**
* Listener to select the next match found in the text * Listener to select the next match found in the text
*/ */
@ -418,7 +539,7 @@ public class ExtractedTextViewer implements TextViewer {
@Override @Override
public void actionPerformed(ActionEvent e) { public void actionPerformed(ActionEvent e) {
IndexedText source = panel.getSelectedSource(); ExtractedText source = panel.getSelectedSource();
if (source == null) { if (source == null) {
// reset // reset
panel.updateControls(null); panel.updateControls(null);
@ -461,7 +582,7 @@ public class ExtractedTextViewer implements TextViewer {
@Override @Override
public void actionPerformed(ActionEvent e) { public void actionPerformed(ActionEvent e) {
IndexedText source = panel.getSelectedSource(); ExtractedText source = panel.getSelectedSource();
final boolean hasPreviousItem = source.hasPreviousItem(); final boolean hasPreviousItem = source.hasPreviousItem();
final boolean hasPreviousPage = source.hasPreviousPage(); final boolean hasPreviousPage = source.hasPreviousPage();
int indexVal; int indexVal;
@ -598,4 +719,39 @@ public class ExtractedTextViewer implements TextViewer {
previousPage(); previousPage();
} }
} }
/**
 * This class maintains a cache of last 10 solrHasFullyIndexedContent()
 * requests sent to Solr.
 *
 * The backing map is access-ordered, so even a lookup reorders its entries
 * (a structural modification). All accessors are therefore synchronized on
 * the cache instance, which keeps the map consistent when it is cleared from
 * a case event listener while lookups are in progress.
 */
private class SolrIsFullyIndexedCache {

    private static final int CACHE_SIZE = 10;
    private final LinkedHashMap<Long, Boolean> cache;

    private SolrIsFullyIndexedCache() {
        // Access-ordered map that evicts the least recently used entry once
        // more than CACHE_SIZE entries are present.
        this.cache = new LinkedHashMap<Long, Boolean>(CACHE_SIZE, 0.75f, true) {
            @Override
            protected boolean removeEldestEntry(Map.Entry<Long, Boolean> eldest) {
                return size() > CACHE_SIZE;
            }
        };
    }

    /**
     * Record the "fully indexed" decision for an object id.
     *
     * @param key   Object id the decision applies to.
     * @param value True if the content was found to be fully indexed.
     */
    public synchronized void putCombination(long key, boolean value) {
        cache.put(key, value);
    }

    /**
     * Look up the cached decision for an object id.
     *
     * @param key Object id to look up.
     *
     * @return The cached decision, or null if there is no entry for the id
     *         (never cached, evicted, or the cache has been cleared).
     */
    public synchronized Boolean getCombination(long key) {
        return cache.get(key);
    }

    /**
     * Remove all cached decisions.
     */
    public synchronized void clearCache() {
        cache.clear();
    }

    /**
     * Check whether a decision is currently cached for an object id.
     *
     * @param key Object id to look up.
     *
     * @return True if an entry for the id is present.
     */
    public synchronized boolean containsKey(long key) {
        return cache.containsKey(key);
    }
}
} }

View File

@ -0,0 +1,234 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2023 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.keywordsearch;
import com.google.common.io.CharSource;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.coreutils.EscapeUtil;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.textextractors.TextExtractor;
import org.sleuthkit.autopsy.textextractors.TextExtractorFactory;
import org.sleuthkit.datamodel.AbstractFile;
/**
 * A "source" for abstractFile viewer that displays "raw" extracted text for a
 * file. Only supports file types for which there are text extractors. Uses
 * chunking algorithm used by KeywordSearchIngestModule. The readers used in
 * chunking don't have ability to go backwards or to fast forward to a specific
 * offset. Therefore there is no way to scroll pages back, or to determine how
 * many total pages there are.
 *
 * Because the underlying reader is forward-only, the text of the current page
 * is cached so that asking for the text of the same page more than once
 * returns the same chunk instead of silently consuming the next one.
 */
class FileReaderExtractedText implements ExtractedText {

    private static final Logger logger = Logger.getLogger(FileReaderExtractedText.class.getName());

    // Marker meaning "no page text has been cached yet".
    private static final int NO_CACHED_PAGE = -1;

    private final int numPages;
    private int currentPage = 0;
    private final AbstractFile abstractFile;
    private final Chunker chunker;

    // Text last returned by getText() and the page it was produced for.
    private int cachedPage = NO_CACHED_PAGE;
    private String cachedText = null;

    /**
     * Construct a new ExtractedText object for the given abstract file.
     *
     * @param file Abstract file.
     *
     * @throws TextExtractorFactory.NoTextExtractorFound If no text extractor
     *                                                   is available for the
     *                                                   file.
     * @throws TextExtractor.InitReaderException         If the reader for the
     *                                                   file content cannot
     *                                                   be initialized.
     */
    FileReaderExtractedText(AbstractFile file) throws TextExtractorFactory.NoTextExtractorFound, TextExtractor.InitReaderException {
        this.abstractFile = file;
        this.numPages = -1; // We don't know how many pages there are until we reach end of the document

        TextExtractor extractor = TextExtractorFactory.getExtractor(abstractFile, null);
        Reader sourceReader = getTikaOrTextExtractor(extractor, abstractFile);

        //Get a reader for the content of the given source
        BufferedReader reader = new BufferedReader(sourceReader);
        this.chunker = new Chunker(reader);
    }

    @Override
    public int getCurrentPage() {
        return this.currentPage;
    }

    @Override
    public boolean hasNextPage() {
        return chunker.hasNext();
    }

    @Override
    public boolean hasPreviousPage() {
        // The chunker can only move forward.
        return false;
    }

    @Override
    public int nextPage() {
        if (!hasNextPage()) {
            throw new IllegalStateException(
                    NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.nextPage.exception.msg"));
        }
        ++currentPage;
        return currentPage;
    }

    @Override
    public int previousPage() {
        if (!hasPreviousPage()) {
            throw new IllegalStateException(
                    NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.previousPage.exception.msg"));
        }
        --currentPage;
        return currentPage;
    }

    @Override
    public boolean hasNextItem() {
        throw new UnsupportedOperationException(
                NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.hasNextItem.exception.msg"));
    }

    @Override
    public boolean hasPreviousItem() {
        throw new UnsupportedOperationException(
                NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.hasPreviousItem.exception.msg"));
    }

    @Override
    public int nextItem() {
        throw new UnsupportedOperationException(
                NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.nextItem.exception.msg"));
    }

    @Override
    public int previousItem() {
        throw new UnsupportedOperationException(
                NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.previousItem.exception.msg"));
    }

    @Override
    public int currentItem() {
        throw new UnsupportedOperationException(
                NbBundle.getMessage(this.getClass(), "ExtractedContentViewer.currentItem.exception.msg"));
    }

    /**
     * Get the text of the current page. The first request for a page reads
     * the next chunk from the reader; repeated requests for the same page
     * return the cached result, since the consumed chunk cannot be re-read.
     *
     * @return The HTML-escaped text of the current page, or an error message
     *         if the text could not be obtained.
     */
    @Override
    public String getText() {
        if (cachedText != null && cachedPage == currentPage) {
            return cachedText;
        }

        String pageText;
        try {
            pageText = getContentText(currentPage);
        } catch (Exception ex) {
            logger.log(Level.SEVERE, "Couldn't get extracted text", ex); //NON-NLS
            pageText = Bundle.ExtractedText_errorMessage_errorGettingText();
        }

        // Remember the outcome (including an error message): the chunk that
        // was consumed for this page is gone, so a retry could only return
        // the text of a later chunk under the wrong page number.
        cachedPage = currentPage;
        cachedText = pageText;
        return pageText;
    }

    @NbBundle.Messages({
        "ExtractedText.FileText=File Text"})
    @Override
    public String toString() {
        return Bundle.ExtractedText_FileText();
    }

    @Override
    public boolean isSearchable() {
        return false;
    }

    @Override
    public String getAnchorPrefix() {
        return "";
    }

    @Override
    public int getNumberHits() {
        return 0;
    }

    @Override
    public int getNumberPages() {
        return numPages;
    }

    /**
     * Extract text from abstractFile by reading the next chunk.
     *
     * @param pageNumber currently used page, recorded as the chunk id
     *
     * @return the extracted text wrapped in a pre-formatted HTML block, or
     *         an error message if there are no more chunks
     *
     * @throws Exception The exception reported by the chunker, if any.
     */
    private String getContentText(int pageNumber) throws Exception {
        if (!chunker.hasNext()) {
            return Bundle.ExtractedText_errorMessage_errorGettingText();
        }

        Chunker.Chunk chunk = chunker.next();
        chunk.setChunkId(pageNumber);

        if (chunker.hasException()) {
            logger.log(Level.WARNING, "Error chunking content from " + abstractFile.getId() + ": " + abstractFile.getName(), chunker.getException());
            throw chunker.getException();
        }

        String escapedText = EscapeUtil.escapeHtml(chunk.toString()).trim();
        StringBuilder sb = new StringBuilder(escapedText.length() + 20);
        sb.append("<pre>").append(escapedText).append("</pre>"); //NON-NLS
        return sb.toString();
    }

    /**
     * Get a reader over the file text followed by its formatted metadata.
     *
     * @param extractor Text extractor to read the file with.
     * @param aFile     File being read, used for log messages.
     *
     * @return A reader over the file text with the metadata appended, or
     *         over the file text alone if the metadata could not be
     *         formatted.
     *
     * @throws TextExtractor.InitReaderException If the extractor cannot
     *                                           create its reader.
     */
    private Reader getTikaOrTextExtractor(TextExtractor extractor, AbstractFile aFile) throws TextExtractor.InitReaderException {
        final Reader fileText = extractor.getReader();
        try {
            Map<String, String> metadata = extractor.getMetadata();
            CharSource formattedMetadata = KeywordSearchIngestModule.getMetaDataCharSource(metadata);

            //Append the metadata to end of the file text
            return CharSource.concat(new CharSource() {
                //Wrap fileText reader for concatenation
                @Override
                public Reader openStream() throws IOException {
                    return fileText;
                }
            }, formattedMetadata).openStream();
        } catch (IOException ex) {
            logger.log(Level.WARNING, String.format("Could not format extracted metadata for file %s [id=%d]",
                    aFile.getName(), aFile.getId()), ex);
            //Just send file text.
            return fileText;
        }
    }
}

View File

@ -52,7 +52,7 @@ import org.sleuthkit.datamodel.TskCoreException;
* Highlights hits for a given document. Knows about pages and such for the * Highlights hits for a given document. Knows about pages and such for the
* content viewer. * content viewer.
*/ */
class HighlightedText implements IndexedText { class HighlightedText implements ExtractedText {
private static final Logger logger = Logger.getLogger(HighlightedText.class.getName()); private static final Logger logger = Logger.getLogger(HighlightedText.class.getName());
@ -475,7 +475,7 @@ class HighlightedText implements IndexedText {
return "<html><pre>" + highlightedContent + "</pre></html>"; //NON-NLS return "<html><pre>" + highlightedContent + "</pre></html>"; //NON-NLS
} catch (TskCoreException | KeywordSearchModuleException | NoOpenCoreException ex) { } catch (TskCoreException | KeywordSearchModuleException | NoOpenCoreException ex) {
logger.log(Level.SEVERE, "Error getting highlighted text for Solr doc id " + solrObjectId + ", chunkID " + chunkID + ", highlight query: " + highlightField, ex); //NON-NLS logger.log(Level.SEVERE, "Error getting highlighted text for Solr doc id " + solrObjectId + ", chunkID " + chunkID + ", highlight query: " + highlightField, ex); //NON-NLS
return Bundle.IndexedText_errorMessage_errorGettingText(); return Bundle.ExtractedText_errorMessage_errorGettingText();
} }
} }
@ -519,7 +519,7 @@ class HighlightedText implements IndexedText {
*/ */
static String attemptManualHighlighting(SolrDocumentList solrDocumentList, String highlightField, Collection<String> keywords) { static String attemptManualHighlighting(SolrDocumentList solrDocumentList, String highlightField, Collection<String> keywords) {
if (solrDocumentList.isEmpty()) { if (solrDocumentList.isEmpty()) {
return Bundle.IndexedText_errorMessage_errorGettingText(); return Bundle.ExtractedText_errorMessage_errorGettingText();
} }
// It doesn't make sense for there to be more than a single document in // It doesn't make sense for there to be more than a single document in

View File

@ -38,7 +38,6 @@ import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level; import java.util.logging.Level;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.tika.mime.MimeTypes; import org.apache.tika.mime.MimeTypes;
import org.openide.util.Exceptions;
import org.openide.util.Lookup; import org.openide.util.Lookup;
import org.openide.util.NbBundle; import org.openide.util.NbBundle;
import org.openide.util.NbBundle.Messages; import org.openide.util.NbBundle.Messages;
@ -96,7 +95,7 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
* generally text extractors should ignore archives and let unpacking * generally text extractors should ignore archives and let unpacking
* modules take care of them * modules take care of them
*/ */
private static final List<String> ARCHIVE_MIME_TYPES static final List<String> ARCHIVE_MIME_TYPES
= ImmutableList.of( = ImmutableList.of(
//ignore unstructured binary and compressed data, for which string extraction or unzipper works better //ignore unstructured binary and compressed data, for which string extraction or unzipper works better
"application/x-7z-compressed", //NON-NLS "application/x-7z-compressed", //NON-NLS
@ -683,7 +682,7 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
@NbBundle.Messages({ @NbBundle.Messages({
"KeywordSearchIngestModule.metadataTitle=METADATA" "KeywordSearchIngestModule.metadataTitle=METADATA"
}) })
private CharSource getMetaDataCharSource(Map<String, String> metadata) { static CharSource getMetaDataCharSource(Map<String, String> metadata) {
return CharSource.wrap(new StringBuilder( return CharSource.wrap(new StringBuilder(
String.format("\n\n------------------------------%s------------------------------\n\n", String.format("\n\n------------------------------%s------------------------------\n\n",
Bundle.KeywordSearchIngestModule_metadataTitle())) Bundle.KeywordSearchIngestModule_metadataTitle()))

View File

@ -1635,23 +1635,29 @@ public class Server {
} }
/** /**
* Return true if the file is indexed (either as a whole as a chunk) * Return true if the file is fully indexed (no chunks are missing)
* *
* @param contentID * @param contentID
* *
* @return true if it is indexed * @return true if it is fully indexed
* *
* @throws KeywordSearchModuleException * @throws KeywordSearchModuleException
* @throws NoOpenCoreException * @throws NoOpenCoreException
*/ */
public boolean queryIsIndexed(long contentID) throws KeywordSearchModuleException, NoOpenCoreException { public boolean queryIsFullyIndexed(long contentID) throws KeywordSearchModuleException, NoOpenCoreException {
currentCoreLock.readLock().lock(); currentCoreLock.readLock().lock();
try { try {
if (null == currentCollection) { if (null == currentCollection) {
throw new NoOpenCoreException(); throw new NoOpenCoreException();
} }
try { try {
return currentCollection.queryIsIndexed(contentID); int totalNumChunks = currentCollection.queryTotalNumFileChunks(contentID);
if (totalNumChunks == 0) {
return false;
}
int numIndexedChunks = currentCollection.queryNumIndexedChunks(contentID);
return numIndexedChunks == totalNumChunks;
} catch (Exception ex) { } catch (Exception ex) {
// intentional "catch all" as Solr is known to throw all kinds of Runtime exceptions // intentional "catch all" as Solr is known to throw all kinds of Runtime exceptions
throw new KeywordSearchModuleException(NbBundle.getMessage(this.getClass(), "Server.queryIsIdxd.exception.msg"), ex); throw new KeywordSearchModuleException(NbBundle.getMessage(this.getClass(), "Server.queryIsIdxd.exception.msg"), ex);
@ -1680,7 +1686,7 @@ public class Server {
throw new NoOpenCoreException(); throw new NoOpenCoreException();
} }
try { try {
return currentCollection.queryNumFileChunks(fileID); return currentCollection.queryTotalNumFileChunks(fileID);
} catch (Exception ex) { } catch (Exception ex) {
// intentional "catch all" as Solr is known to throw all kinds of Runtime exceptions // intentional "catch all" as Solr is known to throw all kinds of Runtime exceptions
throw new KeywordSearchModuleException(NbBundle.getMessage(this.getClass(), "Server.queryNumFileChunks.exception.msg"), ex); throw new KeywordSearchModuleException(NbBundle.getMessage(this.getClass(), "Server.queryNumFileChunks.exception.msg"), ex);
@ -2484,7 +2490,7 @@ public class Server {
} }
/** /**
* Return true if the file is indexed (either as a whole as a chunk) * Return true if the file is indexed (either as a whole or as a chunk)
* *
* @param contentID * @param contentID
* *
@ -2502,17 +2508,20 @@ public class Server {
} }
/** /**
* Execute query that gets number of indexed file chunks for a file * Execute query that gets total number of file chunks for a file. NOTE:
* this does not imply that all of the chunks have been indexed. This
* parameter simply stores the total number of chunks that the file had
* (as determined during chunking).
* *
* @param contentID file id of the original file broken into chunks and * @param contentID file id of the original file broken into chunks and
* indexed * indexed
* *
* @return int representing number of indexed file chunks, 0 if there is * @return int representing number of file chunks, 0 if there is no
* no chunks * chunks
* *
* @throws SolrServerException * @throws SolrServerException
*/ */
private int queryNumFileChunks(long contentID) throws SolrServerException, IOException { private int queryTotalNumFileChunks(long contentID) throws SolrServerException, IOException {
final SolrQuery q = new SolrQuery(); final SolrQuery q = new SolrQuery();
q.setQuery("*:*"); q.setQuery("*:*");
String filterQuery = Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(Long.toString(contentID)); String filterQuery = Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(Long.toString(contentID));
@ -2522,7 +2531,7 @@ public class Server {
SolrDocumentList solrDocuments = query(q).getResults(); SolrDocumentList solrDocuments = query(q).getResults();
if (!solrDocuments.isEmpty()) { if (!solrDocuments.isEmpty()) {
SolrDocument solrDocument = solrDocuments.get(0); SolrDocument solrDocument = solrDocuments.get(0);
if (solrDocument != null) { if (solrDocument != null && !solrDocument.isEmpty()) {
Object fieldValue = solrDocument.getFieldValue(Schema.NUM_CHUNKS.toString()); Object fieldValue = solrDocument.getFieldValue(Schema.NUM_CHUNKS.toString());
return (Integer)fieldValue; return (Integer)fieldValue;
} }
@ -2532,11 +2541,27 @@ public class Server {
logger.log(Level.SEVERE, "Error getting content from Solr. Solr document id " + contentID + ", query: " + filterQuery, ex); //NON-NLS logger.log(Level.SEVERE, "Error getting content from Solr. Solr document id " + contentID + ", query: " + filterQuery, ex); //NON-NLS
return 0; return 0;
} }
// File not indexed
// ERROR: we should never get here
logger.log(Level.SEVERE, "Error getting content from Solr. Solr document id " + contentID + ", query: " + filterQuery); //NON-NLS
return 0; return 0;
} }
/**
 * Count the chunks that Solr has indexed for a piece of content. The query
 * matches every document whose id is the content id followed by the chunk id
 * separator, and requests zero rows so that only the hit count is returned,
 * not the chunk content.
 *
 * @param contentID file id of the original file broken into chunks and
 *                  indexed
 *
 * @return int representing number of indexed chunks
 *
 * @throws SolrServerException
 * @throws IOException
 */
int queryNumIndexedChunks(long contentID) throws SolrServerException, IOException {
    final String escapedId = KeywordSearchUtil.escapeLuceneQuery(Long.toString(contentID));
    final String chunkIdPattern = Server.Schema.ID + ":" + escapedId + Server.CHUNK_ID_SEPARATOR + "*";

    final SolrQuery chunkCountQuery = new SolrQuery(chunkIdPattern);
    // Only the number of matches is needed, not the documents themselves.
    chunkCountQuery.setRows(0);

    final long matchCount = query(chunkCountQuery).getResults().getNumFound();
    return (int) matchCount;
}
} }
class ServerAction extends AbstractAction { class ServerAction extends AbstractAction {

View File

@ -1,7 +1,7 @@
/* /*
* Autopsy Forensic Browser * Autopsy Forensic Browser
* *
* Copyright 2011-2018 Basis Technology Corp. * Copyright 2011-2023 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org * Contact: carrier <at> sleuthkit <dot> org
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
@ -30,9 +30,9 @@ import org.sleuthkit.datamodel.TskData;
/** /**
* A "source" for the extracted content viewer that displays "raw" (not * A "source" for the extracted content viewer that displays "raw" (not
* highlighted) indexed text for a file or an artifact. * highlighted) Solr indexed text for a file or an artifact.
*/ */
class RawText implements IndexedText { class SolrIndexedText implements ExtractedText {
private int numPages = 0; private int numPages = 0;
private int currentPage = 0; private int currentPage = 0;
@ -40,15 +40,12 @@ class RawText implements IndexedText {
private final Content content; private final Content content;
private final BlackboardArtifact blackboardArtifact; private final BlackboardArtifact blackboardArtifact;
private final long objectId; private final long objectId;
//keep last content cached private static final Logger logger = Logger.getLogger(SolrIndexedText.class.getName());
private String cachedString;
private int cachedChunk;
private static final Logger logger = Logger.getLogger(RawText.class.getName());
/** /**
* Construct a new RawText object for the given content and object id. This * Construct a new SolrIndexedText object for the given content and object id. This
* constructor needs both a content object and an object id because the * constructor needs both a content object and an object id because the
* RawText implementation attempts to provide useful messages in the text * SolrIndexedText implementation attempts to provide useful messages in the text
* content viewer for (a) the case where a file has not been indexed because * content viewer for (a) the case where a file has not been indexed because
* known files are being skipped and (b) the case where the file content has * known files are being skipped and (b) the case where the file content has
* not yet been indexed. * not yet been indexed.
@ -56,14 +53,14 @@ class RawText implements IndexedText {
* @param content Used to get access to file names and "known" status. * @param content Used to get access to file names and "known" status.
* @param objectId Either a file id or an artifact id. * @param objectId Either a file id or an artifact id.
*/ */
RawText(Content content, long objectId) { SolrIndexedText(Content content, long objectId) {
this.content = content; this.content = content;
this.blackboardArtifact = null; this.blackboardArtifact = null;
this.objectId = objectId; this.objectId = objectId;
initialize(); initialize();
} }
RawText(BlackboardArtifact bba, long objectId) { SolrIndexedText(BlackboardArtifact bba, long objectId) {
this.content = null; this.content = null;
this.blackboardArtifact = bba; this.blackboardArtifact = bba;
this.objectId = objectId; this.objectId = objectId;
@ -155,18 +152,18 @@ class RawText implements IndexedText {
} catch (SolrServerException | NoOpenCoreException ex) { } catch (SolrServerException | NoOpenCoreException ex) {
logger.log(Level.SEVERE, "Couldn't get extracted text", ex); //NON-NLS logger.log(Level.SEVERE, "Couldn't get extracted text", ex); //NON-NLS
} }
return Bundle.IndexedText_errorMessage_errorGettingText(); return Bundle.ExtractedText_errorMessage_errorGettingText();
} }
@NbBundle.Messages({ @NbBundle.Messages({
"RawText.FileText=File Text", "SolrIndexedText.FileText=File Text",
"RawText.ResultText=Result Text"}) "SolrIndexedText.ResultText=Result Text"})
@Override @Override
public String toString() { public String toString() {
if (null != content) { if (null != content) {
return Bundle.RawText_FileText(); return Bundle.SolrIndexedText_FileText();
} else { } else {
return Bundle.RawText_ResultText(); return Bundle.SolrIndexedText_ResultText();
} }
} }
@ -239,43 +236,32 @@ class RawText implements IndexedText {
//we know it's AbstractFile, but do quick check to make sure if we index other objects in future //we know it's AbstractFile, but do quick check to make sure if we index other objects in future
boolean isKnown = TskData.FileKnown.KNOWN.equals(((AbstractFile) content).getKnown()); boolean isKnown = TskData.FileKnown.KNOWN.equals(((AbstractFile) content).getKnown());
if (isKnown && KeywordSearchSettings.getSkipKnown()) { if (isKnown && KeywordSearchSettings.getSkipKnown()) {
msg = Bundle.IndexedText_warningMessage_knownFile(); msg = Bundle.ExtractedText_warningMessage_knownFile();
} }
} }
if (msg == null) { if (msg == null) {
msg = Bundle.IndexedText_warningMessage_noTextAvailable(); msg = Bundle.ExtractedText_warningMessage_noTextAvailable();
} }
return msg; return msg;
} }
int chunkId = currentPage; int chunkId = currentPage;
//check if cached
if (cachedString != null) {
if (cachedChunk == chunkId) {
return cachedString;
}
}
//not cached //not cached
String indexedText = solrServer.getSolrContent(this.objectId, chunkId); String indexedText = solrServer.getSolrContent(this.objectId, chunkId);
if (indexedText == null) { if (indexedText == null) {
if (content instanceof AbstractFile) { if (content instanceof AbstractFile) {
return Bundle.IndexedText_errorMessage_errorGettingText(); return Bundle.ExtractedText_errorMessage_errorGettingText();
} else { } else {
return Bundle.IndexedText_warningMessage_noTextAvailable(); return Bundle.ExtractedText_warningMessage_noTextAvailable();
} }
} else if (indexedText.isEmpty()) { } else if (indexedText.isEmpty()) {
return Bundle.IndexedText_warningMessage_noTextAvailable(); return Bundle.ExtractedText_warningMessage_noTextAvailable();
} }
cachedString = EscapeUtil.escapeHtml(indexedText).trim(); indexedText = EscapeUtil.escapeHtml(indexedText).trim();
StringBuilder sb = new StringBuilder(cachedString.length() + 20); StringBuilder sb = new StringBuilder(indexedText.length() + 20);
sb.append("<pre>").append(cachedString).append("</pre>"); //NON-NLS sb.append("<pre>").append(indexedText).append("</pre>"); //NON-NLS
cachedString = sb.toString(); return sb.toString();
cachedChunk = chunkId;
return cachedString;
} }
/** /**
@ -290,7 +276,7 @@ class RawText implements IndexedText {
private String getArtifactText() throws NoOpenCoreException, SolrServerException { private String getArtifactText() throws NoOpenCoreException, SolrServerException {
String indexedText = KeywordSearch.getServer().getSolrContent(this.objectId, 1); String indexedText = KeywordSearch.getServer().getSolrContent(this.objectId, 1);
if (indexedText == null || indexedText.isEmpty()) { if (indexedText == null || indexedText.isEmpty()) {
return Bundle.IndexedText_errorMessage_errorGettingText(); return Bundle.ExtractedText_errorMessage_errorGettingText();
} }
indexedText = EscapeUtil.escapeHtml(indexedText).trim(); indexedText = EscapeUtil.escapeHtml(indexedText).trim();