diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedContentPanel.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedContentPanel.java
index d4e3975b97..41d384d559 100644
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedContentPanel.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedContentPanel.java
@@ -1,7 +1,7 @@
 /*
  * Autopsy Forensic Browser
  *
- * Copyright 2011-2021 Basis Technology Corp.
+ * Copyright 2011-2023 Basis Technology Corp.
  * Contact: carrier sleuthkit org
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -659,14 +659,8 @@ class ExtractedContentPanel extends javax.swing.JPanel implements ResizableTextP
         int totalPages = source.getNumberPages();
         updateTotalPagesDisplay(totalPages);
 
-        // ELTODO
-        //if (totalPages < 2) {
-        //    enableNextPageControl(false);
-        //    enablePrevPageControl(false);
-        //} else {
-            enableNextPageControl(source.hasNextPage());
-            enablePrevPageControl(source.hasPreviousPage());
-        //}
+        enableNextPageControl(source.hasNextPage());
+        enablePrevPageControl(source.hasPreviousPage());
     }
 
     /**
diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedText.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedText.java
index e42f878b70..edb8641b29 100755
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedText.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedText.java
@@ -32,46 +32,33 @@ import org.sleuthkit.autopsy.textextractors.TextExtractor;
 import org.sleuthkit.autopsy.textextractors.TextExtractorFactory;
 import org.sleuthkit.datamodel.AbstractFile;
 
-/** ELTODO
- * A "source" for the extracted abstractFile viewer that displays "raw" (not
- * highlighted) indexed text for a file or an artifact.
+/**
+ * A "source" for the abstractFile viewer that displays "raw" extracted text for
+ * a file. Only supports file types for which there are text extractors. Uses
+ * the chunking algorithm used by KeywordSearchIngestModule. The readers used in
+ * chunking don't have the ability to go backwards or to fast forward to a
+ * specific offset. Therefore there is no way to scroll pages back, or to
+ * determine how many total pages there are.
  */
 class ExtractedText implements IndexedText {
 
     private int numPages = 0;
     private int currentPage = 0;
     private final AbstractFile abstractFile;
-    private final long objectId;
     private Chunker chunker = null;
     private static final Logger logger = Logger.getLogger(ExtractedText.class.getName());
 
     /**
-     * Construct a new ExtractedText object for the given content and object id.
-     * This constructor needs both a content object and an object id because the
-     * ExtractedText implementation attempts to provide useful messages in the
-     * text content viewer for (a) the case where a file has not been indexed
-     * because known files are being skipped and (b) the case where the file
-     * content has not yet been indexed.
+     * Construct a new ExtractedText object for the given abstract file.
      *
-     * @param file     Abstract file.
-     * @param objectId Either a file id or an artifact id.
+     * @param file Abstract file.
      */
-    ExtractedText(AbstractFile file, long objectId) throws TextExtractorFactory.NoTextExtractorFound, TextExtractor.InitReaderException {
+    ExtractedText(AbstractFile file) throws TextExtractorFactory.NoTextExtractorFound, TextExtractor.InitReaderException {
         this.abstractFile = file;
-        this.objectId = objectId;
         this.numPages = -1; // We don't know how many pages there are until we reach end of the document
         initialize();
     }
 
-    /**
-     * Return the ID that this object is associated with -- to help with caching
-     *
-     * @return
-     */
-    public long getObjectId() {
-        return this.objectId;
-    }
-
     @Override
     public int getCurrentPage() {
         return this.currentPage;
@@ -177,9 +164,6 @@ class ExtractedText implements IndexedText {
         return numPages;
     }
 
-    /**
-     * Set the internal values, such as pages
-     */
     private void initialize() throws TextExtractorFactory.NoTextExtractorFound, TextExtractor.InitReaderException {
         TextExtractor extractor = TextExtractorFactory.getExtractor(abstractFile, null);
 
@@ -194,7 +178,6 @@ class ExtractedText implements IndexedText {
     /**
      * Extract text from abstractFile
      *
-     * @param node        a node that has extracted abstractFile
      * @param currentPage currently used page
      *
      * @return the extracted text
@@ -209,7 +192,7 @@ class ExtractedText implements IndexedText {
                 logger.log(Level.WARNING, "Error chunking content from " + abstractFile.getId() + ": " + abstractFile.getName(), chunker.getException());
                 throw chunker.getException();
             }
-            
+
             indexedText = chunk.toString();
         } else {
             return Bundle.IndexedText_errorMessage_errorGettingText();
@@ -229,9 +212,7 @@ class ExtractedText implements IndexedText {
         try {
             Map<String, String> metadata = extractor.getMetadata();
             if (!metadata.isEmpty()) {
-                // Creating the metadata artifact here causes occasional problems
-                // when indexing the text, so we save the metadata map to
-                // use after this method is complete.
+                // save the metadata map to use after this method is complete.
                 extractedMetadata.putAll(metadata);
             }
             CharSource formattedMetadata = KeywordSearchIngestModule.getMetaDataCharSource(metadata);
@@ -249,9 +230,8 @@ class ExtractedText implements IndexedText {
             //Just send file text.
             finalReader = fileText;
         }
-        //divide into chunks and index
+        //divide into chunks
         return finalReader;
-
     }
 }
diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedTextViewer.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedTextViewer.java
index e715f73e0d..6047c2db60 100644
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedTextViewer.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedTextViewer.java
@@ -1,7 +1,7 @@
 /*
  * Autopsy Forensic Browser
  *
- * Copyright 2011-2019 Basis Technology Corp.
+ * Copyright 2011-2023 Basis Technology Corp.
  * Contact: carrier sleuthkit org
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -28,7 +28,6 @@ import java.util.List;
 import java.util.logging.Level;
 import org.apache.tika.mime.MimeTypes;
 import org.openide.nodes.Node;
-import org.openide.util.Exceptions;
 import org.openide.util.Lookup;
 import org.openide.util.NbBundle;
 import org.openide.util.lookup.ServiceProvider;
@@ -36,7 +35,6 @@ import org.sleuthkit.autopsy.casemodule.Case;
 import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
 import org.sleuthkit.autopsy.corecomponentinterfaces.TextViewer;
 import org.sleuthkit.autopsy.coreutils.Logger;
-import org.sleuthkit.autopsy.ingest.IngestModule;
 import org.sleuthkit.autopsy.keywordsearch.AdHocSearchChildFactory.AdHocQueryResult;
 import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector;
 import org.sleuthkit.autopsy.textextractors.TextExtractor;
@@ -178,7 +176,7 @@ public class ExtractedTextViewer implements TextViewer {
         // see if it's a file type for which we can extract text
         if (ableToExtractTextFromFile(file)) {
             try {
-                rawContentText = new ExtractedText(file, file.getId());
+                rawContentText = new ExtractedText(file);
                 sources.add(rawContentText);
             } catch (TextExtractorFactory.NoTextExtractorFound | TextExtractor.InitReaderException ex) {
                 // do nothing
@@ -451,7 +449,9 @@ public class ExtractedTextViewer implements TextViewer {
     }
 
     /**
-     * Check if we can extract text for this file type.
+     * Check if we can extract text for this file type using one of our text
+     * extractors. NOTE: the logic in this method should be similar to, and based
+     * on, the logic of how KeywordSearchIngestModule decides which files to index.
      *
      * @param file Abstract File
      *
@@ -465,12 +465,6 @@ public class ExtractedTextViewer implements TextViewer {
             return false;
         }
 
-        /**
-         * Extract unicode strings from unallocated and unused blocks and carved
-         * text files. The reason for performing string extraction on these is
-         * because they all may contain multiple encodings which can cause text
-         * to be missed by the more specialized text extractors.
-         */
         if ((fileType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)
                 || fileType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS))
                 || (fileType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED))) {
@@ -478,22 +472,10 @@ public class ExtractedTextViewer implements TextViewer {
         }
 
         final long size = file.getSize();
-        //if not to index content, or a dir, or 0 content, index meta data only
-
         if (file.isDir() || size == 0) {
             return false;
         }
 
-        // ELTODO do we need to skip text files here? probably not.
-        if (file.getNameExtension().equalsIgnoreCase("txt")) {
-            return false;
-        }
-
-        // ELTODO do we need to skip known files here? probably not.
-        if (KeywordSearchSettings.getSkipKnown() && file.getKnown().equals(TskData.FileKnown.KNOWN)) {
-            return false;
-        }
-
         String mimeType = fileTypeDetector.getMIMEType(file).trim().toLowerCase();
 
         if (KeywordSearchIngestModule.ARCHIVE_MIME_TYPES.contains(mimeType)) {
@@ -501,7 +483,7 @@
         }
 
         if (MimeTypes.OCTET_STREAM.equals(mimeType)) {
-            // ELTODO return false;
+            return false;
         }
 
         return true;
diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java
index 782f966616..cd6f255def 100644
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java
@@ -38,7 +38,6 @@ import java.util.concurrent.atomic.AtomicInteger;
 import java.util.logging.Level;
 import java.util.stream.Collectors;
 import org.apache.tika.mime.MimeTypes;
-import org.openide.util.Exceptions;
 import org.openide.util.Lookup;
 import org.openide.util.NbBundle;
 import org.openide.util.NbBundle.Messages;
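
Reviewer note: the sketch below is a minimal illustration of how the new forward-only source is intended to be driven, based on the API visible in this diff. ExtractedText is now constructed from an AbstractFile alone, the page count stays at -1 until the chunker reaches the end of the file, and ExtractedContentPanel enables paging solely from hasNextPage()/hasPreviousPage(). The getText() and nextPage() calls and the example class itself are assumptions for illustration only; they are not part of this change.

package org.sleuthkit.autopsy.keywordsearch; // ExtractedText and IndexedText are package-private

import org.sleuthkit.autopsy.textextractors.TextExtractor;
import org.sleuthkit.autopsy.textextractors.TextExtractorFactory;
import org.sleuthkit.datamodel.AbstractFile;

// Hypothetical helper, not part of this change.
class ExtractedTextPagingSketch {

    /**
     * Walks every chunk of raw text for a file, front to back. The chunking
     * readers cannot seek backwards, which is why ExtractedContentPanel no
     * longer pre-computes a total page count before enabling the controls.
     */
    static void printAllPages(AbstractFile file) {
        try {
            // New constructor from this diff: the object id parameter is gone.
            IndexedText source = new ExtractedText(file);
            while (true) {
                System.out.println(source.getText()); // getText() assumed from IndexedText
                if (!source.hasNextPage()) {          // hasNextPage() is what the panel checks
                    break;
                }
                source.nextPage();                    // nextPage() assumed to advance one chunk
            }
        } catch (TextExtractorFactory.NoTextExtractorFound | TextExtractor.InitReaderException ex) {
            // No text extractor for this file type, or the reader failed to open;
            // ExtractedTextViewer simply skips the "raw" source in this case.
        }
    }
}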