diff --git a/Core/src/org/sleuthkit/autopsy/contentviewers/textcontentviewer/TextContentViewerPanel.java b/Core/src/org/sleuthkit/autopsy/contentviewers/textcontentviewer/TextContentViewerPanel.java index 1bc3339867..781dabc867 100644 --- a/Core/src/org/sleuthkit/autopsy/contentviewers/textcontentviewer/TextContentViewerPanel.java +++ b/Core/src/org/sleuthkit/autopsy/contentviewers/textcontentviewer/TextContentViewerPanel.java @@ -99,7 +99,7 @@ public class TextContentViewerPanel extends javax.swing.JPanel implements DataCo /** * Determine the isPreffered score for the content viewer which is - * displaying this panel. Score is depenedent on the score of the supported + * displaying this panel. Score is dependent on the score of the supported * TextViewers which exist. * * @param node diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedText.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedText.java index edb8641b29..c0e4e5f6c4 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedText.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedText.java @@ -56,7 +56,15 @@ class ExtractedText implements IndexedText { ExtractedText(AbstractFile file) throws TextExtractorFactory.NoTextExtractorFound, TextExtractor.InitReaderException { this.abstractFile = file; this.numPages = -1; // We don't know how many pages there are until we reach end of the document - initialize(); + + TextExtractor extractor = TextExtractorFactory.getExtractor(abstractFile, null); + + Map extractedMetadata = new HashMap<>(); + Reader sourceReader = getTikaOrTextExtractor(extractor, abstractFile, extractedMetadata); + + //Get a reader for the content of the given source + BufferedReader reader = new BufferedReader(sourceReader); + this.chunker = new Chunker(reader); } @Override @@ -164,17 +172,6 @@ class ExtractedText implements IndexedText { return numPages; } - private void initialize() throws 
TextExtractorFactory.NoTextExtractorFound, TextExtractor.InitReaderException { - TextExtractor extractor = TextExtractorFactory.getExtractor(abstractFile, null); - - Map extractedMetadata = new HashMap<>(); - Reader sourceReader = getTikaOrTextExtractor(extractor, abstractFile, extractedMetadata); - - //Get a reader for the content of the given source - BufferedReader reader = new BufferedReader(sourceReader); - chunker = new Chunker(reader); - } - /** * Extract text from abstractFile * diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedTextViewer.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedTextViewer.java index 6047c2db60..3f28c97d25 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedTextViewer.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedTextViewer.java @@ -67,6 +67,9 @@ public class ExtractedTextViewer implements TextViewer { private volatile Node currentNode = null; private IndexedText currentSource = null; private FileTypeDetector fileTypeDetector = null; + + private long cachedObjId = -1; + private boolean cachedIsFullyIndexed = false; /** * Constructs a text viewer that displays the indexed text associated with a @@ -426,25 +429,39 @@ public class ExtractedTextViewer implements TextViewer { } /** - * Check if Solr has extracted content for a given node + * Check if Solr has indexed ALL of the content for a given node. Note that + * in some situations Solr only indexes parts of a file. This happens when + * an in-line KWS finds a KW hit in the file - only the chunks with the KW + * hit (+/- 1 chunk) get indexed by Solr. That is not enough for the + * purposes of this text viewer as we need to display all of the text in the + * file. 
* * @param objectId * * @return true if Solr has content, else false */ private boolean solrHasFullyIndexedContent(Long objectId) { + + // check if we have cached this decision + if (objectId == cachedObjId) { + return cachedIsFullyIndexed; + } + + cachedObjId = objectId; final Server solrServer = KeywordSearch.getServer(); if (solrServer.coreIsOpen() == false) { - return false; + cachedIsFullyIndexed = false; + return cachedIsFullyIndexed; } - // ELTODO get total number of chunks in the file, and verify that - // all of the chunks have been indexed. + // verify that all of the chunks in the file have been indexed. try { - return solrServer.queryIsIndexed(objectId); + cachedIsFullyIndexed = solrServer.queryIsFullyIndexed(objectId); + return cachedIsFullyIndexed; } catch (NoOpenCoreException | KeywordSearchModuleException ex) { logger.log(Level.SEVERE, "Error querying Solr server", ex); //NON-NLS - return false; + cachedIsFullyIndexed = false; + return cachedIsFullyIndexed; } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java index d54d6964dd..2a580c4a6a 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java @@ -1635,23 +1635,29 @@ public class Server { } /** - * Return true if the file is indexed (either as a whole as a chunk) + * Return true if the file is fully indexed (no chunks are missing) * * @param contentID * - * @return true if it is indexed + * @return true if it is fully indexed * * @throws KeywordSearchModuleException * @throws NoOpenCoreException */ - public boolean queryIsIndexed(long contentID) throws KeywordSearchModuleException, NoOpenCoreException { + public boolean queryIsFullyIndexed(long contentID) throws KeywordSearchModuleException, NoOpenCoreException { currentCoreLock.readLock().lock(); try { if (null == currentCollection) { throw new 
NoOpenCoreException(); } try { - return currentCollection.queryIsIndexed(contentID); + int totalNumChunks = currentCollection.queryTotalNumFileChunks(contentID); + if (totalNumChunks == 0) { + return false; + } + + int numIndexedChunks = currentCollection.queryNumIndexedChunks(contentID); + return numIndexedChunks == totalNumChunks; } catch (Exception ex) { // intentional "catch all" as Solr is known to throw all kinds of Runtime exceptions throw new KeywordSearchModuleException(NbBundle.getMessage(this.getClass(), "Server.queryIsIdxd.exception.msg"), ex); @@ -1680,7 +1686,7 @@ public class Server { throw new NoOpenCoreException(); } try { - return currentCollection.queryNumFileChunks(fileID); + return currentCollection.queryTotalNumFileChunks(fileID); } catch (Exception ex) { // intentional "catch all" as Solr is known to throw all kinds of Runtime exceptions throw new KeywordSearchModuleException(NbBundle.getMessage(this.getClass(), "Server.queryNumFileChunks.exception.msg"), ex); @@ -2484,7 +2490,7 @@ public class Server { } /** - * Return true if the file is indexed (either as a whole as a chunk) + * Return true if the file is indexed (either as a whole or as a chunk) * * @param contentID * @@ -2502,17 +2508,20 @@ public class Server { } /** - * Execute query that gets number of indexed file chunks for a file + * Execute query that gets total number of file chunks for a file. NOTE: + * this does not imply that all of the chunks have been indexed. This + * parameter simply stores the total number of chunks that the file had + * (as determined during chunking). 
* * @param contentID file id of the original file broken into chunks and - * indexed + * indexed * - * @return int representing number of indexed file chunks, 0 if there is - * no chunks + * @return int representing number of file chunks, 0 if there is no + * chunks * * @throws SolrServerException */ - private int queryNumFileChunks(long contentID) throws SolrServerException, IOException { + private int queryTotalNumFileChunks(long contentID) throws SolrServerException, IOException { final SolrQuery q = new SolrQuery(); q.setQuery("*:*"); String filterQuery = Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(Long.toString(contentID)); @@ -2537,6 +2546,24 @@ public class Server { logger.log(Level.SEVERE, "Error getting content from Solr. Solr document id " + contentID + ", query: " + filterQuery); //NON-NLS return 0; } + + /** + * Execute query that gets number of indexed chunks for a specific Solr + * document, without actually returning the content. + * + * @param contentID file id of the original file broken into chunks and + * indexed + * + * @return int representing number of indexed chunks + * + * @throws SolrServerException + */ + int queryNumIndexedChunks(long contentID) throws SolrServerException, IOException { + SolrQuery q = new SolrQuery(Server.Schema.ID + ":" + contentID + Server.CHUNK_ID_SEPARATOR + "*"); + q.setRows(0); + int numChunks = (int) query(q).getResults().getNumFound(); + return numChunks; + } } class ServerAction extends AbstractAction {