mirror of https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-06 21:00:22 +00:00

Commit: Improvements and fixes and caching
This commit is contained in:
parent e3da0cae14
commit cd83205382
@@ -99,7 +99,7 @@ public class TextContentViewerPanel extends javax.swing.JPanel implements DataCo
     /**
      * Determine the isPreffered score for the content viewer which is
-     * displaying this panel. Score is depenedent on the score of the supported
+     * displaying this panel. Score is dependent on the score of the supported
      * TextViewers which exist.
      *
      * @param node
@@ -56,7 +56,15 @@ class ExtractedText implements IndexedText {
     ExtractedText(AbstractFile file) throws TextExtractorFactory.NoTextExtractorFound, TextExtractor.InitReaderException {
         this.abstractFile = file;
         this.numPages = -1; // We don't know how many pages there are until we reach end of the document
-        initialize();
+
+        TextExtractor extractor = TextExtractorFactory.getExtractor(abstractFile, null);
+
+        Map<String, String> extractedMetadata = new HashMap<>();
+        Reader sourceReader = getTikaOrTextExtractor(extractor, abstractFile, extractedMetadata);
+
+        //Get a reader for the content of the given source
+        BufferedReader reader = new BufferedReader(sourceReader);
+        this.chunker = new Chunker(reader);
     }
 
     @Override
@@ -164,17 +172,6 @@ class ExtractedText implements IndexedText {
         return numPages;
     }
 
-    private void initialize() throws TextExtractorFactory.NoTextExtractorFound, TextExtractor.InitReaderException {
-        TextExtractor extractor = TextExtractorFactory.getExtractor(abstractFile, null);
-
-        Map<String, String> extractedMetadata = new HashMap<>();
-        Reader sourceReader = getTikaOrTextExtractor(extractor, abstractFile, extractedMetadata);
-
-        //Get a reader for the content of the given source
-        BufferedReader reader = new BufferedReader(sourceReader);
-        chunker = new Chunker(reader);
-    }
-
     /**
      * Extract text from abstractFile
      *
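The two ExtractedText hunks above are an inlining refactor: the private initialize() helper is deleted and its body moves into the constructor, so a failure to build the text extractor surfaces when the object is constructed rather than on a later call, and chunker can be assigned exactly once via this.chunker. A minimal sketch of the same eager-initialization pattern; the class and method names here are illustrative, not from the Autopsy codebase:

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.Reader;
    import java.io.StringReader;

    class EagerSource {
        private final BufferedReader reader;

        // All fallible setup runs in the constructor: a half-initialized object
        // never escapes, and there is no initialize() call for a caller to forget.
        EagerSource(Reader source) {
            this.reader = new BufferedReader(source);
        }

        String firstLine() throws IOException {
            return reader.readLine();
        }

        public static void main(String[] args) throws IOException {
            System.out.println(new EagerSource(new StringReader("chunk 1\nchunk 2")).firstLine());
        }
    }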
@@ -67,6 +67,9 @@ public class ExtractedTextViewer implements TextViewer {
     private volatile Node currentNode = null;
     private IndexedText currentSource = null;
     private FileTypeDetector fileTypeDetector = null;
+
+    private long cachedObjId = -1;
+    private boolean chachedIsFullyIndexed = false;
 
     /**
      * Constructs a text viewer that displays the indexed text associated with a
@@ -426,25 +429,39 @@ public class ExtractedTextViewer implements TextViewer {
     }
 
     /**
-     * Check if Solr has extracted content for a given node
+     * Check if Solr has indexed ALL of the content for a given node. Note that
+     * in some situations Solr only indexes parts of a file. This happens when
+     * an in-line KWS finds a KW hit in the file - only the chunks with the KW
+     * hit (+/- 1 chunk) get indexed by Solr. That is not enough for the
+     * purposes of this text viewer as we need to display all of the text in the
+     * file.
      *
      * @param objectId
      *
      * @return true if Solr has content, else false
      */
     private boolean solrHasFullyIndexedContent(Long objectId) {
+
+        // check if we have cached this decision
+        if (objectId == cachedObjId) {
+            return chachedIsFullyIndexed;
+        }
+
+        cachedObjId = objectId;
         final Server solrServer = KeywordSearch.getServer();
         if (solrServer.coreIsOpen() == false) {
-            return false;
+            chachedIsFullyIndexed = false;
+            return chachedIsFullyIndexed;
         }
 
-        // ELTODO get total number of chunks in the file, and verify that
-        // all of the chunks have been indexed.
+        // verify that all of the chunks in the file have been indexed.
         try {
-            return solrServer.queryIsIndexed(objectId);
+            chachedIsFullyIndexed = solrServer.queryIsFullyIndexed(objectId);
+            return chachedIsFullyIndexed;
         } catch (NoOpenCoreException | KeywordSearchModuleException ex) {
             logger.log(Level.SEVERE, "Error querying Solr server", ex); //NON-NLS
-            return false;
+            chachedIsFullyIndexed = false;
+            return chachedIsFullyIndexed;
         }
     }
 
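The fields added in the @@ -67,6 hunk (cachedObjId and chachedIsFullyIndexed, spelled as committed) form a single-entry cache: solrHasFullyIndexedContent() remembers the answer for the last object ID it saw and skips the Solr round trip when the viewer asks about the same node again. Because cachedObjId is written before the query runs, every exit path in the rewritten method must assign the cached result before returning, and in the diff each one does. A standalone sketch of the same memoization shape, assuming single-threaded access as in a Swing viewer; expensiveLookup is a stand-in for the Solr query:

    class LastAnswerCache {
        private long cachedObjId = -1;
        private boolean cachedAnswer = false;

        boolean isFullyIndexed(long objId) {
            if (objId == cachedObjId) {
                return cachedAnswer;   // cache hit: no round trip
            }
            cachedObjId = objId;       // written eagerly, as in the diff...
            cachedAnswer = expensiveLookup(objId);
            return cachedAnswer;       // ...so every path must set cachedAnswer
        }

        // Stand-in for solrServer.queryIsFullyIndexed(objId).
        private boolean expensiveLookup(long objId) {
            return objId % 2 == 0;
        }
    }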
@@ -1635,23 +1635,29 @@ public class Server {
     }
 
     /**
-     * Return true if the file is indexed (either as a whole as a chunk)
+     * Return true if the file is fully indexed (no chunks are missing)
      *
      * @param contentID
      *
-     * @return true if it is indexed
+     * @return true if it is fully indexed
      *
      * @throws KeywordSearchModuleException
      * @throws NoOpenCoreException
      */
-    public boolean queryIsIndexed(long contentID) throws KeywordSearchModuleException, NoOpenCoreException {
+    public boolean queryIsFullyIndexed(long contentID) throws KeywordSearchModuleException, NoOpenCoreException {
         currentCoreLock.readLock().lock();
         try {
             if (null == currentCollection) {
                 throw new NoOpenCoreException();
             }
             try {
-                return currentCollection.queryIsIndexed(contentID);
+                int totalNumChunks = currentCollection.queryTotalNumFileChunks(contentID);
+                if (totalNumChunks == 0) {
+                    return false;
+                }
+
+                int numIndexedChunks = currentCollection.queryNumIndexedChunks(contentID);
+                return numIndexedChunks == totalNumChunks;
             } catch (Exception ex) {
                 // intentional "catch all" as Solr is known to throw all kinds of Runtime exceptions
                 throw new KeywordSearchModuleException(NbBundle.getMessage(this.getClass(), "Server.queryIsIdxd.exception.msg"), ex);
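The rename from queryIsIndexed to queryIsFullyIndexed carries a semantic change, not just a new name: "some chunk is indexed" becomes "every chunk is indexed". The decision reduces to comparing two counts, restated in this sketch of the logic from the hunk above:

    // A file counts as fully indexed only when every chunk produced at
    // chunking time is present in the index; zero recorded chunks means
    // nothing was indexed at all.
    static boolean isFullyIndexed(int totalNumChunks, int numIndexedChunks) {
        if (totalNumChunks == 0) {
            return false;
        }
        return numIndexedChunks == totalNumChunks;
    }

For example, an in-line keyword hit that indexed only 3 of a file's 10 chunks yields isFullyIndexed(10, 3) == false, so the viewer falls back to extracting the text itself via the new ExtractedText path.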
@@ -1680,7 +1686,7 @@ public class Server {
                 throw new NoOpenCoreException();
             }
             try {
-                return currentCollection.queryNumFileChunks(fileID);
+                return currentCollection.queryTotalNumFileChunks(fileID);
             } catch (Exception ex) {
                 // intentional "catch all" as Solr is known to throw all kinds of Runtime exceptions
                 throw new KeywordSearchModuleException(NbBundle.getMessage(this.getClass(), "Server.queryNumFileChunks.exception.msg"), ex);
@@ -2484,7 +2490,7 @@ public class Server {
         }
 
         /**
-         * Return true if the file is indexed (either as a whole as a chunk)
+         * Return true if the file is indexed (either as a whole or as a chunk)
          *
         * @param contentID
         *
@@ -2502,17 +2508,20 @@ public class Server {
         }
 
         /**
-         * Execute query that gets number of indexed file chunks for a file
+         * Execute query that gets total number of file chunks for a file. NOTE:
+         * this does not imply that all of the chunks have been indexed. This
+         * parameter simply stores the total number of chunks that the file had
+         * (as determined during chunking).
          *
         * @param contentID file id of the original file broken into chunks and
         *                  indexed
         *
-         * @return int representing number of indexed file chunks, 0 if there is
-         *         no chunks
+         * @return int representing number of file chunks, 0 if there is no
+         *         chunks
          *
         * @throws SolrServerException
         */
-        private int queryNumFileChunks(long contentID) throws SolrServerException, IOException {
+        private int queryTotalNumFileChunks(long contentID) throws SolrServerException, IOException {
             final SolrQuery q = new SolrQuery();
             q.setQuery("*:*");
             String filterQuery = Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(Long.toString(contentID));
@@ -2537,6 +2546,24 @@ public class Server {
             logger.log(Level.SEVERE, "Error getting content from Solr. Solr document id " + contentID + ", query: " + filterQuery); //NON-NLS
             return 0;
         }
+
+        /**
+         * Execute query that gets number of indexed chunks for a specific Solr
+         * document, without actually returning the content.
+         *
+         * @param contentID file id of the original file broken into chunks and
+         *                  indexed
+         *
+         * @return int representing number of indexed chunks
+         *
+         * @throws SolrServerException
+         */
+        int queryNumIndexedChunks(long contentID) throws SolrServerException, IOException {
+            SolrQuery q = new SolrQuery(Server.Schema.ID + ":" + contentID + Server.CHUNK_ID_SEPARATOR + "*");
+            q.setRows(0);
+            int numChunks = (int) query(q).getResults().getNumFound();
+            return numChunks;
+        }
     }
 
 class ServerAction extends AbstractAction {
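The added queryNumIndexedChunks counts chunk documents without fetching any of them: chunk IDs are the parent file's ID plus CHUNK_ID_SEPARATOR and a chunk number, so a trailing wildcard matches every indexed chunk, and setRows(0) tells Solr to report only the match count (numFound). A self-contained SolrJ sketch of the same counting idiom; the URL, core name, and the "_" separator in the example query are placeholder assumptions, not taken from the diff:

    import java.io.IOException;

    import org.apache.solr.client.solrj.SolrClient;
    import org.apache.solr.client.solrj.SolrQuery;
    import org.apache.solr.client.solrj.SolrServerException;
    import org.apache.solr.client.solrj.impl.HttpSolrClient;

    class ChunkCount {

        // Count documents matching queryString without transferring them:
        // rows=0 makes Solr compute numFound but return an empty result list.
        static long countMatches(SolrClient solr, String queryString)
                throws SolrServerException, IOException {
            SolrQuery q = new SolrQuery(queryString);
            q.setRows(0);
            return solr.query(q).getResults().getNumFound();
        }

        public static void main(String[] args) throws Exception {
            // Placeholder endpoint; Autopsy manages its own Solr server and cores.
            try (SolrClient solr = new HttpSolrClient.Builder("http://localhost:8983/solr/text_index").build()) {
                long chunks = countMatches(solr, "id:12345_*"); // e.g. <fileId>_<chunkNum>
                System.out.println(chunks + " indexed chunks for file 12345");
            }
        }
    }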