Improvements, fixes, and caching

This commit is contained in:
eugene.livis 2023-06-06 16:02:27 -04:00
parent e3da0cae14
commit cd83205382
4 changed files with 71 additions and 30 deletions

View File

@ -99,7 +99,7 @@ public class TextContentViewerPanel extends javax.swing.JPanel implements DataCo
/** /**
* Determine the isPreferred score for the content viewer which is * Determine the isPreferred score for the content viewer which is
* displaying this panel. Score is depenedent on the score of the supported * displaying this panel. Score is dependent on the score of the supported
* TextViewers which exist. * TextViewers which exist.
* *
* @param node * @param node

View File

@ -56,7 +56,15 @@ class ExtractedText implements IndexedText {
ExtractedText(AbstractFile file) throws TextExtractorFactory.NoTextExtractorFound, TextExtractor.InitReaderException { ExtractedText(AbstractFile file) throws TextExtractorFactory.NoTextExtractorFound, TextExtractor.InitReaderException {
this.abstractFile = file; this.abstractFile = file;
this.numPages = -1; // We don't know how many pages there are until we reach end of the document this.numPages = -1; // We don't know how many pages there are until we reach end of the document
initialize();
TextExtractor extractor = TextExtractorFactory.getExtractor(abstractFile, null);
Map<String, String> extractedMetadata = new HashMap<>();
Reader sourceReader = getTikaOrTextExtractor(extractor, abstractFile, extractedMetadata);
//Get a reader for the content of the given source
BufferedReader reader = new BufferedReader(sourceReader);
this.chunker = new Chunker(reader);
} }
@Override @Override
@ -164,17 +172,6 @@ class ExtractedText implements IndexedText {
return numPages; return numPages;
} }
private void initialize() throws TextExtractorFactory.NoTextExtractorFound, TextExtractor.InitReaderException {
// Pick the best available extractor for this file, then wrap its reader
// so the text can be consumed chunk by chunk.
Map<String, String> extractedMetadata = new HashMap<>();
TextExtractor extractor = TextExtractorFactory.getExtractor(abstractFile, null);
// Obtain a reader over the content of the source file
Reader sourceReader = getTikaOrTextExtractor(extractor, abstractFile, extractedMetadata);
chunker = new Chunker(new BufferedReader(sourceReader));
}
/** /**
* Extract text from abstractFile * Extract text from abstractFile
* *

View File

@ -68,6 +68,9 @@ public class ExtractedTextViewer implements TextViewer {
private IndexedText currentSource = null; private IndexedText currentSource = null;
private FileTypeDetector fileTypeDetector = null; private FileTypeDetector fileTypeDetector = null;
private long cachedObjId = -1;
private boolean chachedIsFullyIndexed = false;
/** /**
* Constructs a text viewer that displays the indexed text associated with a * Constructs a text viewer that displays the indexed text associated with a
* file or an artifact, possibly marked up with HTML to highlight keyword * file or an artifact, possibly marked up with HTML to highlight keyword
@ -426,25 +429,39 @@ public class ExtractedTextViewer implements TextViewer {
} }
/** /**
* Check if Solr has extracted content for a given node * Check if Solr has indexed ALL of the content for a given node. Note that
* in some situations Solr only indexes parts of a file. This happens when
* an in-line KWS finds a KW hit in the file - only the chunks with the KW
* hit (+/- 1 chunk) get indexed by Solr. That is not enough for the
* purposes of this text viewer as we need to display all of the text in the
* file.
* *
* @param objectId * @param objectId
* *
* @return true if Solr has content, else false * @return true if Solr has content, else false
*/ */
private boolean solrHasFullyIndexedContent(Long objectId) { private boolean solrHasFullyIndexedContent(Long objectId) {
final Server solrServer = KeywordSearch.getServer();
if (solrServer.coreIsOpen() == false) { // check if we have cached this decision
return false; if (objectId == cachedObjId) {
return chachedIsFullyIndexed;
} }
// ELTODO get total number of chunks in the file, and verify that cachedObjId = objectId;
// all of the chunks have been indexed. final Server solrServer = KeywordSearch.getServer();
if (solrServer.coreIsOpen() == false) {
chachedIsFullyIndexed = false;
return chachedIsFullyIndexed;
}
// verify that all of the chunks in the file have been indexed.
try { try {
return solrServer.queryIsIndexed(objectId); chachedIsFullyIndexed = solrServer.queryIsFullyIndexed(objectId);
return chachedIsFullyIndexed;
} catch (NoOpenCoreException | KeywordSearchModuleException ex) { } catch (NoOpenCoreException | KeywordSearchModuleException ex) {
logger.log(Level.SEVERE, "Error querying Solr server", ex); //NON-NLS logger.log(Level.SEVERE, "Error querying Solr server", ex); //NON-NLS
return false; chachedIsFullyIndexed = false;
return chachedIsFullyIndexed;
} }
} }

View File

@ -1635,23 +1635,29 @@ public class Server {
} }
/** /**
* Return true if the file is indexed (either as a whole as a chunk) * Return true if the file is fully indexed (no chunks are missing)
* *
* @param contentID * @param contentID
* *
* @return true if it is indexed * @return true if it is fully indexed
* *
* @throws KeywordSearchModuleException * @throws KeywordSearchModuleException
* @throws NoOpenCoreException * @throws NoOpenCoreException
*/ */
public boolean queryIsIndexed(long contentID) throws KeywordSearchModuleException, NoOpenCoreException { public boolean queryIsFullyIndexed(long contentID) throws KeywordSearchModuleException, NoOpenCoreException {
currentCoreLock.readLock().lock(); currentCoreLock.readLock().lock();
try { try {
if (null == currentCollection) { if (null == currentCollection) {
throw new NoOpenCoreException(); throw new NoOpenCoreException();
} }
try { try {
return currentCollection.queryIsIndexed(contentID); int totalNumChunks = currentCollection.queryTotalNumFileChunks(contentID);
if (totalNumChunks == 0) {
return false;
}
int numIndexedChunks = currentCollection.queryNumIndexedChunks(contentID);
return numIndexedChunks == totalNumChunks;
} catch (Exception ex) { } catch (Exception ex) {
// intentional "catch all" as Solr is known to throw all kinds of Runtime exceptions // intentional "catch all" as Solr is known to throw all kinds of Runtime exceptions
throw new KeywordSearchModuleException(NbBundle.getMessage(this.getClass(), "Server.queryIsIdxd.exception.msg"), ex); throw new KeywordSearchModuleException(NbBundle.getMessage(this.getClass(), "Server.queryIsIdxd.exception.msg"), ex);
@ -1680,7 +1686,7 @@ public class Server {
throw new NoOpenCoreException(); throw new NoOpenCoreException();
} }
try { try {
return currentCollection.queryNumFileChunks(fileID); return currentCollection.queryTotalNumFileChunks(fileID);
} catch (Exception ex) { } catch (Exception ex) {
// intentional "catch all" as Solr is known to throw all kinds of Runtime exceptions // intentional "catch all" as Solr is known to throw all kinds of Runtime exceptions
throw new KeywordSearchModuleException(NbBundle.getMessage(this.getClass(), "Server.queryNumFileChunks.exception.msg"), ex); throw new KeywordSearchModuleException(NbBundle.getMessage(this.getClass(), "Server.queryNumFileChunks.exception.msg"), ex);
@ -2484,7 +2490,7 @@ public class Server {
} }
/** /**
* Return true if the file is indexed (either as a whole as a chunk) * Return true if the file is indexed (either as a whole or as a chunk)
* *
* @param contentID * @param contentID
* *
@ -2502,17 +2508,20 @@ public class Server {
} }
/** /**
* Execute query that gets number of indexed file chunks for a file * Execute query that gets total number of file chunks for a file. NOTE:
* this does not imply that all of the chunks have been indexed. This
* parameter simply stores the total number of chunks that the file had
* (as determined during chunking).
* *
* @param contentID file id of the original file broken into chunks and * @param contentID file id of the original file broken into chunks and
* indexed * indexed
* *
* @return int representing number of indexed file chunks, 0 if there is * @return int representing number of file chunks, 0 if there is no
* no chunks * chunks
* *
* @throws SolrServerException * @throws SolrServerException
*/ */
private int queryNumFileChunks(long contentID) throws SolrServerException, IOException { private int queryTotalNumFileChunks(long contentID) throws SolrServerException, IOException {
final SolrQuery q = new SolrQuery(); final SolrQuery q = new SolrQuery();
q.setQuery("*:*"); q.setQuery("*:*");
String filterQuery = Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(Long.toString(contentID)); String filterQuery = Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(Long.toString(contentID));
@ -2537,6 +2546,24 @@ public class Server {
logger.log(Level.SEVERE, "Error getting content from Solr. Solr document id " + contentID + ", query: " + filterQuery); //NON-NLS logger.log(Level.SEVERE, "Error getting content from Solr. Solr document id " + contentID + ", query: " + filterQuery); //NON-NLS
return 0; return 0;
} }
/**
* Counts the Solr documents holding indexed chunks of the given content,
* without retrieving any of the chunk content itself.
*
* @param contentID file id of the original file broken into chunks and
* indexed
*
* @return int representing number of indexed chunks
*
* @throws SolrServerException
* @throws IOException
*/
int queryNumIndexedChunks(long contentID) throws SolrServerException, IOException {
// Match every chunk document of this file: "<id>_<chunk>" via wildcard
final SolrQuery chunkQuery = new SolrQuery(Server.Schema.ID + ":" + contentID + Server.CHUNK_ID_SEPARATOR + "*");
chunkQuery.setRows(0); // only the hit count is needed, not the documents
return (int) query(chunkQuery).getResults().getNumFound();
}
} }
class ServerAction extends AbstractAction { class ServerAction extends AbstractAction {