ingestStatus is now static and shared between all threads

This commit is contained in:
Samuel H. Kenyon 2014-04-08 17:42:30 -04:00
parent 3a5e00a6bc
commit 42e83d3b23

View File

@@ -101,7 +101,13 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
SKIPPED_ERROR_TEXTEXTRACT, ///< File was skipped because of text extraction issues
SKIPPED_ERROR_IO ///< File was skipped because of IO issues reading it
};
private Map<Long, IngestStatus> ingestStatus;
private static final Map<Long, IngestStatus> ingestStatus = new HashMap<>(); //guarded by itself
/**
 * Records the ingest status for the file with the given object id in the
 * shared static {@code ingestStatus} map.
 *
 * Thread-safety: the map is shared by all ingest threads and is guarded by
 * its own monitor ("guarded by itself" per the field declaration), so every
 * access — including the iteration in postIndexSummary() and the clear() in
 * cleanup() — must synchronize on {@code ingestStatus}, as done here.
 *
 * @param id     object id of the file (AbstractFile.getId())
 * @param status outcome of ingesting that file
 */
static void putIngestStatus(long id, IngestStatus status) {
synchronized(ingestStatus) {
ingestStatus.put(id, status);
}
}
KeywordSearchIngestModule(KeywordSearchJobSettings settings) {
this.settings = settings;
@@ -167,8 +173,6 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
textExtractors.add(new AbstractFileHtmlExtract(this));
textExtractors.add(new AbstractFileTikaTextExtract(this));
ingestStatus = new HashMap<>();
List<KeywordList> keywordLists = KeywordSearchListsXML.getCurrent().getListsL();
boolean hasKeywordsForSearch = false;
for (KeywordList keywordList : keywordLists) {
@@ -191,7 +195,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
if (initialized == false) //error initializing indexing/Solr
{
logger.log(Level.WARNING, "Skipping processing, module not initialized, file: {0}", abstractFile.getName());
ingestStatus.put(abstractFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
putIngestStatus(abstractFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
return ProcessResult.OK;
}
try {
@@ -276,7 +280,9 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
* Common cleanup code when module stops or final searcher completes
*/
private void cleanup() {
synchronized(ingestStatus) {
ingestStatus.clear();
}
textExtractors.clear();
textExtractors = null;
@@ -297,6 +303,8 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
int error_text = 0;
int error_index = 0;
int error_io = 0;
synchronized(ingestStatus) {
for (IngestStatus s : ingestStatus.values()) {
switch (s) {
case TEXT_INGESTED:
@@ -321,6 +329,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
;
}
}
}
StringBuilder msg = new StringBuilder();
msg.append("<table border=0><tr><td>").append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.knowFileHeaderLbl")).append("</td><td>").append(text_ingested).append("</td></tr>");
@@ -393,16 +402,16 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
/**
 * Runs the string extractor over the given file and indexes the result.
 *
 * On success the file is marked STRINGS_INGESTED; on an extractor failure
 * it is marked SKIPPED_ERROR_TEXTEXTRACT, and on an indexing exception
 * SKIPPED_ERROR_INDEXING. Status is recorded through the thread-safe
 * putIngestStatus() helper.
 *
 * @param aFile file to extract strings from and index
 * @return true if the file was indexed, false otherwise
 */
private boolean extractStringsAndIndex(AbstractFile aFile) {
    final long fileId = aFile.getId();
    try {
        // Guard clause: bail out early when the extractor reports failure.
        if (!stringExtractor.index(aFile)) {
            logger.log(Level.WARNING, "Failed to extract strings and ingest, file ''{0}'' (id: {1}).", new Object[]{aFile.getName(), fileId});
            putIngestStatus(fileId, IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
            return false;
        }
        putIngestStatus(fileId, IngestStatus.STRINGS_INGESTED);
        return true;
    } catch (IngesterException ex) {
        logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + fileId + ").", ex);
        putIngestStatus(fileId, IngestStatus.SKIPPED_ERROR_INDEXING);
        return false;
    }
}
@@ -448,9 +457,9 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
if ((indexContent == false || aFile.isDir() || size == 0)) {
try {
ingester.ingest(aFile, false); //meta-data only
ingestStatus.put(aFile.getId(), IngestStatus.METADATA_INGESTED);
putIngestStatus(aFile.getId(), IngestStatus.METADATA_INGESTED);
} catch (IngesterException ex) {
ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
putIngestStatus(aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex);
}
return;
@@ -484,9 +493,9 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
if (AbstractFileExtract.ARCHIVE_MIME_TYPES.contains(detectedFormat)) {
try {
ingester.ingest(aFile, false); //meta-data only
ingestStatus.put(aFile.getId(), IngestStatus.METADATA_INGESTED);
putIngestStatus(aFile.getId(), IngestStatus.METADATA_INGESTED);
} catch (IngesterException ex) {
ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
putIngestStatus(aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex);
}
return;
@@ -499,20 +508,20 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
//logger.log(Level.INFO, "indexing: " + aFile.getName());
if (!extractTextAndIndex(aFile, detectedFormat)) {
logger.log(Level.WARNING, "Failed to extract text and ingest, file ''{0}'' (id: {1}).", new Object[]{aFile.getName(), aFile.getId()});
ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
putIngestStatus(aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
} else {
ingestStatus.put(aFile.getId(), IngestStatus.TEXT_INGESTED);
putIngestStatus(aFile.getId(), IngestStatus.TEXT_INGESTED);
wasTextAdded = true;
}
} catch (IngesterException e) {
logger.log(Level.INFO, "Could not extract text with Tika, " + aFile.getId() + ", "
+ aFile.getName(), e);
ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
putIngestStatus(aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
} catch (Exception e) {
logger.log(Level.WARNING, "Error extracting text with Tika, " + aFile.getId() + ", "
+ aFile.getName(), e);
ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
putIngestStatus(aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
}
}