ingestStatus is now static and shared between all threads

This commit is contained in:
Samuel H. Kenyon 2014-04-08 17:42:30 -04:00
parent 3a5e00a6bc
commit 42e83d3b23

View File

@ -91,7 +91,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
private long dataSourceId; private long dataSourceId;
private static AtomicInteger instanceCount = new AtomicInteger(0); //just used for logging private static AtomicInteger instanceCount = new AtomicInteger(0); //just used for logging
private int instanceNum = 0; private int instanceNum = 0;
private enum IngestStatus { private enum IngestStatus {
TEXT_INGESTED, /// Text was extracted by knowing file type and text_ingested TEXT_INGESTED, /// Text was extracted by knowing file type and text_ingested
@ -101,8 +101,14 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
SKIPPED_ERROR_TEXTEXTRACT, ///< File was skipped because of text extraction issues SKIPPED_ERROR_TEXTEXTRACT, ///< File was skipped because of text extraction issues
SKIPPED_ERROR_IO ///< File was skipped because of IO issues reading it SKIPPED_ERROR_IO ///< File was skipped because of IO issues reading it
}; };
private Map<Long, IngestStatus> ingestStatus; private static final Map<Long, IngestStatus> ingestStatus = new HashMap<>(); //guarded by itself
/**
 * Records the ingest status for the given id in the static ingestStatus map.
 * The write is performed while holding the map's own monitor, the same lock
 * the other accessors of the map synchronize on, so instances running on
 * different threads can call this safely. An existing entry for the same id
 * is replaced.
 *
 * @param id     key under which the status is stored (callers pass a file's id)
 * @param status the ingest outcome to record for that id
 */
static void putIngestStatus(long id, IngestStatus status) {
synchronized(ingestStatus) {
ingestStatus.put(id, status);
}
}
KeywordSearchIngestModule(KeywordSearchJobSettings settings) { KeywordSearchIngestModule(KeywordSearchJobSettings settings) {
this.settings = settings; this.settings = settings;
instanceNum = instanceCount.getAndIncrement(); instanceNum = instanceCount.getAndIncrement();
@ -167,8 +173,6 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
textExtractors.add(new AbstractFileHtmlExtract(this)); textExtractors.add(new AbstractFileHtmlExtract(this));
textExtractors.add(new AbstractFileTikaTextExtract(this)); textExtractors.add(new AbstractFileTikaTextExtract(this));
ingestStatus = new HashMap<>();
List<KeywordList> keywordLists = KeywordSearchListsXML.getCurrent().getListsL(); List<KeywordList> keywordLists = KeywordSearchListsXML.getCurrent().getListsL();
boolean hasKeywordsForSearch = false; boolean hasKeywordsForSearch = false;
for (KeywordList keywordList : keywordLists) { for (KeywordList keywordList : keywordLists) {
@ -191,7 +195,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
if (initialized == false) //error initializing indexing/Solr if (initialized == false) //error initializing indexing/Solr
{ {
logger.log(Level.WARNING, "Skipping processing, module not initialized, file: {0}", abstractFile.getName()); logger.log(Level.WARNING, "Skipping processing, module not initialized, file: {0}", abstractFile.getName());
ingestStatus.put(abstractFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING); putIngestStatus(abstractFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
return ProcessResult.OK; return ProcessResult.OK;
} }
try { try {
@ -276,7 +280,9 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
* Common cleanup code when module stops or final searcher completes * Common cleanup code when module stops or final searcher completes
*/ */
private void cleanup() { private void cleanup() {
ingestStatus.clear(); synchronized(ingestStatus) {
ingestStatus.clear();
}
textExtractors.clear(); textExtractors.clear();
textExtractors = null; textExtractors = null;
@ -297,31 +303,34 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
int error_text = 0; int error_text = 0;
int error_index = 0; int error_index = 0;
int error_io = 0; int error_io = 0;
for (IngestStatus s : ingestStatus.values()) {
switch (s) { synchronized(ingestStatus) {
case TEXT_INGESTED: for (IngestStatus s : ingestStatus.values()) {
++text_ingested; switch (s) {
break; case TEXT_INGESTED:
case METADATA_INGESTED: ++text_ingested;
++metadata_ingested; break;
break; case METADATA_INGESTED:
case STRINGS_INGESTED: ++metadata_ingested;
++strings_ingested; break;
break; case STRINGS_INGESTED:
case SKIPPED_ERROR_TEXTEXTRACT: ++strings_ingested;
error_text++; break;
break; case SKIPPED_ERROR_TEXTEXTRACT:
case SKIPPED_ERROR_INDEXING: error_text++;
error_index++; break;
break; case SKIPPED_ERROR_INDEXING:
case SKIPPED_ERROR_IO: error_index++;
error_io++; break;
break; case SKIPPED_ERROR_IO:
default: error_io++;
; break;
default:
;
}
} }
} }
StringBuilder msg = new StringBuilder(); StringBuilder msg = new StringBuilder();
msg.append("<table border=0><tr><td>").append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.knowFileHeaderLbl")).append("</td><td>").append(text_ingested).append("</td></tr>"); msg.append("<table border=0><tr><td>").append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.knowFileHeaderLbl")).append("</td><td>").append(text_ingested).append("</td></tr>");
msg.append("<tr><td>").append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.fileGenStringsHead")).append("</td><td>").append(strings_ingested).append("</td></tr>"); msg.append("<tr><td>").append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.postIndexSummary.fileGenStringsHead")).append("</td><td>").append(strings_ingested).append("</td></tr>");
@ -393,16 +402,16 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
private boolean extractStringsAndIndex(AbstractFile aFile) { private boolean extractStringsAndIndex(AbstractFile aFile) {
try { try {
if (stringExtractor.index(aFile)) { if (stringExtractor.index(aFile)) {
ingestStatus.put(aFile.getId(), IngestStatus.STRINGS_INGESTED); putIngestStatus(aFile.getId(), IngestStatus.STRINGS_INGESTED);
return true; return true;
} else { } else {
logger.log(Level.WARNING, "Failed to extract strings and ingest, file ''{0}'' (id: {1}).", new Object[]{aFile.getName(), aFile.getId()}); logger.log(Level.WARNING, "Failed to extract strings and ingest, file ''{0}'' (id: {1}).", new Object[]{aFile.getName(), aFile.getId()});
ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT); putIngestStatus(aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
return false; return false;
} }
} catch (IngesterException ex) { } catch (IngesterException ex) {
logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").", ex); logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").", ex);
ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING); putIngestStatus(aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
return false; return false;
} }
} }
@ -448,9 +457,9 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
if ((indexContent == false || aFile.isDir() || size == 0)) { if ((indexContent == false || aFile.isDir() || size == 0)) {
try { try {
ingester.ingest(aFile, false); //meta-data only ingester.ingest(aFile, false); //meta-data only
ingestStatus.put(aFile.getId(), IngestStatus.METADATA_INGESTED); putIngestStatus(aFile.getId(), IngestStatus.METADATA_INGESTED);
} catch (IngesterException ex) { } catch (IngesterException ex) {
ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING); putIngestStatus(aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex); logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex);
} }
return; return;
@ -484,9 +493,9 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
if (AbstractFileExtract.ARCHIVE_MIME_TYPES.contains(detectedFormat)) { if (AbstractFileExtract.ARCHIVE_MIME_TYPES.contains(detectedFormat)) {
try { try {
ingester.ingest(aFile, false); //meta-data only ingester.ingest(aFile, false); //meta-data only
ingestStatus.put(aFile.getId(), IngestStatus.METADATA_INGESTED); putIngestStatus(aFile.getId(), IngestStatus.METADATA_INGESTED);
} catch (IngesterException ex) { } catch (IngesterException ex) {
ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING); putIngestStatus(aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex); logger.log(Level.WARNING, "Unable to index meta-data for file: " + aFile.getId(), ex);
} }
return; return;
@ -499,20 +508,20 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
//logger.log(Level.INFO, "indexing: " + aFile.getName()); //logger.log(Level.INFO, "indexing: " + aFile.getName());
if (!extractTextAndIndex(aFile, detectedFormat)) { if (!extractTextAndIndex(aFile, detectedFormat)) {
logger.log(Level.WARNING, "Failed to extract text and ingest, file ''{0}'' (id: {1}).", new Object[]{aFile.getName(), aFile.getId()}); logger.log(Level.WARNING, "Failed to extract text and ingest, file ''{0}'' (id: {1}).", new Object[]{aFile.getName(), aFile.getId()});
ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT); putIngestStatus(aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
} else { } else {
ingestStatus.put(aFile.getId(), IngestStatus.TEXT_INGESTED); putIngestStatus(aFile.getId(), IngestStatus.TEXT_INGESTED);
wasTextAdded = true; wasTextAdded = true;
} }
} catch (IngesterException e) { } catch (IngesterException e) {
logger.log(Level.INFO, "Could not extract text with Tika, " + aFile.getId() + ", " logger.log(Level.INFO, "Could not extract text with Tika, " + aFile.getId() + ", "
+ aFile.getName(), e); + aFile.getName(), e);
ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING); putIngestStatus(aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
} catch (Exception e) { } catch (Exception e) {
logger.log(Level.WARNING, "Error extracting text with Tika, " + aFile.getId() + ", " logger.log(Level.WARNING, "Error extracting text with Tika, " + aFile.getId() + ", "
+ aFile.getName(), e); + aFile.getName(), e);
ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT); putIngestStatus(aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
} }
} }