TSK-267 Extract English strings from smallish unknown files

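- Improve reporting of what is going on: progress is now tracked separately for each processing phase (ingestible and non-ingestible files), files that fail ingestion also advance the progress bar, and the status message for the string-extraction phase is labelled "String extracting/Indexing".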
This commit is contained in:
adam-m 2011-12-16 10:00:39 -05:00
parent 3c827c868e
commit adb3488852

View File

@ -68,8 +68,6 @@ public class IndexContentFilesAction extends AbstractAction {
//could also be useful for reporting //could also be useful for reporting
private Map<Long, IngestStatus> ingestStatus; private Map<Long, IngestStatus> ingestStatus;
private int problemFilesCount; private int problemFilesCount;
private int finishedFiles;
private int totalFilesCount;
/** /**
* New action * New action
@ -126,12 +124,8 @@ public class IndexContentFilesAction extends AbstractAction {
nonIngestibleFiles.addAll(allFiles); nonIngestibleFiles.addAll(allFiles);
nonIngestibleFiles.removeAll(ingestableFiles); nonIngestibleFiles.removeAll(ingestableFiles);
setProgress(0);
// track number complete or with errors // track number complete or with errors
totalFilesCount = allFiles.size();
problemFilesCount = 0; problemFilesCount = 0;
finishedFiles = 0;
ingestStatus.clear(); ingestStatus.clear();
//work on known files first //work on known files first
@ -157,6 +151,10 @@ public class IndexContentFilesAction extends AbstractAction {
private Collection<FsContent> processIngestible(Ingester ingester, Collection<FsContent> fscc) { private Collection<FsContent> processIngestible(Ingester ingester, Collection<FsContent> fscc) {
Collection<FsContent> ingestFailedCol = new ArrayList<FsContent>(); Collection<FsContent> ingestFailedCol = new ArrayList<FsContent>();
setProgress(0);
int finishedFiles = 0;
final int totalFilesCount = fscc.size();
for (FsContent f : fscc) { for (FsContent f : fscc) {
if (isCancelled()) { if (isCancelled()) {
return ingestFailedCol; return ingestFailedCol;
@ -165,22 +163,26 @@ public class IndexContentFilesAction extends AbstractAction {
try { try {
ingester.ingest(f); ingester.ingest(f);
ingestStatus.put(f.getId(), IngestStatus.INGESTED); ingestStatus.put(f.getId(), IngestStatus.INGESTED);
setProgress(++finishedFiles * 100 / totalFilesCount);
} catch (IngesterException ex) { } catch (IngesterException ex) {
ingestFailedCol.add(f); ingestFailedCol.add(f);
ingestStatus.put(f.getId(), IngestStatus.NOT_INGESTED); ingestStatus.put(f.getId(), IngestStatus.NOT_INGESTED);
logger.log(Level.INFO, "Ingester failed with file '" + f.getName() + "' (id: " + f.getId() + ").", ex); logger.log(Level.INFO, "Ingester failed with file '" + f.getName() + "' (id: " + f.getId() + ").", ex);
} }
setProgress(++finishedFiles * 100 / totalFilesCount);
} }
return ingestFailedCol; return ingestFailedCol;
} }
private void processNonIngestible(Ingester ingester, Collection<FsContent> fscc) { private void processNonIngestible(Ingester ingester, Collection<FsContent> fscc) {
setProgress(0);
int finishedFiles = 0;
final int totalFilesCount = fscc.size();
for (FsContent f : fscc) { for (FsContent f : fscc) {
if (isCancelled()) { if (isCancelled()) {
return; return;
} }
this.publish("Extracting/Indexing " + (finishedFiles + 1) + "/" + totalFilesCount + ": " + f.getName()); this.publish("String extracting/Indexing " + (finishedFiles + 1) + "/" + totalFilesCount + ": " + f.getName());
if (f.getSize() < MAX_STRING_EXTRACT_SIZE) { if (f.getSize() < MAX_STRING_EXTRACT_SIZE) {
if (!extractAndIngest(ingester, f)) { if (!extractAndIngest(ingester, f)) {