Skip bad files during ingest

Continue ingesting the remaining files even if errors are encountered while reading a file or processing it on the server.
This commit is contained in:
Peter J. Martel 2011-11-30 16:34:33 -05:00
parent 6338302fd6
commit 924dcecfd5
2 changed files with 35 additions and 8 deletions

View File

@ -18,6 +18,7 @@
*/
package org.sleuthkit.autopsy.keywordsearch;
import java.awt.Component;
import java.awt.Dimension;
import java.awt.Toolkit;
import java.awt.event.ActionEvent;
@ -32,6 +33,8 @@ import java.util.logging.Logger;
import javax.swing.AbstractAction;
import javax.swing.JDialog;
import javax.swing.JFrame;
import javax.swing.JOptionPane;
import javax.swing.SwingUtilities;
import javax.swing.SwingWorker;
import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;
import org.sleuthkit.datamodel.Content;
@ -62,10 +65,10 @@ public class IndexContentFilesAction extends AbstractAction {
// initialize panel
final IndexProgressPanel panel = new IndexProgressPanel();
final SwingWorker task = new SwingWorker<Void, String>() {
final SwingWorker task = new SwingWorker<Integer, String>() {
@Override
protected Void doInBackground() throws Exception {
protected Integer doInBackground() throws Exception {
Ingester ingester = new Ingester("http://localhost:8983/solr");
Collection<FsContent> files = c.accept(new GetIngestableFilesContentVisitor());
@ -74,10 +77,11 @@ public class IndexContentFilesAction extends AbstractAction {
int fileCount = files.size();
int finishedFiles = 0;
int problemFiles = 0;
for (FsContent f : files) {
if (isCancelled()) {
return null;
return problemFiles;
}
this.publish("Indexing " + (finishedFiles + 1) + "/" + fileCount + ": " + f.getName());
@ -86,6 +90,7 @@ public class IndexContentFilesAction extends AbstractAction {
ingester.ingest(f);
} catch (IngesterException ex) {
logger.log(Level.INFO, "Ingester had a problem with file '" + f.getName() + "' (id: " + f.getId() + ").", ex);
problemFiles++;
}
setProgress(++finishedFiles * 100 / fileCount);
@ -93,14 +98,16 @@ public class IndexContentFilesAction extends AbstractAction {
ingester.commit();
return null;
return problemFiles;
}
@Override
protected void done() {
int problemFiles = 0;
try {
if (!this.isCancelled()) {
get();
problemFiles = get();
}
} catch (InterruptedException ex) {
@ -111,6 +118,9 @@ public class IndexContentFilesAction extends AbstractAction {
} finally {
popUpWindow.setVisible(false);
popUpWindow.dispose();
if (problemFiles > 0) {
displayProblemFilesDialog(problemFiles);
}
}
}
@ -159,4 +169,17 @@ public class IndexContentFilesAction extends AbstractAction {
// display the window
popUpWindow.setVisible(true);
}
/**
 * Shows a warning dialog telling the user that some files could not be
 * indexed and that the log holds the details.
 *
 * @param problemFiles the number of files to report in the dialog message
 */
private void displayProblemFilesDialog(int problemFiles) {
    // No parent component is supplied, so JOptionPane uses its default frame.
    JOptionPane.showMessageDialog(
            (Component) null,
            "Had trouble indexing " + problemFiles + " of the files. See the log for details.",
            "Problem indexing some files",
            JOptionPane.WARNING_MESSAGE);
}
}

View File

@ -98,13 +98,16 @@ class Ingester {
try {
solr.request(up);
// shouldn't get any checked exceptions, but Tika problems result in
// an unchecked SolrException
// shouldn't get any checked exceptions
} catch (IOException ex) {
throw new RuntimeException(ex);
// It's possible that we will have IO errors
throw new IngesterException("Problem reading file.", ex);
} catch (SolrServerException ex) {
// If there's a problem talking to Solr, something is fundamentally
// wrong with ingest
throw new RuntimeException(ex);
} catch (SolrException ex) {
// Tika problems result in an unchecked SolrException
ErrorCode ec = ErrorCode.getErrorCode(ex.code());
// When Tika has problems with a document, it throws a server error
@ -112,6 +115,7 @@ class Ingester {
if (ec.equals(ErrorCode.SERVER_ERROR)) {
throw new IngesterException("Problem posting file contents to Solr. SolrException error code: " + ec, ex);
} else {
// shouldn't get any other error codes
throw ex;
}
}