Skip bad files during ingest

Try to continue ingesting files even if errors are encountered while reading them or processing them on the server.
This commit is contained in:
Peter J. Martel 2011-11-30 16:34:33 -05:00
parent 6338302fd6
commit 924dcecfd5
2 changed files with 35 additions and 8 deletions

View File

@ -18,6 +18,7 @@
*/ */
package org.sleuthkit.autopsy.keywordsearch; package org.sleuthkit.autopsy.keywordsearch;
import java.awt.Component;
import java.awt.Dimension; import java.awt.Dimension;
import java.awt.Toolkit; import java.awt.Toolkit;
import java.awt.event.ActionEvent; import java.awt.event.ActionEvent;
@ -32,6 +33,8 @@ import java.util.logging.Logger;
import javax.swing.AbstractAction; import javax.swing.AbstractAction;
import javax.swing.JDialog; import javax.swing.JDialog;
import javax.swing.JFrame; import javax.swing.JFrame;
import javax.swing.JOptionPane;
import javax.swing.SwingUtilities;
import javax.swing.SwingWorker; import javax.swing.SwingWorker;
import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException; import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;
import org.sleuthkit.datamodel.Content; import org.sleuthkit.datamodel.Content;
@ -62,10 +65,10 @@ public class IndexContentFilesAction extends AbstractAction {
// initialize panel // initialize panel
final IndexProgressPanel panel = new IndexProgressPanel(); final IndexProgressPanel panel = new IndexProgressPanel();
final SwingWorker task = new SwingWorker<Void, String>() { final SwingWorker task = new SwingWorker<Integer, String>() {
@Override @Override
protected Void doInBackground() throws Exception { protected Integer doInBackground() throws Exception {
Ingester ingester = new Ingester("http://localhost:8983/solr"); Ingester ingester = new Ingester("http://localhost:8983/solr");
Collection<FsContent> files = c.accept(new GetIngestableFilesContentVisitor()); Collection<FsContent> files = c.accept(new GetIngestableFilesContentVisitor());
@ -74,10 +77,11 @@ public class IndexContentFilesAction extends AbstractAction {
int fileCount = files.size(); int fileCount = files.size();
int finishedFiles = 0; int finishedFiles = 0;
int problemFiles = 0;
for (FsContent f : files) { for (FsContent f : files) {
if (isCancelled()) { if (isCancelled()) {
return null; return problemFiles;
} }
this.publish("Indexing " + (finishedFiles + 1) + "/" + fileCount + ": " + f.getName()); this.publish("Indexing " + (finishedFiles + 1) + "/" + fileCount + ": " + f.getName());
@ -86,6 +90,7 @@ public class IndexContentFilesAction extends AbstractAction {
ingester.ingest(f); ingester.ingest(f);
} catch (IngesterException ex) { } catch (IngesterException ex) {
logger.log(Level.INFO, "Ingester had a problem with file '" + f.getName() + "' (id: " + f.getId() + ").", ex); logger.log(Level.INFO, "Ingester had a problem with file '" + f.getName() + "' (id: " + f.getId() + ").", ex);
problemFiles++;
} }
setProgress(++finishedFiles * 100 / fileCount); setProgress(++finishedFiles * 100 / fileCount);
@ -93,14 +98,16 @@ public class IndexContentFilesAction extends AbstractAction {
ingester.commit(); ingester.commit();
return null; return problemFiles;
} }
@Override @Override
protected void done() { protected void done() {
int problemFiles = 0;
try { try {
if (!this.isCancelled()) { if (!this.isCancelled()) {
get(); problemFiles = get();
} }
} catch (InterruptedException ex) { } catch (InterruptedException ex) {
@ -111,6 +118,9 @@ public class IndexContentFilesAction extends AbstractAction {
} finally { } finally {
popUpWindow.setVisible(false); popUpWindow.setVisible(false);
popUpWindow.dispose(); popUpWindow.dispose();
if (problemFiles > 0) {
displayProblemFilesDialog(problemFiles);
}
} }
} }
@ -159,4 +169,17 @@ public class IndexContentFilesAction extends AbstractAction {
// display the window // display the window
popUpWindow.setVisible(true); popUpWindow.setVisible(true);
} }
/**
 * Shows a warning dialog telling the user how many files could not be
 * indexed and pointing them to the log for details.
 *
 * @param problemFiles number of files to report in the warning message
 */
private void displayProblemFilesDialog(int problemFiles) {
    // A null parent component makes JOptionPane use its default window frame.
    JOptionPane.showMessageDialog(
            null,
            "Had trouble indexing " + problemFiles + " of the files. See the log for details.",
            "Problem indexing some files",
            JOptionPane.WARNING_MESSAGE);
}
} }

View File

@ -98,13 +98,16 @@ class Ingester {
try { try {
solr.request(up); solr.request(up);
// shouldn't get any checked exceptions, but Tika problems result in // shouldn't get any checked exceptions,
// an unchecked SolrException
} catch (IOException ex) { } catch (IOException ex) {
throw new RuntimeException(ex); // It's possible that we will have IO errors
throw new IngesterException("Problem reading file.", ex);
} catch (SolrServerException ex) { } catch (SolrServerException ex) {
// If there's a problem talking to Solr, something is fundamentally
// wrong with ingest
throw new RuntimeException(ex); throw new RuntimeException(ex);
} catch (SolrException ex) { } catch (SolrException ex) {
// Tika problems result in an unchecked SolrException
ErrorCode ec = ErrorCode.getErrorCode(ex.code()); ErrorCode ec = ErrorCode.getErrorCode(ex.code());
// When Tika has problems with a document, it throws a server error // When Tika has problems with a document, it throws a server error
@ -112,6 +115,7 @@ class Ingester {
if (ec.equals(ErrorCode.SERVER_ERROR)) { if (ec.equals(ErrorCode.SERVER_ERROR)) {
throw new IngesterException("Problem posting file contents to Solr. SolrException error code: " + ec, ex); throw new IngesterException("Problem posting file contents to Solr. SolrException error code: " + ec, ex);
} else { } else {
// shouldn't get any other error codes
throw ex; throw ex;
} }
} }