This commit is contained in:
Brian Carrier 2013-08-13 11:17:52 -04:00
commit fe5b1d7db7

View File

@ -120,12 +120,11 @@ public class AbstractFileTikaTextExtract implements AbstractFileExtract {
boolean success = false;
Reader reader = null;
final InputStream stream = new ReadContentInputStream(sourceFile);
try {
Metadata meta = new Metadata();
//Tika parse request with timeout
//Parse the file in a task
Tika tika = new Tika(); //new tika instance for every file, to workaround tika memory issues
ParseRequestTask parseTask = new ParseRequestTask(tika, stream, meta, sourceFile);
final Future<?> future = tikaParseExecutor.submit(parseTask);
@ -145,14 +144,16 @@ public class AbstractFileTikaTextExtract implements AbstractFileExtract {
throw new IngesterException(msg);
}
// get the reader with the results
reader = parseTask.getReader();
if (reader == null) {
//likely due to exception in parse()
logger.log(Level.WARNING, "No reader available from Tika parse");
return false;
}
// break the results into chunks and index
success = true;
long readSize;
long totalRead = 0;
@ -180,8 +181,6 @@ public class AbstractFileTikaTextExtract implements AbstractFileExtract {
//this is the last chunk
eof = true;
}
}
//logger.log(Level.INFO, "TOTAL READ SIZE: " + totalRead + " file: " + sourceFile.getName());
@ -293,8 +292,8 @@ public class AbstractFileTikaTextExtract implements AbstractFileExtract {
}
/**
* Runnable task that calls Tika to parse the content using
* the input stream. Provides a reader for the results.
*/
private static class ParseRequestTask implements Runnable {