This commit is contained in:
Brian Carrier 2013-08-13 11:17:52 -04:00
commit fe5b1d7db7

View File

@ -120,12 +120,11 @@ public class AbstractFileTikaTextExtract implements AbstractFileExtract {
boolean success = false; boolean success = false;
Reader reader = null; Reader reader = null;
final InputStream stream = new ReadContentInputStream(sourceFile); final InputStream stream = new ReadContentInputStream(sourceFile);
try { try {
Metadata meta = new Metadata(); Metadata meta = new Metadata();
//Tika parse request with timeout
//Parse the file in a task
Tika tika = new Tika(); //new tika instance for every file, to workaround tika memory issues Tika tika = new Tika(); //new tika instance for every file, to workaround tika memory issues
ParseRequestTask parseTask = new ParseRequestTask(tika, stream, meta, sourceFile); ParseRequestTask parseTask = new ParseRequestTask(tika, stream, meta, sourceFile);
final Future<?> future = tikaParseExecutor.submit(parseTask); final Future<?> future = tikaParseExecutor.submit(parseTask);
@ -145,14 +144,16 @@ public class AbstractFileTikaTextExtract implements AbstractFileExtract {
throw new IngesterException(msg); throw new IngesterException(msg);
} }
// get the reader with the results
reader = parseTask.getReader(); reader = parseTask.getReader();
if (reader == null) { if (reader == null) {
//likely due to exception in parse() //likely due to exception in parse()
logger.log(Level.WARNING, "No reader available from Tika parse"); logger.log(Level.WARNING, "No reader available from Tika parse");
return false; return false;
} }
// break the results into chunks and index
success = true; success = true;
long readSize; long readSize;
long totalRead = 0; long totalRead = 0;
@ -180,8 +181,6 @@ public class AbstractFileTikaTextExtract implements AbstractFileExtract {
//this is the last chunk //this is the last chunk
eof = true; eof = true;
} }
} }
//logger.log(Level.INFO, "TOTAL READ SIZE: " + totalRead + " file: " + sourceFile.getName()); //logger.log(Level.INFO, "TOTAL READ SIZE: " + totalRead + " file: " + sourceFile.getName());
@ -293,8 +292,8 @@ public class AbstractFileTikaTextExtract implements AbstractFileExtract {
} }
/** /**
* Runnable and timeable task that calls tika to parse the content using * Runnable task that calls tika to parse the content using
* streaming * the input stream. Provides reader for results.
*/ */
private static class ParseRequestTask implements Runnable { private static class ParseRequestTask implements Runnable {