make tika extractor skip x-msdownload, in favor of strings extraction

This commit is contained in:
adam-m 2013-03-29 11:21:22 -04:00
parent 9bffd1dc32
commit 0d8e56a7e9

View File

@@ -272,7 +272,8 @@ public class AbstractFileTikaTextExtract implements AbstractFileExtract {
public boolean isSupported(AbstractFile file, String detectedFormat) { public boolean isSupported(AbstractFile file, String detectedFormat) {
if (detectedFormat == null) { if (detectedFormat == null) {
return false; return false;
} else if (detectedFormat.equals("application/octet-stream")) { } else if (detectedFormat.equals("application/octet-stream")
|| detectedFormat.equals("application/x-msdownload") ) {
//any binary unstructured blobs (string extraction will be used) //any binary unstructured blobs (string extraction will be used)
return false; return false;
} else if (AbstractFileExtract.ARCHIVE_MIME_TYPES.contains(detectedFormat)) { } else if (AbstractFileExtract.ARCHIVE_MIME_TYPES.contains(detectedFormat)) {