Make Tika extractor skip application/x-msdownload so that strings extraction is used instead

This commit is contained in:
adam-m 2013-03-29 11:21:22 -04:00
parent 9bffd1dc32
commit 0d8e56a7e9

View File

@@ -272,7 +272,8 @@ public class AbstractFileTikaTextExtract implements AbstractFileExtract {
public boolean isSupported(AbstractFile file, String detectedFormat) {
if (detectedFormat == null) {
return false;
} else if (detectedFormat.equals("application/octet-stream")) {
} else if (detectedFormat.equals("application/octet-stream")
|| detectedFormat.equals("application/x-msdownload") ) {
//any binary unstructured blobs (string extraction will be used)
return false;
} else if (AbstractFileExtract.ARCHIVE_MIME_TYPES.contains(detectedFormat)) {