Merge pull request #4480 from dannysmyda/4659-audio-mpeg-fix

4659 Verify Tika's audio/mpeg mimetype
This commit is contained in:
Richard Cordovano 2019-02-11 12:38:48 -05:00 committed by GitHub
commit 2fa42f9386
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -23,6 +23,7 @@ import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.SortedSet; import java.util.SortedSet;
import java.util.TreeSet; import java.util.TreeSet;
import java.util.logging.Level;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.tika.Tika; import org.apache.tika.Tika;
import org.apache.tika.io.TikaInputStream; import org.apache.tika.io.TikaInputStream;
@ -57,7 +58,7 @@ public class FileTypeDetector {
* @return A list of all detectable file types. * @return A list of all detectable file types.
* *
* @throws FileTypeDetectorInitException If an error occurs while assembling * @throws FileTypeDetectorInitException If an error occurs while assembling
* the list of types * the list of types
*/ */
public static synchronized SortedSet<String> getDetectedTypes() throws FileTypeDetectorInitException { public static synchronized SortedSet<String> getDetectedTypes() throws FileTypeDetectorInitException {
TreeSet<String> detectedTypes = new TreeSet<>((String string1, String string2) -> { TreeSet<String> detectedTypes = new TreeSet<>((String string1, String string2) -> {
@ -108,9 +109,7 @@ public class FileTypeDetector {
* Tika, and Autopsy file type definitions take precendence over Tika. * Tika, and Autopsy file type definitions take precendence over Tika.
* *
* @throws FileTypeDetectorInitException If an initialization error occurs, * @throws FileTypeDetectorInitException If an initialization error occurs,
* e.g., user-defined file type * e.g., user-defined file type definitions exist but cannot be loaded.
* definitions exist but cannot be
* loaded.
*/ */
public FileTypeDetector() throws FileTypeDetectorInitException { public FileTypeDetector() throws FileTypeDetectorInitException {
try { try {
@ -140,7 +139,7 @@ public class FileTypeDetector {
* user-defined MIME type by this detector. * user-defined MIME type by this detector.
* *
* @param customTypes * @param customTypes
* @param mimeType The MIME type name (e.g., "text/html"). * @param mimeType The MIME type name (e.g., "text/html").
* *
* @return True or false. * @return True or false.
*/ */
@ -171,9 +170,9 @@ public class FileTypeDetector {
* @param file The file to test. * @param file The file to test.
* *
* @return A MIME type name. If file type could not be detected, or results * @return A MIME type name. If file type could not be detected, or results
* were uncertain, octet-stream is returned. * were uncertain, octet-stream is returned.
* *
*
*/ */
public String getMIMEType(AbstractFile file) { public String getMIMEType(AbstractFile file) {
/* /*
@ -235,6 +234,22 @@ public class FileTypeDetector {
*/ */
mimeType = removeOptionalParameter(mimeType); mimeType = removeOptionalParameter(mimeType);
/**
* We cannot trust Tika's audio/mpeg mimetype. Let's verify the
* first two bytes and confirm they are not 0xFFFF. Details in
* JIRA-4659.
*/
if (mimeType.contains("audio/mpeg")) {
try {
byte[] header = getNBytes(file, 0, 2);
if (byteIs0xFF(header[0]) && byteIs0xFF(header[1])) {
mimeType = MimeTypes.OCTET_STREAM;
}
} catch (TskCoreException ex) {
//Oh well, the mimetype is what it is.
logger.log(Level.WARNING, String.format("Could not verify audio/mpeg mimetype for file %s with id=%d", file.getName(), file.getId()), ex);
}
}
} catch (Exception ignored) { } catch (Exception ignored) {
/* /*
* This exception is swallowed and not logged rather than * This exception is swallowed and not logged rather than
@ -255,6 +270,33 @@ public class FileTypeDetector {
return mimeType; return mimeType;
} }
/**
 * Tests whether a byte has every one of its eight bits set, i.e. whether
 * it equals 0xFF when viewed as an unsigned value.
 *
 * @param x The byte to examine.
 * @return True if the byte is 0xFF, false otherwise.
 */
private boolean byteIs0xFF(byte x) {
    // Mask to the low eight bits so the sign extension applied when the
    // byte is widened to int does not affect the comparison.
    final int unsignedValue = x & 0xFF;
    return unsignedValue == 0xFF;
}
/**
 * Reads a run of bytes from a file into a freshly allocated array.
 *
 * The value returned by the underlying read call is not inspected here.
 *
 * @param file   Abstract file to read
 * @param offset Offset at which reading begins
 * @param n      Number of bytes requested; also the length of the result
 * @return Byte array of size n
 *
 * @throws TskCoreException if the read of the file fails
 */
private byte[] getNBytes(AbstractFile file, int offset, int n) throws TskCoreException {
    final byte[] buffer = new byte[n];
    file.read(buffer, offset, n);
    return buffer;
}
/** /**
* Removes the optional parameter from a MIME type string * Removes the optional parameter from a MIME type string
* *
@ -280,7 +322,7 @@ public class FileTypeDetector {
*/ */
private String detectUserDefinedType(AbstractFile file) { private String detectUserDefinedType(AbstractFile file) {
String retValue = null; String retValue = null;
for (FileType fileType : userDefinedFileTypes) { for (FileType fileType : userDefinedFileTypes) {
if (fileType.matches(file)) { if (fileType.matches(file)) {
retValue = fileType.getMimeType(); retValue = fileType.getMimeType();
@ -291,7 +333,8 @@ public class FileTypeDetector {
} }
/** /**
* Determines whether or not a file matches a custom file type defined by Autopsy. * Determines whether or not a file matches a custom file type defined by
* Autopsy.
* *
* @param file The file to test. * @param file The file to test.
* *
@ -328,7 +371,7 @@ public class FileTypeDetector {
* Constructs an exception to throw if an initialization error occurs, * Constructs an exception to throw if an initialization error occurs,
* e.g., user-defined file type definitions exist but cannot be loaded. * e.g., user-defined file type definitions exist but cannot be loaded.
* *
* @param message The exception message, * @param message The exception message,
* @param throwable The underlying cause of the exception. * @param throwable The underlying cause of the exception.
*/ */
FileTypeDetectorInitException(String message, Throwable throwable) { FileTypeDetectorInitException(String message, Throwable throwable) {
@ -366,7 +409,7 @@ public class FileTypeDetector {
* @return A MIME type name. * @return A MIME type name.
* *
* @throws TskCoreException if detection is required and there is a problem * @throws TskCoreException if detection is required and there is a problem
* writing the result to the case database. * writing the result to the case database.
* @deprecated Use getMIMEType instead, and call AbstractFile.setMIMEType * @deprecated Use getMIMEType instead, and call AbstractFile.setMIMEType
* and AbstractFile.save to save the result to the file object and the * and AbstractFile.save to save the result to the file object and the
* database. * database.
@ -386,10 +429,10 @@ public class FileTypeDetector {
* @param file The file. * @param file The file.
* *
* @return A MIME type name. If file type could not be detected or results * @return A MIME type name. If file type could not be detected or results
* were uncertain, octet-stream is returned. * were uncertain, octet-stream is returned.
* *
* @throws TskCoreException if detection is required and there is a problem * @throws TskCoreException if detection is required and there is a problem
* writing the result to the case database. * writing the result to the case database.
* *
* @deprecated Use getMIMEType instead, and call AbstractFile.setMIMEType * @deprecated Use getMIMEType instead, and call AbstractFile.setMIMEType
* and AbstractFile.save to save the result to the file object and the * and AbstractFile.save to save the result to the file object and the
@ -410,7 +453,7 @@ public class FileTypeDetector {
* @param file The file to test. * @param file The file to test.
* *
* @return A MIME type name. If file type could not be detected or results * @return A MIME type name. If file type could not be detected or results
* were uncertain, octet-stream is returned. * were uncertain, octet-stream is returned.
* *
* @throws TskCoreException * @throws TskCoreException
* @deprecated Use getMIMEType instead. * @deprecated Use getMIMEType instead.