From 20ddc1be249e84459605f493f9f78c10a2abb6c8 Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Thu, 24 Jan 2019 11:21:02 -0500 Subject: [PATCH] Add a check for the first two bytes of Tika output if audio/mpeg --- .../modules/filetypeid/FileTypeDetector.java | 73 +++++++++++++++---- 1 file changed, 58 insertions(+), 15 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/modules/filetypeid/FileTypeDetector.java b/Core/src/org/sleuthkit/autopsy/modules/filetypeid/FileTypeDetector.java index 8c225381df..dace794e85 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/filetypeid/FileTypeDetector.java +++ b/Core/src/org/sleuthkit/autopsy/modules/filetypeid/FileTypeDetector.java @@ -23,6 +23,7 @@ import java.util.Collections; import java.util.List; import java.util.SortedSet; import java.util.TreeSet; +import java.util.logging.Level; import java.util.stream.Collectors; import org.apache.tika.Tika; import org.apache.tika.io.TikaInputStream; @@ -57,7 +58,7 @@ public class FileTypeDetector { * @return A list of all detectable file types. * * @throws FileTypeDetectorInitException If an error occurs while assembling - * the list of types + * the list of types */ public static synchronized SortedSet getDetectedTypes() throws FileTypeDetectorInitException { TreeSet detectedTypes = new TreeSet<>((String string1, String string2) -> { @@ -108,9 +109,7 @@ public class FileTypeDetector { * Tika, and Autopsy file type definitions take precendence over Tika. * * @throws FileTypeDetectorInitException If an initialization error occurs, - * e.g., user-defined file type - * definitions exist but cannot be - * loaded. + * e.g., user-defined file type definitions exist but cannot be loaded. */ public FileTypeDetector() throws FileTypeDetectorInitException { try { @@ -140,7 +139,7 @@ public class FileTypeDetector { * user-defined MIME type by this detector. * * @param customTypes - * @param mimeType The MIME type name (e.g., "text/html"). + * @param mimeType The MIME type name (e.g., "text/html"). * * @return True or false. */ @@ -171,9 +170,9 @@ public class FileTypeDetector { * @param file The file to test. * * @return A MIME type name. If file type could not be detected, or results - * were uncertain, octet-stream is returned. - * - + * were uncertain, octet-stream is returned. + * + * */ public String getMIMEType(AbstractFile file) { /* @@ -235,6 +234,22 @@ public class FileTypeDetector { */ mimeType = removeOptionalParameter(mimeType); + /** + * We cannot trust Tika's audio/mpeg mimetype. Lets verify the + * first two bytes and confirm it is not 0xffff. Details in + * JIRA-4659 + */ + if (mimeType.contains("audio/mpeg")) { + try { + byte[] header = getNBytes(file, 0, 2); + if (byteIs0xFF(header[0]) && byteIs0xFF(header[1])) { + mimeType = MimeTypes.OCTET_STREAM; + } + } catch (TskCoreException ex) { + //Oh well, the mimetype is what it is. + logger.log(Level.WARNING, String.format("Could not verify audio/mpeg mimetype for file %s with id=%d", file.getName(), file.getId()), ex); + } + } } catch (Exception ignored) { /* * This exception is swallowed and not logged rather than @@ -255,6 +270,33 @@ public class FileTypeDetector { return mimeType; } + /** + * Determine if the byte is 255 (0xFF) by examining the last 4 bits and the + * first 4 bits. + * + * @param x byte + * @return Flag indicating the byte if 0xFF + */ + private boolean byteIs0xFF(byte x) { + return (x & 0x0F) == 0x0F && (x & 0xF0) == 0xF0; + } + + /** + * Retrieves the first N bytes from a file. + * + * @param file Abstract file to read + * @param offset Offset to begin reading + * @param n Number of bytes to read + * @return Byte array of size n + * + * @throws TskCoreException + */ + private byte[] getNBytes(AbstractFile file, int offset, int n) throws TskCoreException { + byte[] headerCache = new byte[n]; + file.read(headerCache, offset, n); + return headerCache; + } + /** * Removes the optional parameter from a MIME type string * @@ -280,7 +322,7 @@ public class FileTypeDetector { */ private String detectUserDefinedType(AbstractFile file) { String retValue = null; - + for (FileType fileType : userDefinedFileTypes) { if (fileType.matches(file)) { retValue = fileType.getMimeType(); @@ -291,7 +333,8 @@ public class FileTypeDetector { } /** - * Determines whether or not a file matches a custom file type defined by Autopsy. + * Determines whether or not a file matches a custom file type defined by + * Autopsy. * * @param file The file to test. * @@ -328,7 +371,7 @@ public class FileTypeDetector { * Constructs an exception to throw if an initialization error occurs, * e.g., user-defined file type definitions exist but cannot be loaded. * - * @param message The exception message, + * @param message The exception message, * @param throwable The underlying cause of the exception. */ FileTypeDetectorInitException(String message, Throwable throwable) { @@ -366,7 +409,7 @@ public class FileTypeDetector { * @return A MIME type name. * * @throws TskCoreException if detection is required and there is a problem - * writing the result to the case database. + * writing the result to the case database. * @deprecated Use getMIMEType instead, and call AbstractFile.setMIMEType * and AbstractFile.save to save the result to the file object and the * database. @@ -386,10 +429,10 @@ public class FileTypeDetector { * @param file The file. * * @return A MIME type name. If file type could not be detected or results - * were uncertain, octet-stream is returned. + * were uncertain, octet-stream is returned. * * @throws TskCoreException if detection is required and there is a problem - * writing the result to the case database. + * writing the result to the case database. * * @deprecated Use getMIMEType instead, and call AbstractFile.setMIMEType * and AbstractFile.save to save the result to the file object and the @@ -410,7 +453,7 @@ public class FileTypeDetector { * @param file The file to test. * * @return A MIME type name. If file type could not be detected or results - * were uncertain, octet-stream is returned. + * were uncertain, octet-stream is returned. * * @throws TskCoreException * @deprecated Use getMIMEType instead.