mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-17 18:17:43 +00:00
Implemented the second pass algorithm to improve mime type accuracy
This commit is contained in:
parent
616ac03da4
commit
aaa22f80d8
@ -58,7 +58,7 @@ public class FileTypeDetector {
|
|||||||
* @return A list of all detectable file types.
|
* @return A list of all detectable file types.
|
||||||
*
|
*
|
||||||
* @throws FileTypeDetectorInitException If an error occurs while assembling
|
* @throws FileTypeDetectorInitException If an error occurs while assembling
|
||||||
* the list of types
|
* the list of types
|
||||||
*/
|
*/
|
||||||
public static synchronized SortedSet<String> getDetectedTypes() throws FileTypeDetectorInitException {
|
public static synchronized SortedSet<String> getDetectedTypes() throws FileTypeDetectorInitException {
|
||||||
TreeSet<String> detectedTypes = new TreeSet<>((String string1, String string2) -> {
|
TreeSet<String> detectedTypes = new TreeSet<>((String string1, String string2) -> {
|
||||||
@ -109,7 +109,9 @@ public class FileTypeDetector {
|
|||||||
* Tika, and Autopsy file type definitions take precedence over Tika.
|
* Tika, and Autopsy file type definitions take precedence over Tika.
|
||||||
*
|
*
|
||||||
* @throws FileTypeDetectorInitException If an initialization error occurs,
|
* @throws FileTypeDetectorInitException If an initialization error occurs,
|
||||||
* e.g., user-defined file type definitions exist but cannot be loaded.
|
* e.g., user-defined file type
|
||||||
|
* definitions exist but cannot be
|
||||||
|
* loaded.
|
||||||
*/
|
*/
|
||||||
public FileTypeDetector() throws FileTypeDetectorInitException {
|
public FileTypeDetector() throws FileTypeDetectorInitException {
|
||||||
try {
|
try {
|
||||||
@ -139,7 +141,7 @@ public class FileTypeDetector {
|
|||||||
* user-defined MIME type by this detector.
|
* user-defined MIME type by this detector.
|
||||||
*
|
*
|
||||||
* @param customTypes
|
* @param customTypes
|
||||||
* @param mimeType The MIME type name (e.g., "text/html").
|
* @param mimeType The MIME type name (e.g., "text/html").
|
||||||
*
|
*
|
||||||
* @return True or false.
|
* @return True or false.
|
||||||
*/
|
*/
|
||||||
@ -170,7 +172,7 @@ public class FileTypeDetector {
|
|||||||
* @param file The file to test.
|
* @param file The file to test.
|
||||||
*
|
*
|
||||||
* @return A MIME type name. If file type could not be detected, or results
|
* @return A MIME type name. If file type could not be detected, or results
|
||||||
* were uncertain, octet-stream is returned.
|
* were uncertain, octet-stream is returned.
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
@ -223,7 +225,7 @@ public class FileTypeDetector {
|
|||||||
ReadContentInputStream stream = new ReadContentInputStream(file);
|
ReadContentInputStream stream = new ReadContentInputStream(file);
|
||||||
|
|
||||||
try (TikaInputStream tikaInputStream = TikaInputStream.get(stream)) {
|
try (TikaInputStream tikaInputStream = TikaInputStream.get(stream)) {
|
||||||
String tikaType = tika.detect(tikaInputStream, file.getName());
|
String tikaType = tika.detect(tikaInputStream);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Remove the Tika suffix from the MIME type name.
|
* Remove the Tika suffix from the MIME type name.
|
||||||
@ -233,6 +235,23 @@ public class FileTypeDetector {
|
|||||||
* Remove the optional parameter from the MIME type.
|
* Remove the optional parameter from the MIME type.
|
||||||
*/
|
*/
|
||||||
mimeType = removeOptionalParameter(mimeType);
|
mimeType = removeOptionalParameter(mimeType);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If Tika recognizes the file signature, then use the file
|
||||||
|
* name to refine the type. In short, this is to exclude the
|
||||||
|
* mime types that are determined solely by file extension.
|
||||||
|
* More details in JIRA-4871.
|
||||||
|
*/
|
||||||
|
if (!mimeType.equals(MimeTypes.OCTET_STREAM)) {
|
||||||
|
ReadContentInputStream secondPassStream = new ReadContentInputStream(file);
|
||||||
|
try (TikaInputStream secondPassTikaStream = TikaInputStream.get(secondPassStream)) {
|
||||||
|
tikaType = tika.detect(secondPassTikaStream, file.getName());
|
||||||
|
mimeType = tikaType.replace("tika-", ""); //NON-NLS
|
||||||
|
mimeType = removeOptionalParameter(mimeType);
|
||||||
|
} catch (Exception ex) {
|
||||||
|
throw ex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* We cannot trust Tika's audio/mpeg mimetype. Let's verify the
|
* We cannot trust Tika's audio/mpeg mimetype. Let's verify the
|
||||||
@ -275,6 +294,7 @@ public class FileTypeDetector {
|
|||||||
* first 4 bits.
|
* first 4 bits.
|
||||||
*
|
*
|
||||||
* @param x byte
|
* @param x byte
|
||||||
|
*
|
||||||
* @return Flag indicating whether the byte is 0xFF
|
* @return Flag indicating whether the byte is 0xFF
|
||||||
*/
|
*/
|
||||||
private boolean byteIs0xFF(byte x) {
|
private boolean byteIs0xFF(byte x) {
|
||||||
@ -284,9 +304,10 @@ public class FileTypeDetector {
|
|||||||
/**
|
/**
|
||||||
* Retrieves the first N bytes from a file.
|
* Retrieves the first N bytes from a file.
|
||||||
*
|
*
|
||||||
* @param file Abstract file to read
|
* @param file Abstract file to read
|
||||||
* @param offset Offset to begin reading
|
* @param offset Offset to begin reading
|
||||||
* @param n Number of bytes to read
|
* @param n Number of bytes to read
|
||||||
|
*
|
||||||
* @return Byte array of size n
|
* @return Byte array of size n
|
||||||
*
|
*
|
||||||
* @throws TskCoreException
|
* @throws TskCoreException
|
||||||
@ -371,7 +392,7 @@ public class FileTypeDetector {
|
|||||||
* Constructs an exception to throw if an initialization error occurs,
|
* Constructs an exception to throw if an initialization error occurs,
|
||||||
* e.g., user-defined file type definitions exist but cannot be loaded.
|
* e.g., user-defined file type definitions exist but cannot be loaded.
|
||||||
*
|
*
|
||||||
* @param message The exception message,
|
* @param message The exception message,
|
||||||
* @param throwable The underlying cause of the exception.
|
* @param throwable The underlying cause of the exception.
|
||||||
*/
|
*/
|
||||||
FileTypeDetectorInitException(String message, Throwable throwable) {
|
FileTypeDetectorInitException(String message, Throwable throwable) {
|
||||||
@ -409,7 +430,7 @@ public class FileTypeDetector {
|
|||||||
* @return A MIME type name.
|
* @return A MIME type name.
|
||||||
*
|
*
|
||||||
* @throws TskCoreException if detection is required and there is a problem
|
* @throws TskCoreException if detection is required and there is a problem
|
||||||
* writing the result to the case database.
|
* writing the result to the case database.
|
||||||
* @deprecated Use getMIMEType instead, and call AbstractFile.setMIMEType
|
* @deprecated Use getMIMEType instead, and call AbstractFile.setMIMEType
|
||||||
* and AbstractFile.save to save the result to the file object and the
|
* and AbstractFile.save to save the result to the file object and the
|
||||||
* database.
|
* database.
|
||||||
@ -429,10 +450,10 @@ public class FileTypeDetector {
|
|||||||
* @param file The file.
|
* @param file The file.
|
||||||
*
|
*
|
||||||
* @return A MIME type name. If file type could not be detected or results
|
* @return A MIME type name. If file type could not be detected or results
|
||||||
* were uncertain, octet-stream is returned.
|
* were uncertain, octet-stream is returned.
|
||||||
*
|
*
|
||||||
* @throws TskCoreException if detection is required and there is a problem
|
* @throws TskCoreException if detection is required and there is a problem
|
||||||
* writing the result to the case database.
|
* writing the result to the case database.
|
||||||
*
|
*
|
||||||
* @deprecated Use getMIMEType instead, and call AbstractFile.setMIMEType
|
* @deprecated Use getMIMEType instead, and call AbstractFile.setMIMEType
|
||||||
* and AbstractFile.save to save the result to the file object and the
|
* and AbstractFile.save to save the result to the file object and the
|
||||||
@ -453,7 +474,7 @@ public class FileTypeDetector {
|
|||||||
* @param file The file to test.
|
* @param file The file to test.
|
||||||
*
|
*
|
||||||
* @return A MIME type name. If file type could not be detected or results
|
* @return A MIME type name. If file type could not be detected or results
|
||||||
* were uncertain, octet-stream is returned.
|
* were uncertain, octet-stream is returned.
|
||||||
*
|
*
|
||||||
* @throws TskCoreException
|
* @throws TskCoreException
|
||||||
* @deprecated Use getMIMEType instead.
|
* @deprecated Use getMIMEType instead.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user