mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-17 18:17:43 +00:00
Implemented the second pass algorithm to improve mime type accuracy
This commit is contained in:
parent
616ac03da4
commit
aaa22f80d8
@ -109,7 +109,9 @@ public class FileTypeDetector {
|
||||
* Tika, and Autopsy file type definitions take precedence over Tika.
|
||||
*
|
||||
* @throws FileTypeDetectorInitException If an initialization error occurs,
|
||||
* e.g., user-defined file type definitions exist but cannot be loaded.
|
||||
* e.g., user-defined file type
|
||||
* definitions exist but cannot be
|
||||
* loaded.
|
||||
*/
|
||||
public FileTypeDetector() throws FileTypeDetectorInitException {
|
||||
try {
|
||||
@ -223,7 +225,7 @@ public class FileTypeDetector {
|
||||
ReadContentInputStream stream = new ReadContentInputStream(file);
|
||||
|
||||
try (TikaInputStream tikaInputStream = TikaInputStream.get(stream)) {
|
||||
String tikaType = tika.detect(tikaInputStream, file.getName());
|
||||
String tikaType = tika.detect(tikaInputStream);
|
||||
|
||||
/*
|
||||
* Remove the Tika suffix from the MIME type name.
|
||||
@ -234,6 +236,23 @@ public class FileTypeDetector {
|
||||
*/
|
||||
mimeType = removeOptionalParameter(mimeType);
|
||||
|
||||
/*
|
||||
* If Tika recognizes the file signature, then use the file
|
||||
* name to refine the type. In short, this is to exclude the
|
||||
* mime types that are determined solely by file extension.
|
||||
* More details in JIRA-4871.
|
||||
*/
|
||||
if (!mimeType.equals(MimeTypes.OCTET_STREAM)) {
|
||||
ReadContentInputStream secondPassStream = new ReadContentInputStream(file);
|
||||
try (TikaInputStream secondPassTikaStream = TikaInputStream.get(secondPassStream)) {
|
||||
tikaType = tika.detect(secondPassTikaStream, file.getName());
|
||||
mimeType = tikaType.replace("tika-", ""); //NON-NLS
|
||||
mimeType = removeOptionalParameter(mimeType);
|
||||
} catch (Exception ex) {
|
||||
throw ex;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* We cannot trust Tika's audio/mpeg mimetype. Let's verify the
|
||||
* first two bytes and confirm it is not 0xffff. Details in
|
||||
@ -275,6 +294,7 @@ public class FileTypeDetector {
|
||||
* first 4 bits.
|
||||
*
|
||||
* @param x byte
|
||||
*
|
||||
* @return Flag indicating whether the byte is 0xFF
|
||||
*/
|
||||
private boolean byteIs0xFF(byte x) {
|
||||
@ -287,6 +307,7 @@ public class FileTypeDetector {
|
||||
* @param file Abstract file to read
|
||||
* @param offset Offset to begin reading
|
||||
* @param n Number of bytes to read
|
||||
*
|
||||
* @return Byte array of size n
|
||||
*
|
||||
* @throws TskCoreException
|
||||
|
Loading…
x
Reference in New Issue
Block a user