Merge branch 'scalpel-carver' of https://github.com/tmciver-basis/autopsy into scalpel-carver

This commit is contained in:
adam-m 2013-04-16 15:06:44 -04:00
commit d03e7e9dfa
2 changed files with 336 additions and 17 deletions

View File

@ -57,7 +57,7 @@ public class ScalpelCarverIngestModule implements IngestModuleAbstractFile {
private final String MODULE_NAME = "Scalpel Carver";
private final String MODULE_DESCRIPTION = "Carves files from unallocated space at ingest time.\nCarved files are reanalyzed and displayed in the directory tree.";
private final String MODULE_VERSION = "1.0";
private String configFileName = "scalpel.cfg";
private String configFileName = "scalpel.conf";
private String configFilePath;
private boolean initialized = false;
@ -194,19 +194,6 @@ public class ScalpelCarverIngestModule implements IngestModuleAbstractFile {
return instance;
}
private String getScalpelConfigPath() throws IOException {
// create path to scalpel config file in user's home directory
String scalpelConfig = PlatformUtil.getUserConfigDirectory()
+ File.pathSeparator + configFileName;
// copy the default config file to the user's home directory if one
// is not already there
PlatformUtil.extractResourceToUserConfigDir(this.getClass(), configFileName);
return scalpelConfig;
}
@Override
public void init(IngestModuleInit initContext) {
@ -217,14 +204,25 @@ public class ScalpelCarverIngestModule implements IngestModuleAbstractFile {
return;
}
// get the path to the config file
// create path to scalpel config file in user's home directory
configFilePath = PlatformUtil.getUserConfigDirectory()
+ File.pathSeparator + configFileName;
// copy the default config file to the user's home directory if one
// is not already there
boolean succeeded = false;
try {
configFilePath = getScalpelConfigPath();
succeeded = PlatformUtil.extractResourceToUserConfigDir(this.getClass(), configFileName);
} catch (IOException ex) {
logger.log(Level.SEVERE, "Could not obtain the path to the Scalpel configuration file.", ex);
return;
}
// we're not initialize if we failed
if (!succeeded) {
return;
}
initialized = true;
}

View File

@ -0,0 +1,321 @@
# Scalpel configuration file
# This configuration file controls the types and sizes of files that
# are carved by Scalpel. NOTE THAT THE FORMAT OF THIS FILE WAS
# EXTENDED in Scalpel 1.90-->!
# For each file type, the configuration file describes the file's
# extension, whether the header and footer are case sensitive, the
# min/maximum file size, and the header and footer for the file. The
# footer field is optional, but extension, case sensitivity, size, and
# footer are required. Any line that begins with a '#' is considered
# a comment and ignored. Thus, to skip a file type just put a '#' at
# the beginning of the line containing the rule for the file type.
# If you want files carved without filename extensions, use "NONE" in
# the extension column.
# Beginning with Scalpel 1.90, HEADERS AND/OR FOOTERS MAY BE EITHER
# FIXED STRINGS OR REGULAR EXPRESSIONS.
# Headers and footers are decoded before use, unless they are regular
# expressions. To specify a value in hexadecimal use \x[0-f][0-f] and
# for octal use \[0-3][0-7][0-7]. Spaces can be represented by
# \s. Example: "\x4F\123\I\sCCI" decodes to "OSI CCI".
# To match any single character (aka a wildcard) in a non-regular
# expression header/footer, use a '?'. If you need to search for the
# '?' character, you will need to change the 'wildcard' line *and*
# every occurrence of the old wildcard character in the configuration
# file.
# Regular expressions in extended format can be specified for headers
# or footers by bracketing a header or footer with //, e.g., /GGG[^G]/
# matches a string of three G characters, followed by a character
# other than G. To clarify, here is a complete rule for a file type
# that should be at most 100000 characters, must begin with three G's
# followed by a non-G character and terminate with at least one digit
# character (0-9) followed by five H characters:
# XXX y 100000 /GGG[^G]/ /[0-9]HHHHH/
# Beginning with Scalpel 1.90, minimum carve sizes may be specified
# for each file type using this format for the size parameter:
# smallest:largest e.g.,
# jpg y 5000:100000 \xff\xd8\xff\xe0\x00\x10 \xff\xd9
# carves JPG format image files between 5000 and 100000 bytes in
# length, ignoring files smaller than 5000 bytes. If the minimum
# carve size is not specified, 0 is assumed. This maintains
# compatibility with Scalpel configuration files created prior to
# 1.90.
# The REVERSE keyword after a footer causes a search
# backwards starting from [size] bytes beyond the location of the header
# This is useful for files like PDFs that may contain multiple copies of
# the footer throughout the file. When using the REVERSE keyword you will
# extract bytes from the header to the LAST occurence of the footer (and
# including the footer in the carved file).
# The NEXT keyword after a footer results in file carves that
# include the header and all data BEFORE the first occurence of the
# footer (the footer is not included in the carved file). If no
# occurrence of the footer is discovered within maximum carve size bytes
# from the header, then a block of the disk image including the header
# and with length equal to the maximum carve size is carved. Use NEXT
# when there is no definitive footer for a file type, but you know which
# data should NOT be included in a carved file--e.g., the beginning of
# a subsequent file of the same type.
# FORWARD_NEXT is the default carve type and this keyword may be
# included after the footer, but is not required. For FORWARD_NEXT
# carves, a block of data including the header and the first footer
# (within the maximum carve size) are carved. If no footer appears
# after the header within the maximum carve size, then no carving is
# performed UNLESS the -b command line option is supplied. In this case,
# a block of max carve size bytes, including the header, is carved and a
# notation is made in the Scalpel log that the file was chopped.
# To redefine the wildcard character, change the setting below and all
# occurences in the formost.conf file.
#
#wildcard ?
# case size header footer
#extension sensitive
#
#---------------------------------------------------------------------
# EXAMPLE WITH NO SUFFIX
#---------------------------------------------------------------------
#
# Here is an example of how to use the no extension option. Any files
# beginning with the string "FOREMOST" are carved and no file extensions
# are used. No footer is defined and the max carve size is 1000 bytes.
#
# NONE y 1000 FOREMOST
#
#---------------------------------------------------------------------
# GRAPHICS FILES
#---------------------------------------------------------------------
#
#
# AOL ART files
# art y 150000 \x4a\x47\x04\x0e \xcf\xc7\xcb
# art y 150000 \x4a\x47\x03\x0e \xd0\xcb\x00\x00
#
# GIF and JPG files (very common)
gif y 5000000 \x47\x49\x46\x38\x37\x61 \x00\x3b
gif y 5000000 \x47\x49\x46\x38\x39\x61 \x00\x00\x3b
jpg y 200000000 \xff\xd8\xff\xe0\x00\x10 \xff\xd9
# jpg y 200000000 \xff\xd8\xff\xe1 \xff\xd9
#
#
# PNG
png y 20000000 \x50\x4e\x47? \xff\xfc\xfd\xfe
#
#
# BMP (used by MSWindows, use only if you have reason to think there are
# BMP files worth digging for. This often kicks back a lot of false
# positives
#
# bmp y 100000 BM??\x00\x00\x00
#
# TIFF
tif y 200000000 \x49\x49\x2a\x00
# TIFF
tif y 200000000 \x4D\x4D\x00\x2A
#
#---------------------------------------------------------------------
# VIDEO AND AUDIO FILES
#---------------------------------------------------------------------
#
# AVI (Windows animation and DiVX/MPEG-4 movies)
avi y 50000000 RIFF????AVI
#
# APPLE QUICKTIME
# These needles are based on the file command's magic. I don't
# recommend uncommenting the 4th and 5th Quicktime needles unless
# you're sure you need to, because they generate HUGE numbers of
# false positives.
#
# mov y 10000000 ????moov
# mov y 10000000 ????mdat
# mov y 10000000 ????widev
# mov y 10000000 ????skip
# mov y 10000000 ????free
# mov y 10000000 ????idsc
# mov y 10000000 ????pckg
#
# MPEG Video
mpg y 50000000 \x00\x00\x01\xba \x00\x00\x01\xb9
mpg y 50000000 \x00\x00\x01\xb3 \x00\x00\x01\xb7
#
# FLASH
fws y 4000000 FWS
#
# WAV format
wav y 200000 RIFF????WAVE
#
# REAL AUDIO
# ra y 1000000 .RMF
# ra y 1000000 \x2e\x72\x61\xfd
#
# asf y 8000000 \x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C
#
# WMV/WMA
# wmv y 20000000 \x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C
#
# wma y 8000000 \x30\x26\xB2\x75 \x00\x00\x00\xFF
#
# wma y 8000000 \x30\x26\xB2\x75 \x52\x9A\x12\x46
#
# MP3
# mp3 y 8000000 \xFF\xFB??\x44\x00\x00
# mp3 y 8000000 \x57\x41\x56\45 \x00\x00\xFF\
# mp3 y 8000000 \xFF\xFB\xD0\ \xD1\x35\x51\xCC\
# mp3 y 8000000 \x49\x44\x33\
# mp3 y 8000000 \x4C\x41\x4D\x45\
#
#---------------------------------------------------------------------
# MICROSOFT OFFICE
#---------------------------------------------------------------------
#
# Word documents
#
doc y 10000000 \xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1\x00\x00 \xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1\x00\x00 NEXT
doc y 10000000 \xd0\xcf\x11\xe0\xa1\xb1
#
# Outlook files
pst y 500000000 \x21\x42\x4e\xa5\x6f\xb5\xa6
ost y 500000000 \x21\x42\x44\x4e
#
# Outlook Express
dbx y 10000000 \xcf\xad\x12\xfe\xc5\xfd\x74\x6f
idx y 10000000 \x4a\x4d\x46\x39
mbx y 10000000 \x4a\x4d\x46\x36
#
#---------------------------------------------------------------------
# WORDPERFECT
#---------------------------------------------------------------------
#
# wpc y 1000000 ?WPC
#
#---------------------------------------------------------------------
# HTML
#---------------------------------------------------------------------
#
htm n 50000 <html </html>
#
#---------------------------------------------------------------------
# ADOBE PDF
#---------------------------------------------------------------------
#
pdf y 5000000 %PDF %EOF\x0d REVERSE
pdf y 5000000 %PDF %EOF\x0a REVERSE
#
#---------------------------------------------------------------------
# AOL (AMERICA ONLINE)
#---------------------------------------------------------------------
#
# AOL Mailbox
# mail y 500000 \x41\x4f\x4c\x56\x4d
#
#---------------------------------------------------------------------
# RPM (Linux package format)
#---------------------------------------------------------------------
# rpm y 1000000 \xed\xab
#---------------------------------------------------------------------
# WINDOWS REGISTRY FILES
#---------------------------------------------------------------------
#
# Windows NT registry
# dat y 4000000 regf
# Windows 95 registry
# dat y 4000000 CREG
#
#---------------------------------------------------------------------
# MISCELLANEOUS
#---------------------------------------------------------------------
#
zip y 10000000 PK\x03\x04 \x3c\xac
# rar y 10000000 Rar!
java y 1000000 \xca\xfe\xba\xbe
#
#---------------------------------------------------------------------
# ScanSoft PaperPort "Max" files
#---------------------------------------------------------------------
# max y 1000000 \x56\x69\x47\x46\x6b\x1a\x00\x00\x00\x00 \x00\x00\x05\x80\x00\x00
#---------------------------------------------------------------------
# PINs Password Manager program
#---------------------------------------------------------------------
# pins y 8000 \x50\x49\x4e\x53\x20\x34\x2e\x32\x30\x0d
#---------------------------------------------------------------------
# Experimental header for Virtual Box disks
# vbox y 10000000000 <<<????????????????????????????????????????????????????????????\x00\x7f\x10\xda\xbe
#---------------------------------------------------------------------
# Tar/gzip files
tgz y 2000000 \x1f\x8b\x08\x08
#---------------------------------------------------------------------
# 7-zip (courtesy of Brandon de Graaf)
# 7z y 2147483648 \x37\x7a\xbc\xaf\x27\x1c
#---------------------------------------------------------------------
# OGG (courtesy of Daniek Weuthen)
ogg y 15728640 x4fx67x67x53x00x02 x4fx67x67x53x00x02 NEXT
#---------------------------------------------------------------------
# LNK files (courtesy of Christina Dijkshoorn)
#
# lnk y 4000 \x4c\x00\x00\x00\x01\x14\x02\x00\x00\x00\x00\x00\xc0\x00\x00\x00\x00\x00\x00\x46
#---------------------------------------------------------------------
# Print spool files (courtesy of Christina Dijkshoorn)
#
# Windows XP
# shd y 2000 \x67\x49\x00\x00
# Windows 98
# shd y 2000 \x4B\x49\x00\x00
#---------------------------------------------------------------------
# Blender 3D and Finale Music (courtesy of Rick Spoketire)
# blend y 1000000000 BLENDER_v ENDB
#
# mus y 1000000000 ENIGMA\x20BINARY\x20FILE \x13\x00\x06\x00\x00\x00
#
#---------------------------------------------------------------------
#---------------------------------------------------------------------
#---------------------------------------------------------------------
# iPhone headers/footers w/ associated explanations, courtesy of
# Jonathan A. Zdziarski
#
#
# Dynamic dictionary files are keyboard caches used for learning
# specific spellings of words used frequently by the iPhones
# user
#
# dat y 8192 DynamicDictionary
#
# The AMR codec is an audio codec designed by Ericsson. It yields high
# quality audio playback for voice content. AMR is used on the iPhone to
# deliver voicemail messages. To extract longer chunks of voicemail
# messages, adjust the file size specified above.
#
# amr y 65535 #!AMR
#
# A .plist file is a configuration file used heavily in the Mac OS
# world, including the iPhone. Many preloaded applications, as well as
# Apples operating system components, use .plist files to store
# anything from basic configuration data to history and cache
# information. By examining these files, the technician can get an idea
# of what websites the suspect may have previously visited, even after
# deleting a cache. Other useful information may include location lookup
# caches (revealing maps the suspect has looked up), mail server
# information, etc.
#
# plist y 4096 <plist </plist
#
# Simple email header
#
# email y 4096 From:
#
#---------------------------------------------------------------------
#---------------------------------------------------------------------
#---------------------------------------------------------------------