Merge branch 'develop' of https://github.com/sleuthkit/autopsy into 4114-AddHasCommentColumn

This commit is contained in:
William Schaefer 2018-09-10 11:19:38 -04:00
commit 7e8c1eec57
2 changed files with 112 additions and 105 deletions

View File

@ -41,6 +41,7 @@ public class AutopsyOptionProcessor extends OptionProcessor {
private static final Logger logger = Logger.getLogger(AutopsyOptionProcessor.class.getName());
private final Option liveAutopsyOption = Option.optionalArgument('l', "liveAutopsy");
// @@@ We should centralize where we store this. It is defined in 2 other places.
private final static String PROP_BASECASE = "LBL_BaseCase_PATH";
@ -56,13 +57,20 @@ public class AutopsyOptionProcessor extends OptionProcessor {
if(values.containsKey(liveAutopsyOption)){
try {
RuntimeProperties.setRunningInTarget(true);
String[] dir= values.get(liveAutopsyOption);
String directory = dir == null ? PlatformUtil.getUserDirectory().toString() : dir[0];
ModuleSettings.setConfigSetting(ModuleSettings.MAIN_SETTINGS, PROP_BASECASE, directory);
// get the starting folder to store cases in
String[] argDirs= values.get(liveAutopsyOption);
String startingCaseDir;
if (argDirs == null || argDirs.length == 0) {
startingCaseDir = PlatformUtil.getUserDirectory().toString();
}
else {
startingCaseDir = argDirs[0];
}
ModuleSettings.setConfigSetting(ModuleSettings.MAIN_SETTINGS, PROP_BASECASE, startingCaseDir);
} catch (RuntimeProperties.RuntimePropertiesException ex) {
logger.log(Level.SEVERE, ex.getMessage(), ex);
}
}
}
}

View File

@ -40,23 +40,25 @@ import org.sleuthkit.datamodel.AbstractFile;
import com.monitorjbl.xlsx.StreamingReader;
import org.apache.poi.hssf.OldExcelFormatException;
/**
* Reads excel files and implements the abstract reader api for interfacing with the
* content. Supports .xls and .xlsx files.
* Reads excel files and implements the abstract reader api for interfacing with
* the content. Supports .xls and .xlsx files.
*/
public final class ExcelReader extends AbstractReader {
/* Boilerplate code */
public final class ExcelReader extends AbstractReader {
/*
* Boilerplate code
*/
private final static IngestServices services = IngestServices.getInstance();
private final static Logger logger = services.getLogger(ExcelReader.class.getName());
private Workbook workbook;
private final static String XLSX_MIME_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
private final static String XLS_MIME_TYPE = "application/vnd.ms-excel";
private final static String EMPTY_CELL_STRING = "";
private Map<String, Row> headerCache;
public ExcelReader(AbstractFile file, String localDiskPath, String mimeType)
public ExcelReader(AbstractFile file, String localDiskPath, String mimeType)
throws FileReaderInitException {
super(file, localDiskPath);
try {
@ -66,22 +68,25 @@ public final class ExcelReader extends AbstractReader {
throw new FileReaderInitException(ex);
}
}
/**
* Internal factory for creating the correct workbook given the mime type. The
* file reader factory in this module passes both the XLSMimeType and XLSXMimeType
* into this constructor for the reader to handle. This avoided the need for creating
* an AbstractExcelReader class and two sub classes overriding the workbook field.
* Additionally, I don't foresee needing to support more than these two mime types.
*
* Internal factory for creating the correct workbook given the mime type.
* The file reader factory in this module passes both the XLSMimeType and
* XLSXMimeType into this constructor for the reader to handle. This avoided
* the need for creating an AbstractExcelReader class and two sub classes
* overriding the workbook field. Additionally, I don't foresee needing to
* support more than these two mime types.
*
* @param localDiskPath To open an input stream for poi to read from
* @param mimeType The mimeType passed to the constructor
* @param mimeType The mimeType passed to the constructor
*
* @return The correct workbook instance
* @throws IOException Issue with input stream and opening file location at
* localDiskPath
*
* @throws IOException Issue with input stream and opening file
* location at localDiskPath
* @throws FileReaderInitException mimetype unsupported
*/
private Workbook createWorkbook(String localDiskPath, String mimeType) throws
private Workbook createWorkbook(String localDiskPath, String mimeType) throws
IOException, FileReaderInitException {
switch (mimeType) {
case XLS_MIME_TYPE:
@ -100,17 +105,20 @@ public final class ExcelReader extends AbstractReader {
//and this can use the same functions below.
return StreamingReader.builder().rowCacheSize(500).open(new File(localDiskPath));
default:
throw new FileReaderInitException(String.format("Excel reader for mime " +
"type [%s] is not supported", mimeType));
throw new FileReaderInitException(String.format("Excel reader for mime "
+ "type [%s] is not supported", mimeType));
}
}
/**
* Returns the number of rows in a given excel table (aka sheet).
*
* Returns the number of rows in a given excel table (aka sheet).
*
* @param tableName Name of table to count total rows from
* @return row count for requested table name
* @throws org.sleuthkit.autopsy.tabulardatareader.AbstractReader.FileReaderException
*
* @return row count for requested table name
*
* @throws
* org.sleuthkit.autopsy.tabulardatareader.AbstractReader.FileReaderException
*/
@Override
public Integer getRowCountFromTable(String tableName) throws FileReaderException {
@ -118,38 +126,18 @@ public final class ExcelReader extends AbstractReader {
}
/**
* Returns a collection of all the rows from a given table in an excel document.
*
* Returns a collection of all the rows from a given table in an excel
* document.
*
* @param tableName Current sheet name being read
* @return A collection of row maps
* @throws org.sleuthkit.autopsy.tabulardatareader.AbstractReader.FileReaderException
*
* @return A collection of row maps
*
* @throws
* org.sleuthkit.autopsy.tabulardatareader.AbstractReader.FileReaderException
*/
@Override
public List<Map<String, Object>> getRowsFromTable(String tableName) throws FileReaderException {
//Pad with + 1 because rows are zero index, thus a LastRowNum() (in getRowCountFromTable()) of 1
//indicates that there are records in 0 and 1 and so a total row count of
//2. This also implies there is no way to determine if a workbook is empty,
//since a last row num of 0 doesnt differentiate between a record in 0 or
//nothing in the workbook. Such a HSSF.
return getRowsFromTable(tableName, 0, getRowCountFromTable(tableName));
}
/**
* Returns a window of rows starting at the offset and ending when the number of rows read
* equals the 'numRowsToRead' parameter or the iterator has nothing left to read.
*
* For instance: offset 1, numRowsToRead 5 would return 5 results (1-5).
* offset 0, numRowsToRead 5 would return 5 results (0-4).
*
* @param tableName Current name of sheet to be read
* @param offset start index to begin reading (documents are 0 indexed)
* @param numRowsToRead number of rows to read
* @return
* @throws org.sleuthkit.autopsy.tabulardatareader.AbstractReader.FileReaderException
*/
@Override
public List<Map<String, Object>> getRowsFromTable(String tableName,
int offset, int numRowsToRead) throws FileReaderException {
//StreamingReader maintains the same pointer to a sheet rowIterator, so this
//call returns an iterator that could have already been iterated on instead
//of a fresh copy. We must cache the header value from the call to
@ -157,58 +145,66 @@ public final class ExcelReader extends AbstractReader {
//missed.
Iterator<Row> sheetIter = workbook.getSheet(tableName).rowIterator();
List<Map<String, Object>> rowList = new ArrayList<>();
//Read the header value as the header may be a row of data in the
//excel sheet
if(headerCache.containsKey(tableName)) {
if (headerCache.containsKey(tableName)) {
Row header = headerCache.get(tableName);
if(header.getRowNum() >= offset
&& header.getRowNum() < (offset + numRowsToRead)) {
rowList.add(getRowMap(tableName, header));
}
rowList.add(getRowMap(tableName, header));
}
while(sheetIter.hasNext()) {
while (sheetIter.hasNext()) {
Row currRow = sheetIter.next();
//If the current row number is within the window of our row capture
if(currRow.getRowNum() >= offset
&& currRow.getRowNum() < (offset + numRowsToRead)) {
rowList.add(getRowMap(tableName, currRow));
}
//if current row number is equal to our upper bound
//of rows requested to be read.
if(currRow.getRowNum() >= (offset + numRowsToRead)) {
break;
}
rowList.add(getRowMap(tableName, currRow));
}
return rowList;
}
/**
* Currently not supported. Returns a window of rows starting at the offset
* and ending when the number of rows read equals the 'numRowsToRead'
* parameter or the iterator has nothing left to read.
*
* For instance: offset 1, numRowsToRead 5 would return 5 results (1-5).
* offset 0, numRowsToRead 5 would return 5 results (0-4).
*
* @param tableName Current name of sheet to be read
* @param offset start index to begin reading (documents are 0
* indexed)
* @param numRowsToRead number of rows to read
*
* @return
*
* @throws
* org.sleuthkit.autopsy.tabulardatareader.AbstractReader.FileReaderException
*/
@Override
public List<Map<String, Object>> getRowsFromTable(String tableName,
        int offset, int numRowsToRead) throws FileReaderException {
    // Windowed (offset / row-count) reads are not implemented by this
    // reader: every call fails, regardless of the arguments supplied.
    final String unsupportedMessage = "Operation Not Supported.";
    throw new FileReaderException(unsupportedMessage);
}
private Map<String, Object> getRowMap(String tableName, Row row) {
Map<String, Object> rowMap = new HashMap<>();
for(Cell cell : row) {
String columnName = getColumnName(cell, tableName);
for (Cell cell : row) {
Object value = getCellValue(cell);
rowMap.put(columnName, value);
rowMap.put(String.valueOf(cell.getColumnIndex()), value);
}
return rowMap;
}
/**
* Returns the value of a given cell. The correct value function must be
* Returns the value of a given cell. The correct value function must be
* called on a cell depending on its type, hence the switch.
*
*
* @param cell Cell object containing a getter function for its value type
*
* @return A generic object pointer to the cell's value
*/
private Object getCellValue(Cell cell){
private Object getCellValue(Cell cell) {
switch (cell.getCellTypeEnum()) {
case BOOLEAN:
return cell.getBooleanCellValue();
case STRING:
return cell.getRichStringCellValue().getString();
return cell.getStringCellValue();
case NUMERIC:
if (DateUtil.isCellDateFormatted(cell)) {
return cell.getDateCellValue();
@ -222,20 +218,21 @@ public final class ExcelReader extends AbstractReader {
return EMPTY_CELL_STRING;
}
}
/**
* Returns the name of the column that the cell currently lives in
* Cell Value: 6784022342 -> Header name: Phone Number
*
* @param cell current cell being read
* Returns the name of the column that the cell currently lives in Cell
* Value: 6784022342 -> Header name: Phone Number
*
* @param cell current cell being read
* @param tableName current sheet name being read
*
* @return the name of the column the current cell lives in
*/
private String getColumnName(Cell cell, String tableName) {
if(headerCache.containsKey(tableName)) {
if (headerCache.containsKey(tableName)) {
Row header = headerCache.get(tableName);
Cell columnHeaderCell = header.getCell(cell.getRowIndex());
if(columnHeaderCell == null) {
if (columnHeaderCell == null) {
return EMPTY_CELL_STRING;
}
Object columnHeaderValue = getCellValue(columnHeaderCell);
@ -246,18 +243,20 @@ public final class ExcelReader extends AbstractReader {
}
/**
* Returns a map of sheet names to headers (header is in a comma-separated string).
* Warning: Only call this ONCE per excel file.
*
* @return A map of sheet names to header strings.
* @throws org.sleuthkit.autopsy.tabulardatareader.AbstractReader.FileReaderException
* Returns a map of sheet names to headers (header is in a comma-separated
* string). Warning: Only call this ONCE per excel file.
*
* @return A map of sheet names to header strings.
*
* @throws
* org.sleuthkit.autopsy.tabulardatareader.AbstractReader.FileReaderException
*/
@Override
public Map<String, String> getTableSchemas() throws FileReaderException {
public Map<String, String> getTableSchemas() throws FileReaderException {
Map<String, String> tableSchemas = new HashMap<>();
for(Sheet sheet : workbook) {
for (Sheet sheet : workbook) {
Iterator<Row> iterator = sheet.rowIterator();
if(iterator.hasNext()) {
if (iterator.hasNext()) {
//Consume header
Row header = iterator.next();
headerCache.put(sheet.getSheetName(), header);
@ -265,10 +264,10 @@ public final class ExcelReader extends AbstractReader {
tableSchemas.put(sheet.getSheetName(), headerStringFormat);
}
}
return tableSchemas;
}
@Override
public void close() {
try {