Major speed improvements, still need to comment and refactor and clean up

This commit is contained in:
U-BASIS\dsmyda 2018-09-10 11:37:41 -04:00
parent 8e0cd3b700
commit b4628e745f

View File

@ -27,6 +27,7 @@ import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.logging.Level; import java.util.logging.Level;
import javax.swing.text.Segment;
import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.tabulardatareader.AbstractReader; import org.sleuthkit.autopsy.tabulardatareader.AbstractReader;
import org.sleuthkit.autopsy.tabulardatareader.AbstractReader.FileReaderInitException; import org.sleuthkit.autopsy.tabulardatareader.AbstractReader.FileReaderInitException;
@ -41,14 +42,18 @@ import org.sleuthkit.autopsy.tabulardatareader.FileReaderFactory;
* Tika problems: * Tika problems:
* 1) Tika fails to open virtual tables * 1) Tika fails to open virtual tables
* 2) Tika fails to open tables with spaces in table name * 2) Tika fails to open tables with spaces in table name
* 3) Tika fails to include the table names in output (except for the first table it parses) * 3) Tika fails to include the table names in output (except for the
* 4) BasisTech > Apache * first table it parses)
* *
*/ */
public class SqliteTextExtractor extends ContentTextExtractor { public class SqliteTextExtractor extends ContentTextExtractor {
private final String SQLITE_MIMETYPE = "application/x-sqlite3"; private final String SQLITE_MIMETYPE = "application/x-sqlite3";
private static final Logger logger = Logger.getLogger(SqliteTextExtractor.class.getName()); private static final Logger logger = Logger.getLogger(SqliteTextExtractor.class.getName());
private final CharSequence EMPTY_CHARACTER_SEQUENCE = "";
LinkedList<String> databaseContents;
Integer characterCount = 0;
@Override @Override
boolean isContentTypeSpecific() { boolean isContentTypeSpecific() {
@ -90,17 +95,11 @@ public class SqliteTextExtractor extends ContentTextExtractor {
*/ */
@Override @Override
public Reader getReader(Content source) throws TextExtractorException { public Reader getReader(Content source) throws TextExtractorException {
StringBuilder databaseBuffer = new StringBuilder();
try (AbstractReader reader = FileReaderFactory.createReader( try (AbstractReader reader = FileReaderFactory.createReader(
SQLITE_MIMETYPE, source)) { SQLITE_MIMETYPE, source)) {
databaseBuffer = new StringBuilder(); final CharSequence databaseContents = getDatabaseContents(source, reader);
//Fill the buffer with table names and table data //CharSource will maintain unicode strings correctly
copyDatabaseIntoBuffer(source, reader, databaseBuffer); return CharSource.wrap(databaseContents).openStream();
//Once the buffer is full, wrap it into a CharSource and open the reader
//This is necessary to maintain integrity of unicode string. Returning
//character by character will not work.
return CharSource.wrap(databaseBuffer.toString()).openStream();
} catch (FileReaderInitException | IOException ex) { } catch (FileReaderInitException | IOException ex) {
throw new TextExtractorException( throw new TextExtractorException(
String.format("Encountered a FileReaderInitException" //NON-NLS String.format("Encountered a FileReaderInitException" //NON-NLS
@ -117,19 +116,43 @@ public class SqliteTextExtractor extends ContentTextExtractor {
* *
* @param reader Sqlite reader for the content source * @param reader Sqlite reader for the content source
* @param source Sqlite file source * @param source Sqlite file source
* @param databaseBuffer Buffer containing all of the database content
*/ */
private void copyDatabaseIntoBuffer(Content source, AbstractReader reader, private CharSequence getDatabaseContents(Content source, AbstractReader reader) {
StringBuilder databaseBuffer) {
try { try {
Map<String, String> tables = reader.getTableSchemas(); Map<String, String> tables = reader.getTableSchemas();
copyDatabaseIntoBuffer(tables, reader, source, databaseBuffer); databaseContents = new LinkedList<>();
copyDatabaseIntoBuffer(tables, reader, source, databaseContents);
return databaseContentsToCharSequence();
} catch (AbstractReader.FileReaderException ex) { } catch (AbstractReader.FileReaderException ex) {
logger.log(Level.WARNING, String.format( logger.log(Level.WARNING, String.format(
"Error attempting to get tables from file: " //NON-NLS "Error attempting to get tables from file: " //NON-NLS
+ "[%s] (id=%d).", source.getName(), //NON-NLS + "[%s] (id=%d).", source.getName(), //NON-NLS
source.getId()), ex); source.getId()), ex);
} }
//Failed to get tables from file
return EMPTY_CHARACTER_SEQUENCE;
}
/**
* Copy linkedList elements into a character array to be wrapped into a
* CharSequence.
*
* @return A character seqeunces of the database contents
*/
private CharSequence databaseContentsToCharSequence() {
final char[] databaseCharacters = new char[characterCount];
int currSequenceIndex = 0;
for (String table : databaseContents) {
System.arraycopy(table.toCharArray(), 0, databaseCharacters, currSequenceIndex, table.length());
currSequenceIndex += table.length();
}
//Segment class does not make an internal copy of the character array
//being passed in (more efficient). It also implements a CharSequences
//necessary for the CharSource class to create a compatible reader.
return new Segment(databaseCharacters, 0, characterCount);
} }
/** /**
@ -140,10 +163,10 @@ public class SqliteTextExtractor extends ContentTextExtractor {
* @param tables A map of table names to table schemas * @param tables A map of table names to table schemas
* @param reader SqliteReader for interfacing with the database * @param reader SqliteReader for interfacing with the database
* @param source Source database file for logging * @param source Source database file for logging
* @param databaseBuffer Buffer containing all of the database content * @param databaseContents List containing all of the database content
*/ */
private void copyDatabaseIntoBuffer(Map<String, String> tables, private void copyDatabaseIntoBuffer(Map<String, String> tables,
AbstractReader reader, Content source, StringBuilder databaseBuffer) { AbstractReader reader, Content source, LinkedList<String> databaseContents) {
for (String tableName : tables.keySet()) { for (String tableName : tables.keySet()) {
TableBuilder tableBuilder = new TableBuilder(); TableBuilder tableBuilder = new TableBuilder();
@ -152,16 +175,16 @@ public class SqliteTextExtractor extends ContentTextExtractor {
List<Map<String, Object>> rowsInTable List<Map<String, Object>> rowsInTable
= reader.getRowsFromTable(tableName); = reader.getRowsFromTable(tableName);
if (!rowsInTable.isEmpty()) { if (!rowsInTable.isEmpty()) {
//Create a collection from the header set, so that the TableBuilder
//can easily format it
tableBuilder.addHeader(new ArrayList<>( tableBuilder.addHeader(new ArrayList<>(
rowsInTable.get(0).keySet())); rowsInTable.get(0).keySet()));
for (Map<String, Object> row : rowsInTable) { for (Map<String, Object> row : rowsInTable) {
tableBuilder.addRow(row.values()); tableBuilder.addRow(row.values());
} }
} }
//If rowsInTable was empty, just append the table as is
databaseBuffer.append(tableBuilder); String formattedTable = tableBuilder.toString();
characterCount += formattedTable.length();
databaseContents.add(formattedTable);
} catch (AbstractReader.FileReaderException ex) { } catch (AbstractReader.FileReaderException ex) {
logger.log(Level.WARNING, String.format( logger.log(Level.WARNING, String.format(
"Error attempting to read file table: [%s]" //NON-NLS "Error attempting to read file table: [%s]" //NON-NLS
@ -177,7 +200,8 @@ public class SqliteTextExtractor extends ContentTextExtractor {
*/ */
private class TableBuilder { private class TableBuilder {
private List<String[]> rows = new LinkedList<>(); private final List<String[]> rows = new LinkedList<>();
private Integer characterCount = 0;
//Formatters //Formatters
private final String HORIZONTAL_DELIMITER = "-"; private final String HORIZONTAL_DELIMITER = "-";
@ -188,6 +212,9 @@ public class SqliteTextExtractor extends ContentTextExtractor {
private final String NEW_LINE = "\n"; private final String NEW_LINE = "\n";
private final String SPACE = " "; private final String SPACE = " ";
//Number of escape sequences in the header row
private final int ESCAPE_SEQUENCES = 4;
private String section = ""; private String section = "";
/** /**
@ -235,6 +262,7 @@ public class SqliteTextExtractor extends ContentTextExtractor {
List<String> rowValues = new ArrayList<>(); List<String> rowValues = new ArrayList<>();
vals.forEach((val) -> { vals.forEach((val) -> {
rowValues.add(val.toString()); rowValues.add(val.toString());
characterCount += val.toString().length();
}); });
rows.add(rowValues.toArray( rows.add(rowValues.toArray(
new String[rowValues.size()])); new String[rowValues.size()]));
@ -242,10 +270,8 @@ public class SqliteTextExtractor extends ContentTextExtractor {
/** /**
* Gets the max width of a cell in each column and the max number of * Gets the max width of a cell in each column and the max number of
* columns in any given row. This ensures that there is enough space for * columns in any given row. This ensures that there are enough columns
* even the longest entry and enough columns. The length of the string * and enough space for even the longest entry.
* seems to be different from the length of the print statement in some
* languages. For instance, arabic will cause the table to look off.
* *
* @return * @return
*/ */
@ -277,7 +303,7 @@ public class SqliteTextExtractor extends ContentTextExtractor {
*/ */
@Override @Override
public String toString() { public String toString() {
StringBuilder outputTable = new StringBuilder(); StringBuilder outputTable = new StringBuilder(characterCount);
int barLength = 0; int barLength = 0;
int[] colMaxWidths = getMaxWidthPerColumn(); int[] colMaxWidths = getMaxWidthPerColumn();
@ -289,7 +315,7 @@ public class SqliteTextExtractor extends ContentTextExtractor {
//formatted header, minus the one tab added at the beginning //formatted header, minus the one tab added at the beginning
//of the row (we want to count the vertical delimiters since //of the row (we want to count the vertical delimiters since
//we want it all to line up. //we want it all to line up.
barLength = outputTable.length() - 4; barLength = outputTable.length() - ESCAPE_SEQUENCES;
} }
addFormattedHeaderToBuffer(outputTable, barLength, header); addFormattedHeaderToBuffer(outputTable, barLength, header);
header = false; header = false;
@ -328,8 +354,7 @@ public class SqliteTextExtractor extends ContentTextExtractor {
/** /**
* Outputs a fully formatted header. * Outputs a fully formatted header.
* *
* Example: \t+----------------------+\n * Example: \t+----------------------+\n \t| Email | Phone | Name |\n
* \t| Email | Phone | Name |\n
* \t+----------------------+\n * \t+----------------------+\n
* *
* @param outputTable Buffer that formatted contents are written to * @param outputTable Buffer that formatted contents are written to