mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-09 06:39:33 +00:00
better variable names, fix typo in ascii test, use UTF8 charset in stream, move string stream class
This commit is contained in:
parent
95097e16d9
commit
ac4cc7c624
@ -119,54 +119,44 @@ public class DataConversion {
|
|||||||
* -- When looking for ASCII strings, they evaluate each byte and when they find four or more printable characters they get printed out with a newline in between each string.
|
* -- When looking for ASCII strings, they evaluate each byte and when they find four or more printable characters they get printed out with a newline in between each string.
|
||||||
* -- When looking for Unicode strings, they evaluate each two byte sequence and look for four or more printable characters…
|
* -- When looking for Unicode strings, they evaluate each two byte sequence and look for four or more printable characters…
|
||||||
*
|
*
|
||||||
* @param args the bytes that the string read from
|
* @param readBuf the bytes that the string is read from
|
||||||
* @param len length of text in the buffer to convert, starting at position 0
|
* @param len length of text in the buffer to convert, starting at position 0
|
||||||
* @param parameter the "length" parameter for the string
|
* @param minStringLen minimum length of consecutive chars to qualify as a string
|
||||||
*
|
*
|
||||||
|
* TODO should be encoding specific and detect UTF8, UTF16LE, UTF16BE
|
||||||
|
* then process remainder of the string using detected encoding
|
||||||
|
*
|
||||||
* @author jantonius
|
* @author jantonius
|
||||||
*/
|
*/
|
||||||
public static String getString(byte[] args, int len, int parameter) {
|
public static String getString(byte[] readBuf, int len, int minStringLen) {
|
||||||
|
|
||||||
/*
|
|
||||||
// these encoding might be needed for later
|
|
||||||
// Note: if not used, can be deleted
|
|
||||||
CharsetEncoder asciiEncoder =
|
|
||||||
Charset.forName("US-ASCII").newEncoder(); // or "ISO-8859-1" for ISO Latin 1
|
|
||||||
|
|
||||||
CharsetEncoder utf8Encoder =
|
|
||||||
Charset.forName("UTF-8").newEncoder();
|
|
||||||
*/
|
|
||||||
final StringBuilder result = new StringBuilder();
|
final StringBuilder result = new StringBuilder();
|
||||||
StringBuilder temp = new StringBuilder();
|
StringBuilder temp = new StringBuilder();
|
||||||
int counter = 0;
|
int curLen = 0;
|
||||||
//char[] converted = new java.lang.System.Text.Encoding.ASCII.GetString(args).ToCharArray();
|
|
||||||
|
|
||||||
final char NL = (char) 10; // ASCII char for new line
|
final char NL = (char) 10; // ASCII char for new line
|
||||||
final String NLS = Character.toString(NL);
|
final String NLS = Character.toString(NL);
|
||||||
boolean isZero = false;
|
boolean singleConsecZero = false; //preserve the current sequence of chars if 1 consecutive zero char
|
||||||
for (int i = 0; i < len; i++) {
|
for (int i = 0; i < len; i++) {
|
||||||
char curChar = (char) args[i];
|
char curChar = (char) readBuf[i];
|
||||||
|
if (curChar == 0 && singleConsecZero == false) {
|
||||||
if (curChar == 0 && isZero == false) {
|
//preserve the current sequence if there is at most 1 consecutive zero char
|
||||||
//allow to skip one 0
|
singleConsecZero = true;
|
||||||
isZero = true;
|
|
||||||
} else {
|
} else {
|
||||||
isZero = false;
|
singleConsecZero = false;
|
||||||
}
|
}
|
||||||
//ignore non-printable ASCII chars
|
//ignore non-printable ASCII chars
|
||||||
//use 32-126 and not TAB ( 9)
|
if (isPrintableAscii(curChar)) {
|
||||||
if (isUsableChar(curChar)) {
|
|
||||||
temp.append(curChar);
|
temp.append(curChar);
|
||||||
++counter;
|
++curLen;
|
||||||
} else if (!isZero) {
|
} else if (!singleConsecZero) {
|
||||||
if (counter >= parameter) {
|
if (curLen >= minStringLen) {
|
||||||
// add to the result and also add the new line at the end
|
// add to the result and also add the new line at the end
|
||||||
result.append(temp);
|
result.append(temp);
|
||||||
result.append(NLS);
|
result.append(NLS);
|
||||||
}
|
}
|
||||||
// reset the temp and counter
|
// reset the temp and curLen
|
||||||
temp = new StringBuilder();
|
temp = new StringBuilder();
|
||||||
counter = 0;
|
curLen = 0;
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -175,8 +165,14 @@ public class DataConversion {
|
|||||||
return result.toString();
|
return result.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean isUsableChar(char c) {
|
/**
|
||||||
return c >= 32 && c <= 126 && c != 9;
|
* Determine if char is a printable ASCII char
|
||||||
|
* in range <32,126> or a tab
|
||||||
|
* @param c char to test
|
||||||
|
* @return true if it's a printable char, or false otherwise
|
||||||
|
*/
|
||||||
|
public static boolean isPrintableAscii(char c) {
|
||||||
|
return (c >= 32 && c <= 126) || c == 9;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -18,7 +18,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
package org.sleuthkit.autopsy.keywordsearch;
|
package org.sleuthkit.autopsy.datamodel;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
@ -31,11 +31,20 @@ import org.sleuthkit.datamodel.TskException;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* FsContent input string stream reader/converter
|
* FsContent input string stream reader/converter
|
||||||
|
* TODO should be encoding specific and detect UTF8, UTF16LE, UTF16BE
|
||||||
|
* then process remainder of the string using detected encoding
|
||||||
*/
|
*/
|
||||||
public class FsContentStringStream extends InputStream {
|
public class FsContentStringStream extends InputStream {
|
||||||
|
|
||||||
public static enum Encoding {
|
public static enum Encoding {
|
||||||
ASCII,
|
UTF8 {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "UTF-8";
|
||||||
|
}
|
||||||
|
|
||||||
|
},
|
||||||
};
|
};
|
||||||
private FsContent content;
|
private FsContent content;
|
||||||
private String encoding;
|
private String encoding;
|
||||||
@ -51,7 +60,7 @@ public class FsContentStringStream extends InputStream {
|
|||||||
private boolean isEOF = false;
|
private boolean isEOF = false;
|
||||||
private boolean stringAtBoundary = false; //if temp has part of string that didn't make it in previous read()
|
private boolean stringAtBoundary = false; //if temp has part of string that didn't make it in previous read()
|
||||||
private static final byte[] oneCharBuf = new byte[1];
|
private static final byte[] oneCharBuf = new byte[1];
|
||||||
private final int ASCII_CHARS_MIN = 4; //num. of chars needed to qualify as a char string
|
private final int MIN_PRINTABLE_CHARS = 4; //num. of chars needed to qualify as a char string
|
||||||
private static final String NLS = Character.toString((char)10); //new line
|
private static final String NLS = Character.toString((char)10); //new line
|
||||||
private static final Logger logger = Logger.getLogger(FsContentStringStream.class.getName());
|
private static final Logger logger = Logger.getLogger(FsContentStringStream.class.getName());
|
||||||
|
|
||||||
@ -99,7 +108,7 @@ public class FsContentStringStream extends InputStream {
|
|||||||
//there could be more to this string in fscontent/buffer
|
//there could be more to this string in fscontent/buffer
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean isZero = false;
|
boolean singleConsecZero = false; //preserve the current sequence of chars if 1 consecutive zero char
|
||||||
int newCurLen = curStringLen + tempStringLen;
|
int newCurLen = curStringLen + tempStringLen;
|
||||||
while (newCurLen < len) {
|
while (newCurLen < len) {
|
||||||
//need to extract more strings
|
//need to extract more strings
|
||||||
@ -134,20 +143,20 @@ public class FsContentStringStream extends InputStream {
|
|||||||
}
|
}
|
||||||
//get char from cur read buf
|
//get char from cur read buf
|
||||||
char c = (char) curReadBuf[readBufOffset++];
|
char c = (char) curReadBuf[readBufOffset++];
|
||||||
if (c == 0 && isZero == false) {
|
if (c == 0 && singleConsecZero == false) {
|
||||||
//allow to skip one zero
|
//preserve the current sequence if there is at most 1 consecutive zero char
|
||||||
isZero = true;
|
singleConsecZero = true;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
isZero = false;
|
singleConsecZero = false;
|
||||||
}
|
}
|
||||||
if (isUsableChar(c)) {
|
if (DataConversion.isPrintableAscii(c)) {
|
||||||
tempString.append(c);
|
tempString.append(c);
|
||||||
++tempStringLen;
|
++tempStringLen;
|
||||||
//boundary case handled after the loop
|
//boundary case handled after the loop
|
||||||
} else if (! isZero) {
|
} else if (! singleConsecZero) {
|
||||||
//break the string, clear temp
|
//break the string, clear temp
|
||||||
if (tempStringLen >= ASCII_CHARS_MIN) {
|
if (tempStringLen >= MIN_PRINTABLE_CHARS) {
|
||||||
//append entire temp string
|
//append entire temp string
|
||||||
tempString.append(NLS);
|
tempString.append(NLS);
|
||||||
++tempStringLen;
|
++tempStringLen;
|
||||||
@ -166,7 +175,7 @@ public class FsContentStringStream extends InputStream {
|
|||||||
//check if temp still has chars to qualify as a string
|
//check if temp still has chars to qualify as a string
|
||||||
//we might need to break up temp into 2 parts for next read() call
|
//we might need to break up temp into 2 parts for next read() call
|
||||||
//consume as many as possible to fill entire user buffer
|
//consume as many as possible to fill entire user buffer
|
||||||
if (tempStringLen >= ASCII_CHARS_MIN) {
|
if (tempStringLen >= MIN_PRINTABLE_CHARS) {
|
||||||
if (newCurLen > len) {
|
if (newCurLen > len) {
|
||||||
int appendChars = len - curStringLen;
|
int appendChars = len - curStringLen;
|
||||||
//save part for next user read(), need to break up temp string
|
//save part for next user read(), need to break up temp string
|
||||||
@ -225,9 +234,6 @@ public class FsContentStringStream extends InputStream {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean isUsableChar(char c) {
|
|
||||||
return c >= 32 && c <= 126 && c != 9;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
@ -24,7 +24,8 @@ import java.io.InputStreamReader;
|
|||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.util.logging.Logger;
|
import java.util.logging.Logger;
|
||||||
import org.apache.solr.common.util.ContentStream;
|
import org.apache.solr.common.util.ContentStream;
|
||||||
import org.sleuthkit.autopsy.keywordsearch.FsContentStringStream.Encoding;
|
import org.sleuthkit.autopsy.datamodel.FsContentStringStream;
|
||||||
|
import org.sleuthkit.autopsy.datamodel.FsContentStringStream.Encoding;
|
||||||
import org.sleuthkit.datamodel.FsContent;
|
import org.sleuthkit.datamodel.FsContent;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -55,7 +56,7 @@ public class FsContentStringContentStream implements ContentStream {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getContentType() {
|
public String getContentType() {
|
||||||
return "text/plain; charset = " + encoding.toString();
|
return "text/plain; charset=" + encoding.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
*/
|
*/
|
||||||
package org.sleuthkit.autopsy.keywordsearch;
|
package org.sleuthkit.autopsy.keywordsearch;
|
||||||
|
|
||||||
|
import org.sleuthkit.autopsy.datamodel.FsContentStringStream;
|
||||||
import java.awt.event.ActionEvent;
|
import java.awt.event.ActionEvent;
|
||||||
import java.awt.event.ActionListener;
|
import java.awt.event.ActionListener;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
@ -417,7 +418,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
|||||||
|
|
||||||
private boolean extractAndIngest(FsContent f) {
|
private boolean extractAndIngest(FsContent f) {
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
FsContentStringContentStream fscs = new FsContentStringContentStream(f, FsContentStringStream.Encoding.ASCII);
|
FsContentStringContentStream fscs = new FsContentStringContentStream(f, FsContentStringStream.Encoding.UTF8);
|
||||||
try {
|
try {
|
||||||
ingester.ingest(fscs);
|
ingester.ingest(fscs);
|
||||||
success = true;
|
success = true;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user