mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-09 06:39:33 +00:00
better variable names, fix typo in ascii test, use UTF8 charset in stream, move string stream class
This commit is contained in:
parent
95097e16d9
commit
ac4cc7c624
@ -119,54 +119,44 @@ public class DataConversion {
|
||||
* -- When looking for ASCII strings, they evaluate each byte and when they find four or more printable characters they get printed out with a newline in between each string.
|
||||
* -- When looking for Unicode strings, they evaluate each two byte sequence and look for four or more printable characters…
|
||||
*
|
||||
* @param args the bytes that the string read from
|
||||
* @param readBuf the bytes that the string is read from
|
||||
* @param len length of text in the buffer to convert, starting at position 0
|
||||
* @param parameter the "length" parameter for the string
|
||||
* @param minStringLen minimum length of consecutive chars to qualify as a string
|
||||
*
|
||||
* TODO should be encoding specific and detect UTF8, UTF16LE, UTF16BE
|
||||
* then process remainder of the string using detected encoding
|
||||
*
|
||||
* @author jantonius
|
||||
*/
|
||||
public static String getString(byte[] args, int len, int parameter) {
|
||||
|
||||
/*
|
||||
// these encoding might be needed for later
|
||||
// Note: if not used, can be deleted
|
||||
CharsetEncoder asciiEncoder =
|
||||
Charset.forName("US-ASCII").newEncoder(); // or "ISO-8859-1" for ISO Latin 1
|
||||
|
||||
CharsetEncoder utf8Encoder =
|
||||
Charset.forName("UTF-8").newEncoder();
|
||||
*/
|
||||
public static String getString(byte[] readBuf, int len, int minStringLen) {
|
||||
final StringBuilder result = new StringBuilder();
|
||||
StringBuilder temp = new StringBuilder();
|
||||
int counter = 0;
|
||||
//char[] converted = new java.lang.System.Text.Encoding.ASCII.GetString(args).ToCharArray();
|
||||
int curLen = 0;
|
||||
|
||||
final char NL = (char) 10; // ASCII char for new line
|
||||
final String NLS = Character.toString(NL);
|
||||
boolean isZero = false;
|
||||
boolean singleConsecZero = false; //preserve the current sequence of chars if 1 consecutive zero char
|
||||
for (int i = 0; i < len; i++) {
|
||||
char curChar = (char) args[i];
|
||||
|
||||
if (curChar == 0 && isZero == false) {
|
||||
//allow to skip one 0
|
||||
isZero = true;
|
||||
char curChar = (char) readBuf[i];
|
||||
if (curChar == 0 && singleConsecZero == false) {
|
||||
//preserve the current sequence when there is at most 1 consecutive zero char
|
||||
singleConsecZero = true;
|
||||
} else {
|
||||
isZero = false;
|
||||
singleConsecZero = false;
|
||||
}
|
||||
//ignore non-printable ASCII chars
|
||||
//use 32-126 and TAB (9)
|
||||
if (isUsableChar(curChar)) {
|
||||
if (isPrintableAscii(curChar)) {
|
||||
temp.append(curChar);
|
||||
++counter;
|
||||
} else if (!isZero) {
|
||||
if (counter >= parameter) {
|
||||
++curLen;
|
||||
} else if (!singleConsecZero) {
|
||||
if (curLen >= minStringLen) {
|
||||
// add to the result and also add the new line at the end
|
||||
result.append(temp);
|
||||
result.append(NLS);
|
||||
}
|
||||
// reset the temp and counter
|
||||
// reset the temp and curLen
|
||||
temp = new StringBuilder();
|
||||
counter = 0;
|
||||
curLen = 0;
|
||||
|
||||
}
|
||||
}
|
||||
@ -175,8 +165,14 @@ public class DataConversion {
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
private static boolean isUsableChar(char c) {
|
||||
return c >= 32 && c <= 126 && c != 9;
|
||||
/**
|
||||
* Determine if char is a printable ASCII char
|
||||
* (in the inclusive range 32-126) or a tab (9)
|
||||
* @param c char to test
|
||||
* @return true if it's a printable char, or false otherwise
|
||||
*/
|
||||
public static boolean isPrintableAscii(char c) {
|
||||
return (c >= 32 && c <= 126) || c == 9;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -18,7 +18,7 @@
|
||||
*/
|
||||
|
||||
|
||||
package org.sleuthkit.autopsy.keywordsearch;
|
||||
package org.sleuthkit.autopsy.datamodel;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
@ -31,11 +31,20 @@ import org.sleuthkit.datamodel.TskException;
|
||||
|
||||
/**
|
||||
* FsContent input string stream reader/converter
|
||||
* TODO should be encoding specific and detect UTF8, UTF16LE, UTF16BE
|
||||
* then process remainder of the string using detected encoding
|
||||
*/
|
||||
public class FsContentStringStream extends InputStream {
|
||||
|
||||
public static enum Encoding {
|
||||
ASCII,
|
||||
UTF8 {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "UTF-8";
|
||||
}
|
||||
|
||||
},
|
||||
};
|
||||
private FsContent content;
|
||||
private String encoding;
|
||||
@ -51,7 +60,7 @@ public class FsContentStringStream extends InputStream {
|
||||
private boolean isEOF = false;
|
||||
private boolean stringAtBoundary = false; //if temp has part of string that didn't make it in previous read()
|
||||
private static final byte[] oneCharBuf = new byte[1];
|
||||
private final int ASCII_CHARS_MIN = 4; //num. of chars needed to qualify as a char string
|
||||
private final int MIN_PRINTABLE_CHARS = 4; //num. of chars needed to qualify as a char string
|
||||
private static final String NLS = Character.toString((char)10); //new line
|
||||
private static final Logger logger = Logger.getLogger(FsContentStringStream.class.getName());
|
||||
|
||||
@ -99,7 +108,7 @@ public class FsContentStringStream extends InputStream {
|
||||
//there could be more to this string in fscontent/buffer
|
||||
}
|
||||
|
||||
boolean isZero = false;
|
||||
boolean singleConsecZero = false; //preserve the current sequence of chars if 1 consecutive zero char
|
||||
int newCurLen = curStringLen + tempStringLen;
|
||||
while (newCurLen < len) {
|
||||
//need to extract more strings
|
||||
@ -134,20 +143,20 @@ public class FsContentStringStream extends InputStream {
|
||||
}
|
||||
//get char from cur read buf
|
||||
char c = (char) curReadBuf[readBufOffset++];
|
||||
if (c == 0 && isZero == false) {
|
||||
//allow to skip one zero
|
||||
isZero = true;
|
||||
if (c == 0 && singleConsecZero == false) {
|
||||
//preserve the current sequence when there is at most 1 consecutive zero char
|
||||
singleConsecZero = true;
|
||||
}
|
||||
else {
|
||||
isZero = false;
|
||||
singleConsecZero = false;
|
||||
}
|
||||
if (isUsableChar(c)) {
|
||||
if (DataConversion.isPrintableAscii(c)) {
|
||||
tempString.append(c);
|
||||
++tempStringLen;
|
||||
//boundary case handled after the loop
|
||||
} else if (! isZero) {
|
||||
} else if (! singleConsecZero) {
|
||||
//break the string, clear temp
|
||||
if (tempStringLen >= ASCII_CHARS_MIN) {
|
||||
if (tempStringLen >= MIN_PRINTABLE_CHARS) {
|
||||
//append entire temp string
|
||||
tempString.append(NLS);
|
||||
++tempStringLen;
|
||||
@ -166,7 +175,7 @@ public class FsContentStringStream extends InputStream {
|
||||
//check if temp still has chars to qualify as a string
|
||||
//we might need to break up temp into 2 parts for next read() call
|
||||
//consume as many as possible to fill entire user buffer
|
||||
if (tempStringLen >= ASCII_CHARS_MIN) {
|
||||
if (tempStringLen >= MIN_PRINTABLE_CHARS) {
|
||||
if (newCurLen > len) {
|
||||
int appendChars = len - curStringLen;
|
||||
//save part for next user read(), need to break up temp string
|
||||
@ -225,9 +234,6 @@ public class FsContentStringStream extends InputStream {
|
||||
return 0;
|
||||
}
|
||||
|
||||
private static boolean isUsableChar(char c) {
|
||||
return c >= 32 && c <= 126 && c != 9;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
@ -24,7 +24,8 @@ import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.util.logging.Logger;
|
||||
import org.apache.solr.common.util.ContentStream;
|
||||
import org.sleuthkit.autopsy.keywordsearch.FsContentStringStream.Encoding;
|
||||
import org.sleuthkit.autopsy.datamodel.FsContentStringStream;
|
||||
import org.sleuthkit.autopsy.datamodel.FsContentStringStream.Encoding;
|
||||
import org.sleuthkit.datamodel.FsContent;
|
||||
|
||||
/**
|
||||
@ -55,7 +56,7 @@ public class FsContentStringContentStream implements ContentStream {
|
||||
|
||||
@Override
|
||||
public String getContentType() {
|
||||
return "text/plain; charset = " + encoding.toString();
|
||||
return "text/plain; charset=" + encoding.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -18,6 +18,7 @@
|
||||
*/
|
||||
package org.sleuthkit.autopsy.keywordsearch;
|
||||
|
||||
import org.sleuthkit.autopsy.datamodel.FsContentStringStream;
|
||||
import java.awt.event.ActionEvent;
|
||||
import java.awt.event.ActionListener;
|
||||
import java.util.ArrayList;
|
||||
@ -417,7 +418,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
||||
|
||||
private boolean extractAndIngest(FsContent f) {
|
||||
boolean success = false;
|
||||
FsContentStringContentStream fscs = new FsContentStringContentStream(f, FsContentStringStream.Encoding.ASCII);
|
||||
FsContentStringContentStream fscs = new FsContentStringContentStream(f, FsContentStringStream.Encoding.UTF8);
|
||||
try {
|
||||
ingester.ingest(fscs);
|
||||
success = true;
|
||||
|
Loading…
x
Reference in New Issue
Block a user