mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-17 18:17:43 +00:00
- process files with mail extensions normally, with Tika first
- FsContentStringStream tweak
This commit is contained in:
parent
b7806a42ea
commit
2dcc8121a0
@ -55,7 +55,9 @@ public class FsContentStringStream extends InputStream {
|
|||||||
private StringBuilder tempString = new StringBuilder();
|
private StringBuilder tempString = new StringBuilder();
|
||||||
private int tempStringLen = 0;
|
private int tempStringLen = 0;
|
||||||
private boolean isEOF = false;
|
private boolean isEOF = false;
|
||||||
private boolean stringAtBoundary = false; //if temp has part of string that didn't make it in previous read()
|
private boolean stringAtTempBoundary = false; //if temp has part of string that didn't make it in previous read()
|
||||||
|
private boolean stringAtBufBoundary = false; //if continue string from prev read
|
||||||
|
private boolean inString = false; //if current temp has min chars required
|
||||||
private static final byte[] oneCharBuf = new byte[1];
|
private static final byte[] oneCharBuf = new byte[1];
|
||||||
private final int MIN_PRINTABLE_CHARS = 4; //num. of chars needed to qualify as a char string
|
private final int MIN_PRINTABLE_CHARS = 4; //num. of chars needed to qualify as a char string
|
||||||
private static final String NLS = Character.toString((char) 10); //new line
|
private static final String NLS = Character.toString((char) 10); //new line
|
||||||
@ -72,8 +74,6 @@ public class FsContentStringStream extends InputStream {
|
|||||||
//logger.log(Level.INFO, "FILE: " + content.getParentPath() + "/" + content.getName());
|
//logger.log(Level.INFO, "FILE: " + content.getParentPath() + "/" + content.getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int read(byte[] b, int off, int len) throws IOException {
|
public int read(byte[] b, int off, int len) throws IOException {
|
||||||
if (b == null) {
|
if (b == null) {
|
||||||
@ -93,7 +93,7 @@ public class FsContentStringStream extends InputStream {
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (stringAtBoundary) {
|
if (stringAtTempBoundary) {
|
||||||
//append entire temp string residual from previous read()
|
//append entire temp string residual from previous read()
|
||||||
//because qualified string was broken down into 2 parts
|
//because qualified string was broken down into 2 parts
|
||||||
curString.append(tempString);
|
curString.append(tempString);
|
||||||
@ -103,7 +103,7 @@ public class FsContentStringStream extends InputStream {
|
|||||||
tempString = new StringBuilder();
|
tempString = new StringBuilder();
|
||||||
tempStringLen = 0;
|
tempStringLen = 0;
|
||||||
|
|
||||||
stringAtBoundary = false;
|
stringAtTempBoundary = false;
|
||||||
//there could be more to this string in fscontent/buffer
|
//there could be more to this string in fscontent/buffer
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -154,16 +154,23 @@ public class FsContentStringStream extends InputStream {
|
|||||||
if (DataConversion.isPrintableAscii(c)) {
|
if (DataConversion.isPrintableAscii(c)) {
|
||||||
tempString.append(c);
|
tempString.append(c);
|
||||||
++tempStringLen;
|
++tempStringLen;
|
||||||
//boundary case handled after the loop
|
if (tempStringLen >= MIN_PRINTABLE_CHARS) {
|
||||||
|
inString = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
//boundary case when temp still has chars - handled after the loop
|
||||||
} else if (!singleConsecZero) {
|
} else if (!singleConsecZero) {
|
||||||
//break the string, clear temp
|
//break the string, clear temp
|
||||||
if (tempStringLen >= MIN_PRINTABLE_CHARS) {
|
if (tempStringLen >= MIN_PRINTABLE_CHARS
|
||||||
//append entire temp string
|
|| stringAtBufBoundary) {
|
||||||
|
//append entire temp string with new line
|
||||||
tempString.append(NLS);
|
tempString.append(NLS);
|
||||||
++tempStringLen;
|
++tempStringLen;
|
||||||
|
|
||||||
curString.append(tempString);
|
curString.append(tempString);
|
||||||
curStringLen += tempStringLen;
|
curStringLen += tempStringLen;
|
||||||
|
|
||||||
|
stringAtBufBoundary = false;
|
||||||
}
|
}
|
||||||
//reset temp
|
//reset temp
|
||||||
tempString = new StringBuilder();
|
tempString = new StringBuilder();
|
||||||
@ -173,6 +180,13 @@ public class FsContentStringStream extends InputStream {
|
|||||||
newCurLen = curStringLen + tempStringLen;
|
newCurLen = curStringLen + tempStringLen;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//check if still in string state, so that next chars in read buf bypass min chars check
|
||||||
|
//and qualify as string even if fewer than min chars required
|
||||||
|
if (inString) {
|
||||||
|
inString = false; //reset
|
||||||
|
stringAtBufBoundary = true; //will bypass the check
|
||||||
|
}
|
||||||
|
|
||||||
//check if temp still has chars to qualify as a string
|
//check if temp still has chars to qualify as a string
|
||||||
//we might need to break up temp into 2 parts for next read() call
|
//we might need to break up temp into 2 parts for next read() call
|
||||||
//consume as many as possible to fill entire user buffer
|
//consume as many as possible to fill entire user buffer
|
||||||
@ -190,7 +204,7 @@ public class FsContentStringStream extends InputStream {
|
|||||||
tempString = new StringBuilder(newTemp);
|
tempString = new StringBuilder(newTemp);
|
||||||
tempStringLen = newTemp.length();
|
tempStringLen = newTemp.length();
|
||||||
|
|
||||||
stringAtBoundary = true;
|
stringAtTempBoundary = true;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
//append entire temp
|
//append entire temp
|
||||||
@ -213,7 +227,7 @@ public class FsContentStringStream extends InputStream {
|
|||||||
|
|
||||||
return copied;
|
return copied;
|
||||||
}
|
}
|
||||||
|
|
||||||
//append temp buffer to cur string buffer and reset temp, if enough chars
|
//append temp buffer to cur string buffer and reset temp, if enough chars
|
||||||
//does not append new line
|
//does not append new line
|
||||||
private void appendResetTemp() {
|
private void appendResetTemp() {
|
||||||
@ -233,6 +247,7 @@ public class FsContentStringStream extends InputStream {
|
|||||||
//logger.log(Level.INFO, curStringS);
|
//logger.log(Level.INFO, curStringS);
|
||||||
byte[] stringBytes = curStringS.getBytes(encoding);
|
byte[] stringBytes = curStringS.getBytes(encoding);
|
||||||
System.arraycopy(stringBytes, 0, b, off, Math.min(curStringLen, (int) len));
|
System.arraycopy(stringBytes, 0, b, off, Math.min(curStringLen, (int) len));
|
||||||
|
//logger.log(Level.INFO, curStringS);
|
||||||
//copied all string, reset
|
//copied all string, reset
|
||||||
curString = new StringBuilder();
|
curString = new StringBuilder();
|
||||||
int ret = curStringLen;
|
int ret = curStringLen;
|
||||||
|
@ -55,7 +55,7 @@ class Ingester {
|
|||||||
static final String[] ingestibleExtensions = {"tar", "jar", "zip", "gzip", "bzip2",
|
static final String[] ingestibleExtensions = {"tar", "jar", "zip", "gzip", "bzip2",
|
||||||
"gz", "tgz", "odf", "doc", "xls", "ppt", "rtf", "pdf", "html", "htm", "xhtml", "txt", "log", "manifest",
|
"gz", "tgz", "odf", "doc", "xls", "ppt", "rtf", "pdf", "html", "htm", "xhtml", "txt", "log", "manifest",
|
||||||
"bmp", "gif", "png", "jpeg", "tiff", "mp3", "aiff", "au", "midi", "wav",
|
"bmp", "gif", "png", "jpeg", "tiff", "mp3", "aiff", "au", "midi", "wav",
|
||||||
"pst", "xml", "class", "dwg"};
|
"pst", "xml", "class", "dwg", "eml", "emlx", "mbox", "mht"};
|
||||||
|
|
||||||
Ingester() {
|
Ingester() {
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user