mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-17 10:17:41 +00:00
Make text chunks 32k or smaller.
This commit is contained in:
parent
950ac1aea5
commit
f96f831bb6
@ -43,7 +43,7 @@ class HtmlTextExtractor implements TextExtractor {
|
||||
private static final Logger logger = Logger.getLogger(HtmlTextExtractor.class.getName());
|
||||
private static Ingester ingester;
|
||||
static final Charset outCharset = Server.DEFAULT_INDEXED_TEXT_CHARSET;
|
||||
static final int MAX_EXTR_TEXT_CHARS = 512 * 1024;
|
||||
static final int MAX_EXTR_TEXT_CHARS = 31 * 1024;
|
||||
private static final int SINGLE_READ_CHARS = 1024;
|
||||
private static final int EXTRA_CHARS = 128; //for whitespace
|
||||
private static final int MAX_SIZE = 50000000;
|
||||
|
@ -59,7 +59,7 @@ class Ingester {
|
||||
|
||||
//for ingesting chunk as SolrInputDocument (non-content-streaming, by-pass tika)
|
||||
//TODO use a streaming way to add content to /update handler
|
||||
private static final int MAX_DOC_CHUNK_SIZE = 1024 * 1024;
|
||||
private static final int MAX_DOC_CHUNK_SIZE = 32 * 1024;
|
||||
private static final String ENCODING = "UTF-8"; //NON-NLS
|
||||
|
||||
private Ingester() {
|
||||
|
@ -97,6 +97,12 @@ public class Server {
|
||||
return "content"; //NON-NLS
|
||||
}
|
||||
},
|
||||
CONTENT_STR {
|
||||
@Override
|
||||
public String toString() {
|
||||
return "content_str"; //NON-NLS
|
||||
}
|
||||
},
|
||||
TEXT {
|
||||
@Override
|
||||
public String toString() {
|
||||
@ -153,7 +159,7 @@ public class Server {
|
||||
|
||||
public static final String HL_ANALYZE_CHARS_UNLIMITED = "500000"; //max 1MB in a chunk. use -1 for unlimited, but -1 option may not be supported (not documented)
|
||||
//max content size we can send to Solr
|
||||
public static final long MAX_CONTENT_SIZE = 1L * 1024 * 1024 * 1024;
|
||||
public static final long MAX_CONTENT_SIZE = 1L * 31 * 1024 * 1024;
|
||||
private static final Logger logger = Logger.getLogger(Server.class.getName());
|
||||
private static final String DEFAULT_CORE_NAME = "coreCase"; //NON-NLS
|
||||
public static final String CORE_EVT = "CORE_EVT"; //NON-NLS
|
||||
|
@ -40,7 +40,7 @@ class StringsTextExtractor implements TextExtractor {
|
||||
|
||||
private static Ingester ingester;
|
||||
private static final Logger logger = Logger.getLogger(StringsTextExtractor.class.getName());
|
||||
private static final long MAX_STRING_CHUNK_SIZE = 1 * 1024 * 1024L;
|
||||
private static final long MAX_STRING_CHUNK_SIZE = 1 * 31 * 1024L;
|
||||
//private static final int BOM_LEN = 3;
|
||||
private static final int BOM_LEN = 0; //disabled prepending of BOM
|
||||
private static final Charset INDEX_CHARSET = Server.DEFAULT_INDEXED_TEXT_CHARSET;
|
||||
|
@ -62,7 +62,7 @@ class TikaTextExtractor implements TextExtractor {
|
||||
private static final Logger logger = Logger.getLogger(TikaTextExtractor.class.getName());
|
||||
private static Ingester ingester;
|
||||
private static final Charset OUTPUT_CHARSET = Server.DEFAULT_INDEXED_TEXT_CHARSET;
|
||||
private static final int MAX_EXTR_TEXT_CHARS = 512 * 1024;
|
||||
private static final int MAX_EXTR_TEXT_CHARS = 16 * 1024;
|
||||
private static final int SINGLE_READ_CHARS = 1024;
|
||||
private static final int EXTRA_CHARS = 128; //for whitespace
|
||||
private final char[] textChunkBuf = new char[MAX_EXTR_TEXT_CHARS];
|
||||
|
Loading…
x
Reference in New Issue
Block a user