mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-12 16:06:15 +00:00
Revert "Removed workaround for bug in Solr 4.0."
This reverts commit 46389f13f509ab47d21d9d4eb32037df8eb6008a.
This commit is contained in:
parent
edf2b8018b
commit
c8b8530386
@ -209,8 +209,9 @@
|
||||
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<!-- workaround for a bug in Solr 4.0 when setting LimitTokenCountFilterFactory maxTokenCount; might change to a single attribute in the future -->
|
||||
<!-- 200000 token limit ensures we are indexing entire 1MB chunk of meaningful tokens, increase the limit for larger chunks -->
|
||||
<filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="200000"/>
|
||||
<filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="val" val="200000"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
@ -222,8 +223,9 @@
|
||||
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="200000"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
||||
<!-- workaround for a bug in Solr 4.0 when setting LimitTokenCountFilterFactory maxTokenCount; might change to a single attribute in the future -->
|
||||
<filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="val" val="200000"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt" enablePositionIncrements="true" />
|
||||
<!-- in this example, we will only use synonyms at query time
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
-->
|
||||
@ -231,12 +233,12 @@
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="200000"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<!-- workaround for a bug in Solr 4.0 when setting LimitTokenCountFilterFactory maxTokenCount; might change to a single attribute in the future -->
|
||||
<filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="val" val="200000"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt" enablePositionIncrements="true" />
|
||||
<!--<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>-->
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
|
||||
</fieldType>
|
||||
|
||||
<!-- A text field with defaults appropriate for English: it
|
||||
@ -507,17 +509,19 @@
|
||||
<!-- use image_id to easily search a specific image only -->
|
||||
<field name="image_id" type="string" indexed="true" stored="false" required="true" />
|
||||
|
||||
<!-- Autopsy pushes text to the content field and gets the text to display from it. It is copied to other places -->
|
||||
<field name="content" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
|
||||
<!-- Autopsy pushes text to this field and gets the text to display from it. It is copied to other places -->
|
||||
<field name="content" type="string" indexed="false" stored="false" />
|
||||
|
||||
<!-- The strings field holds strings extracted from files that SolrCell doesn't support -->
|
||||
<!--<field name="strings" type="text_general" indexed="true" stored="true"/>-->
|
||||
|
||||
<!-- NOTE: file_name gets copied later to other fields for searching -->
|
||||
<field name="file_name" type="text_general" indexed="false" stored="true"/>
|
||||
<field name="ctime" type="tdate" indexed="false" stored="false"/>
|
||||
<field name="atime" type="tdate" indexed="false" stored="false"/>
|
||||
<field name="mtime" type="tdate" indexed="false" stored="false"/>
|
||||
<field name="crtime" type="tdate" indexed="false" stored="false"/>
|
||||
|
||||
<!-- file chunk-specific fields (optional for others) -->
|
||||
<!-- for a parent file with no content, the number of chunks is specified -->
|
||||
<field name="num_chunks" type="int" indexed="true" stored="true" required="false" />
|
||||
@ -527,6 +531,7 @@
|
||||
Some fields are multiValued only because Tika currently may return
|
||||
multiple values for them.
|
||||
-->
|
||||
<!--
|
||||
<field name="title" type="text_general" indexed="false" stored="false" multiValued="true"/>
|
||||
<field name="subject" type="text_general" indexed="false" stored="false"/>
|
||||
<field name="description" type="text_general" indexed="false" stored="false"/>
|
||||
@ -537,13 +542,14 @@
|
||||
<field name="content_type" type="string" indexed="false" stored="false" multiValued="true"/>
|
||||
<field name="last_modified" type="date" indexed="false" stored="false"/>
|
||||
<field name="links" type="string" indexed="false" stored="false" multiValued="true"/>
|
||||
-->
|
||||
|
||||
<!-- Tika places all metadata into a multivalued field named "meta" -->
|
||||
<field name="meta" type="text_general" indexed="true" stored="true" multiValued="true"/>
|
||||
<!--<field name="meta" type="text_general" indexed="true" stored="true" multiValued="true"/> -->
|
||||
|
||||
<!-- catchall field, containing all other searchable text fields (implemented
|
||||
via copyField further on in this schema) -->
|
||||
<field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="text" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" multiValued="true"/>
|
||||
|
||||
<!-- catchall text field that indexes tokens both normally and in reverse for efficient
|
||||
leading wildcard queries. -->
|
||||
@ -552,7 +558,7 @@
|
||||
<!-- field with white-space tokenized words for TermsComponent regex search (useful for fast search of IP addresses, URLs, certain phone numbers)
|
||||
also useful for Lucene-based queries containing special characters -->
|
||||
<!-- populated via copyField -->
|
||||
<field name="content_ws" type="text_ws" indexed="true" stored="false" />
|
||||
<field name="content_ws" type="text_ws" indexed="true" stored="false" multiValued="true" />
|
||||
|
||||
<!-- Uncommenting the following will create a "timestamp" field using
|
||||
a default value of "NOW" to indicate when each document was indexed.
|
||||
@ -569,6 +575,7 @@
|
||||
EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
|
||||
Longer patterns will be matched first. If equal-size patterns
|
||||
both match, the first appearing in the schema will be used. -->
|
||||
<!--
|
||||
<dynamicField name="*_i" type="int" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_l" type="long" indexed="true" stored="true"/>
|
||||
@ -577,15 +584,19 @@
|
||||
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_f" type="float" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_d" type="double" indexed="true" stored="true"/>
|
||||
-->
|
||||
|
||||
<!-- Type used to index the lat and lon components for the "location" FieldType -->
|
||||
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
|
||||
<!--
|
||||
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
|
||||
|
||||
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_p" type="location" indexed="true" stored="true"/>
|
||||
-->
|
||||
|
||||
<!-- some trie-coded dynamic fields for faster range queries -->
|
||||
<dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
|
||||
<!--
|
||||
<dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
|
||||
@ -597,6 +608,7 @@
|
||||
<dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
|
||||
|
||||
<dynamicField name="random_*" type="random" />
|
||||
-->
|
||||
|
||||
<!-- uncomment the following to ignore any fields that don't already match an existing
|
||||
field name or dynamic field, rather than reporting them as an error.
|
||||
@ -623,9 +635,12 @@
|
||||
|
||||
<copyField source="content" dest="text"/>
|
||||
<copyField source="file_name" dest="text"/>
|
||||
<copyField source="meta" dest="text"/>
|
||||
<!--<copyField source="meta" dest="text"/>-->
|
||||
<!--<copyField source="strings" dest="text"/>-->
|
||||
|
||||
<copyField source="content" dest="content_ws"/>
|
||||
<copyField source="file_name" dest="content_ws"/>
|
||||
<!--<copyField source="meta" dest="content_ws"/>-->
|
||||
|
||||
<!-- Above, multiple source fields are copied to the [text] field.
|
||||
Another way to map multiple source fields to the same
|
||||
|
Loading…
x
Reference in New Issue
Block a user