Revert "Removed workaround for bug in Solr 4.0."

This reverts commit 46389f13f509ab47d21d9d4eb32037df8eb6008a.
This commit is contained in:
Eamonn Saunders 2015-03-30 15:10:38 -04:00
parent edf2b8018b
commit c8b8530386

View File

@ -209,8 +209,9 @@
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
<!-- Workaround for a Solr 4.0 bug in setting LimitTokenCountFilterFactory maxTokenCount; this might change to a single attribute in the future -->
<!-- 200000 token limit ensures we are indexing entire 1MB chunk of meaningful tokens, increase the limit for larger chunks --> <!-- 200000 token limit ensures we are indexing entire 1MB chunk of meaningful tokens, increase the limit for larger chunks -->
<filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="200000"/> <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="val" val="200000"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -222,8 +223,9 @@
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="200000"/> <!-- Workaround for a Solr 4.0 bug in setting LimitTokenCountFilterFactory maxTokenCount; this might change to a single attribute in the future -->
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="val" val="200000"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt" enablePositionIncrements="true" />
<!-- in this example, we will only use synonyms at query time <!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
--> -->
@ -231,12 +233,12 @@
</analyzer> </analyzer>
<analyzer type="query"> <analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="200000"/> <!-- Workaround for a Solr 4.0 bug in setting LimitTokenCountFilterFactory maxTokenCount; this might change to a single attribute in the future -->
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="val" val="200000"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt" enablePositionIncrements="true" />
<!--<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>-->
<filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.LowerCaseFilterFactory"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
<!-- A text field with defaults appropriate for English: it <!-- A text field with defaults appropriate for English: it
@ -507,17 +509,19 @@
<!-- use image_id to easily search a specific image only --> <!-- use image_id to easily search a specific image only -->
<field name="image_id" type="string" indexed="true" stored="false" required="true" /> <field name="image_id" type="string" indexed="true" stored="false" required="true" />
<!-- Autopsy pushes text to the content field and gets the text to display from it. It is copied to other places --> <!-- Autopsy pushes text to this field and gets the text to display from it. It is copied to other places -->
<field name="content" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" /> <field name="content" type="string" indexed="false" stored="false" />
<!-- The strings field holds strings extracted from files that SolrCell doesn't support --> <!-- The strings field holds strings extracted from files that SolrCell doesn't support -->
<!--<field name="strings" type="text_general" indexed="true" stored="true"/>--> <!--<field name="strings" type="text_general" indexed="true" stored="true"/>-->
<!-- NOTE: file_name gets copied later to other fields for searching -->
<field name="file_name" type="text_general" indexed="false" stored="true"/> <field name="file_name" type="text_general" indexed="false" stored="true"/>
<field name="ctime" type="tdate" indexed="false" stored="false"/> <field name="ctime" type="tdate" indexed="false" stored="false"/>
<field name="atime" type="tdate" indexed="false" stored="false"/> <field name="atime" type="tdate" indexed="false" stored="false"/>
<field name="mtime" type="tdate" indexed="false" stored="false"/> <field name="mtime" type="tdate" indexed="false" stored="false"/>
<field name="crtime" type="tdate" indexed="false" stored="false"/> <field name="crtime" type="tdate" indexed="false" stored="false"/>
<!-- file chunk-specific fields (optional for others) --> <!-- file chunk-specific fields (optional for others) -->
<!-- for a parent file with no content, the number of chunks is specified --> <!-- for a parent file with no content, the number of chunks is specified -->
<field name="num_chunks" type="int" indexed="true" stored="true" required="false" /> <field name="num_chunks" type="int" indexed="true" stored="true" required="false" />
@ -527,6 +531,7 @@
Some fields are multiValued only because Tika currently may return Some fields are multiValued only because Tika currently may return
multiple values for them. multiple values for them.
--> -->
<!--
<field name="title" type="text_general" indexed="false" stored="false" multiValued="true"/> <field name="title" type="text_general" indexed="false" stored="false" multiValued="true"/>
<field name="subject" type="text_general" indexed="false" stored="false"/> <field name="subject" type="text_general" indexed="false" stored="false"/>
<field name="description" type="text_general" indexed="false" stored="false"/> <field name="description" type="text_general" indexed="false" stored="false"/>
@ -537,13 +542,14 @@
<field name="content_type" type="string" indexed="false" stored="false" multiValued="true"/> <field name="content_type" type="string" indexed="false" stored="false" multiValued="true"/>
<field name="last_modified" type="date" indexed="false" stored="false"/> <field name="last_modified" type="date" indexed="false" stored="false"/>
<field name="links" type="string" indexed="false" stored="false" multiValued="true"/> <field name="links" type="string" indexed="false" stored="false" multiValued="true"/>
-->
<!-- Tika places all metadata into a multivalued field named "meta" --> <!-- Tika places all metadata into a multivalued field named "meta" -->
<field name="meta" type="text_general" indexed="true" stored="true" multiValued="true"/> <!--<field name="meta" type="text_general" indexed="true" stored="true" multiValued="true"/> -->
<!-- catchall field, containing all other searchable text fields (implemented <!-- catchall field, containing all other searchable text fields (implemented
via copyField further on in this schema) --> via copyField further on in this schema) -->
<field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/> <field name="text" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" multiValued="true"/>
<!-- catchall text field that indexes tokens both normally and in reverse for efficient <!-- catchall text field that indexes tokens both normally and in reverse for efficient
leading wildcard queries. --> leading wildcard queries. -->
@ -552,7 +558,7 @@
<!-- field with white-space tokenized words for TermsComponent regex search (useful for fast search of IP addresses, URLs, certain phone numbers) <!-- field with white-space tokenized words for TermsComponent regex search (useful for fast search of IP addresses, URLs, certain phone numbers)
also useful for Lucene-based queries containing special characters --> also useful for Lucene-based queries containing special characters -->
<!-- populated via copyField --> <!-- populated via copyField -->
<field name="content_ws" type="text_ws" indexed="true" stored="false" /> <field name="content_ws" type="text_ws" indexed="true" stored="false" multiValued="true" />
<!-- Uncommenting the following will create a "timestamp" field using <!-- Uncommenting the following will create a "timestamp" field using
a default value of "NOW" to indicate when each document was indexed. a default value of "NOW" to indicate when each document was indexed.
@ -569,6 +575,7 @@
EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i) EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
Longer patterns will be matched first. If equal-size patterns Longer patterns will be matched first. If equal-size patterns
both match, the first appearing in the schema will be used. --> both match, the first appearing in the schema will be used. -->
<!--
<dynamicField name="*_i" type="int" indexed="true" stored="true"/> <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
<dynamicField name="*_s" type="string" indexed="true" stored="true"/> <dynamicField name="*_s" type="string" indexed="true" stored="true"/>
<dynamicField name="*_l" type="long" indexed="true" stored="true"/> <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
@ -577,15 +584,19 @@
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/> <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_f" type="float" indexed="true" stored="true"/> <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
<dynamicField name="*_d" type="double" indexed="true" stored="true"/> <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
-->
<!-- Type used to index the lat and lon components for the "location" FieldType --> <!-- Type used to index the lat and lon components for the "location" FieldType -->
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/> <!--
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/> <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
<dynamicField name="*_p" type="location" indexed="true" stored="true"/> <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
-->
<!-- some trie-coded dynamic fields for faster range queries --> <!-- some trie-coded dynamic fields for faster range queries -->
<dynamicField name="*_ti" type="tint" indexed="true" stored="true"/> <!--
<dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
<dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/> <dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
<dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/> <dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
<dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/> <dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
@ -597,7 +608,8 @@
<dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/> <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="random_*" type="random" /> <dynamicField name="random_*" type="random" />
-->
<!-- uncomment the following to ignore any fields that don't already match an existing <!-- uncomment the following to ignore any fields that don't already match an existing
field name or dynamic field, rather than reporting them as an error. field name or dynamic field, rather than reporting them as an error.
alternately, change the type="ignored" to some other type e.g. "text" if you want alternately, change the type="ignored" to some other type e.g. "text" if you want
@ -623,9 +635,12 @@
<copyField source="content" dest="text"/> <copyField source="content" dest="text"/>
<copyField source="file_name" dest="text"/> <copyField source="file_name" dest="text"/>
<copyField source="meta" dest="text"/> <!--<copyField source="meta" dest="text"/>-->
<!--<copyField source="strings" dest="text"/>--> <!--<copyField source="strings" dest="text"/>-->
<copyField source="content" dest="content_ws"/> <copyField source="content" dest="content_ws"/>
<copyField source="file_name" dest="content_ws"/>
<!--<copyField source="meta" dest="content_ws"/>-->
<!-- Above, multiple source fields are copied to the [text] field. <!-- Above, multiple source fields are copied to the [text] field.
Another way to map multiple source fields to the same Another way to map multiple source fields to the same