Mirror of https://github.com/overcuriousity/autopsy-flatpak.git, synced 2025-07-06 21:00:22 +00:00.
Fixing up files that were broken by an assortment of merge and merge reverts.
parent 5864303f21
commit 680ad710e1
@@ -21,6 +21,7 @@
<dependency conf="autopsy->*" org="org.apache.solr" name="solr-solrj" rev="4.9.1"/>
<dependency conf="autopsy->*" org="commons-lang" name="commons-lang" rev="2.4"/>
<dependency conf="autopsy->*" org="commons-validator" name="commons-validator" rev="1.5.1"/>
<dependency conf="autopsy->*" org="com.optimaize.languagedetector" name="language-detector" rev="0.6"/>
<!-- Exclude the version of cxf-rt-rs-client from Tika 1.20, one of its depedencies breaks Ivy -->
<dependency conf="autopsy->*" org="org.apache.tika" name="tika-parsers" rev="1.20">
<exclude module="cxf-rt-rs-client"/>
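The new dependency above brings in the Optimaize language-detector 0.6 library that the language-aware indexing below builds on. As a point of reference, this is roughly how that library is driven on its own; Autopsy wraps it behind LanguageSpecificContentIndexingHelper, so everything in this sketch (class name, sample text) is illustrative rather than taken from this commit:

```java
import com.google.common.base.Optional;
import com.optimaize.langdetect.LanguageDetector;
import com.optimaize.langdetect.LanguageDetectorBuilder;
import com.optimaize.langdetect.i18n.LdLocale;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
import com.optimaize.langdetect.text.CommonTextObjectFactories;
import com.optimaize.langdetect.text.TextObjectFactory;

public class LanguageDetectionSketch {
    public static void main(String[] args) throws java.io.IOException {
        // Build a detector from the n-gram profiles bundled with the library.
        LanguageDetector detector = LanguageDetectorBuilder.create(NgramExtractors.standard())
                .withProfiles(new LanguageProfileReader().readAllBuiltIn())
                .build();

        // The large-text factory suits document chunks better than short queries.
        TextObjectFactory textFactory = CommonTextObjectFactories.forDetectingOnLargeText();

        // detect() returns Guava's Optional; absent means no confident detection.
        Optional<LdLocale> locale = detector.detect(textFactory.forText("今日は雨が降っています。"));
        System.out.println(locale.isPresent() ? locale.get().getLanguage() : "unknown");
    }
}
```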
@@ -29,6 +29,7 @@ file.reference.jericho-html-3.3.jar=release/modules/ext/jericho-html-3.3.jar
file.reference.joda-time-2.2.jar=release/modules/ext/joda-time-2.2.jar
file.reference.json-simple-1.1.1.jar=release/modules/ext/json-simple-1.1.1.jar
file.reference.juniversalchardet-1.0.3.jar=release/modules/ext/juniversalchardet-1.0.3.jar
file.reference.language-detector-0.6.jar=release\\modules\\ext\\language-detector-0.6.jar
file.reference.libsvm-3.1.jar=release/modules/ext/libsvm-3.1.jar
file.reference.log4j-1.2.17.jar=release/modules/ext/log4j-1.2.17.jar
file.reference.lucene-core-4.0.0.jar=release/modules/ext/lucene-core-4.0.0.jar
@@ -230,10 +230,6 @@
<package>org.codehaus.stax2.validation</package>
<package>org.noggit</package>
<package>org.sleuthkit.autopsy.keywordsearch</package>
<package>org.slf4j</package>
<package>org.slf4j.event</package>
<package>org.slf4j.helpers</package>
<package>org.slf4j.spi</package>
</public-packages>
<class-path-extension>
<runtime-relative-path>ext/commons-digester-1.8.1.jar</runtime-relative-path>
@@ -283,6 +279,10 @@
<runtime-relative-path>ext/guava-17.0.jar</runtime-relative-path>
<binary-origin>release/modules/ext/guava-17.0.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/language-detector-0.6.jar</runtime-relative-path>
<binary-origin>release\modules\ext\language-detector-0.6.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/joda-time-2.2.jar</runtime-relative-path>
<binary-origin>release/modules/ext/joda-time-2.2.jar</binary-origin>
@@ -45,7 +45,7 @@
that avoids logging every request
-->

<schema name="Autopsy Keyword Search" version="2.1">
<schema name="Autopsy Keyword Search" version="2.2">
<!-- attribute "name" is the name of this schema and is only used for display purposes.
Applications should change this to reflect the nature of the search collection.
version="1.4" is Solr's version number for the schema syntax and semantics. It should
@@ -62,6 +62,7 @@
2.0 added chunk_size field
2.1 to facilitate case insensitive regex search,no longer copying content into content_str.
content_str will be populated with lowercase content by Autopsy.
2.2 added text_ja type, content_ja and language fields to support Japanese text search
-->

<types>
@@ -243,6 +244,18 @@
</analyzer>
</fieldType>

<fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
<analyzer>
<tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
<filter class="solr.JapaneseBaseFormFilterFactory"/>
<filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" />
<filter class="solr.CJKWidthFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" />
<filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>

<!-- A text field with defaults appropriate for English: it
tokenizes with StandardTokenizer, removes English stop words
(stopwords_en.txt), down cases, protects words from protwords.txt, and
@@ -557,6 +570,11 @@
via copyField further on in this schema -->
<field name="text" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" multiValued="true"/>

<!-- Store language detection result. Only parents of text documents have this -->
<field name="language" type="string" indexed="false" stored="true" required="false"/>

<field name="content_ja" type="text_ja" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" multiValued="true"/>

<!-- catchall text field that indexes tokens both normally and in reverse for efficient
leading wildcard queries. -->
<!--<field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>-->
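Taken together, the schema additions above give chunks an optional content_ja copy (analyzed by the text_ja chain) and documents a stored-only language field. A minimal SolrJ 4.x sketch of a document that carries those fields, matching the solr-solrj 4.9.1 dependency in this module; the core URL, id format, and sample text are illustrative, since Autopsy actually builds its documents through Ingester:

```java
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.common.SolrInputDocument;

public class JapaneseFieldsIndexingSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical core URL; Autopsy talks to its own managed Solr server instead.
        HttpSolrServer solr = new HttpSolrServer("http://localhost:8983/solr/keywordsearch_sketch");

        SolrInputDocument doc = new SolrInputDocument();
        doc.addField("id", "12345_1");                        // illustrative chunk id
        doc.addField("language", "ja");                       // stored detection result
        doc.addField("content_ja", "今日は雨が降っています。");   // analyzed by text_ja
        doc.addField("text", "今日は雨が降っています。");         // regular catch-all field

        solr.add(doc);
        solr.commit();
        solr.shutdown();
    }
}
```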
@@ -38,6 +38,7 @@ import org.apache.commons.lang3.math.NumberUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrRequest.METHOD;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.coreutils.Logger;
@@ -346,6 +347,8 @@ class HighlightedText implements IndexedText {
String chunkID = "";
String highlightField = "";
try {
double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());

loadPageInfo(); //inits once
SolrQuery q = new SolrQuery();
q.setShowDebugInfo(DEBUG); //debug
@@ -359,22 +362,46 @@ class HighlightedText implements IndexedText {

highlightField = LuceneQuery.HIGHLIGHT_FIELD;
if (isLiteral) {
//if the query is literal try to get solr to do the highlighting
final String highlightQuery = keywords.stream()
.map(HighlightedText::constructEscapedSolrQuery)
.collect(Collectors.joining(" "));
if (2.2 <= indexSchemaVersion) {
//if the query is literal try to get solr to do the highlighting
final String highlightQuery = keywords.stream().map(s ->
LanguageSpecificContentQueryHelper.expandQueryString(KeywordSearchUtil.quoteQuery(KeywordSearchUtil.escapeLuceneQuery(s))))
.collect(Collectors.joining(" OR "));
q.setQuery(highlightQuery);
for (Server.Schema field : LanguageSpecificContentQueryHelper.getQueryFields()) {
q.addField(field.toString());
q.addHighlightField(field.toString());
}
q.addField(Server.Schema.LANGUAGE.toString());
// in case of single term literal query there is only 1 term
LanguageSpecificContentQueryHelper.configureTermfreqQuery(q, keywords.iterator().next());
q.addFilterQuery(filterQuery);
q.setHighlightFragsize(0); // don't fragment the highlight, works with original highlighter, or needs "single" list builder with FVH
} else {
//if the query is literal try to get solr to do the highlighting
final String highlightQuery = keywords.stream()
.map(HighlightedText::constructEscapedSolrQuery)
.collect(Collectors.joining(" "));

q.setQuery(highlightQuery);
q.addField(highlightField);
q.addFilterQuery(filterQuery);
q.addHighlightField(highlightField);
q.setHighlightFragsize(0); // don't fragment the highlight, works with original highlighter, or needs "single" list builder with FVH
q.setQuery(highlightQuery);
q.addField(highlightField);
q.addFilterQuery(filterQuery);
q.addHighlightField(highlightField);
q.setHighlightFragsize(0); // don't fragment the highlight, works with original highlighter, or needs "single" list builder with FVH
}

//tune the highlighter
q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS
q.setParam("hl.tag.pre", HIGHLIGHT_PRE); //makes sense for FastVectorHighlighter only NON-NLS
q.setParam("hl.tag.post", HIGHLIGHT_POST); //makes sense for FastVectorHighlighter only NON-NLS
q.setParam("hl.fragListBuilder", "single"); //makes sense for FastVectorHighlighter only NON-NLS
if (shouldUseOriginalHighlighter(contentIdStr)) {
// use original highlighter
q.setParam("hl.useFastVectorHighlighter", "off");
q.setParam("hl.simple.pre", HIGHLIGHT_PRE);
q.setParam("hl.simple.post", HIGHLIGHT_POST);
} else {
q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS
q.setParam("hl.tag.pre", HIGHLIGHT_PRE); //makes sense for FastVectorHighlighter only NON-NLS
q.setParam("hl.tag.post", HIGHLIGHT_POST); //makes sense for FastVectorHighlighter only NON-NLS
q.setParam("hl.fragListBuilder", "single"); //makes sense for FastVectorHighlighter only NON-NLS
}

//docs says makes sense for the original Highlighter only, but not really
q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //NON-NLS
@@ -406,12 +433,40 @@ class HighlightedText implements IndexedText {
if (responseHighlightID == null) {
highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
} else {
List<String> contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
if (contentHighlights == null) {
highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
SolrDocument document = response.getResults().get(0);
Object language = document.getFieldValue(Server.Schema.LANGUAGE.toString());
if (2.2 <= indexSchemaVersion && language != null) {
List<String> contentHighlights = LanguageSpecificContentQueryHelper.getHighlights(responseHighlightID).orElse(null);
if (contentHighlights == null) {
highlightedContent = "";
} else {
int hitCountInMiniChunk = LanguageSpecificContentQueryHelper.queryChunkTermfreq(keywords, MiniChunkHelper.getChunkIdString(contentIdStr));
String s = contentHighlights.get(0).trim();
// If there is a mini-chunk, trim the content not to show highlighted text in it.
if (0 < hitCountInMiniChunk) {
int hitCountInChunk = ((Float) document.getFieldValue(Server.Schema.TERMFREQ.toString())).intValue();
int idx = LanguageSpecificContentQueryHelper.findNthIndexOf(
s,
HIGHLIGHT_PRE,
// trim after the last hit in chunk
hitCountInChunk - hitCountInMiniChunk);
if (idx != -1) {
highlightedContent = s.substring(0, idx);
} else {
highlightedContent = s;
}
} else {
highlightedContent = s;
}
}
} else {
// extracted content (minus highlight tags) is HTML-escaped
highlightedContent = contentHighlights.get(0).trim();
List<String> contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
if (contentHighlights == null) {
highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
} else {
// extracted content (minus highlight tags) is HTML-escaped
highlightedContent = contentHighlights.get(0).trim();
}
}
}
}
@@ -551,4 +606,37 @@ class HighlightedText implements IndexedText {
return buf.toString();
}

/**
* Return true if we should use original highlighter instead of FastVectorHighlighter.
*
* In the case Japanese text and phrase query, FastVectorHighlighter does not work well.
*
* Note about highlighters:
* If the query is "雨が降る" (phrase query), Solr divides it into 雨 and 降る. が is a stop word here.
* It seems that FastVector highlighter does not produce any snippet when there is a stop word between terms.
* On the other hand, original highlighter produces multiple matches, for example:
* > <em>雨</em>が<em>降っ</em>ています
* Unified highlighter (from Solr 6.4) handles the case as expected:
* > <em>雨が降っ</em>ています。
*/
private boolean shouldUseOriginalHighlighter(String contentID) throws NoOpenCoreException, KeywordSearchModuleException {
final SolrQuery q = new SolrQuery();
q.setQuery("*:*");
q.addFilterQuery(Server.Schema.ID.toString() + ":" + contentID);
q.setFields(Server.Schema.LANGUAGE.toString());

QueryResponse response = solrServer.query(q, METHOD.POST);
SolrDocumentList solrDocuments = response.getResults();

if (!solrDocuments.isEmpty()) {
SolrDocument solrDocument = solrDocuments.get(0);
if (solrDocument != null) {
Object languageField = solrDocument.getFieldValue(Server.Schema.LANGUAGE.toString());
if (languageField != null) {
return languageField.equals("ja");
}
}
}
return false;
}
}
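Distilled, the highlighting path above now makes one extra decision: if the matched document carries language "ja", switch from Solr's FastVectorHighlighter to the original highlighter, whose parameters use the hl.simple.* prefix instead of hl.tag.*. A compressed sketch of just that switch (parameter values mirror the constants used above; the helper class and boolean flag are illustrative):

```java
import org.apache.solr.client.solrj.SolrQuery;

final class HighlighterSelectionSketch {
    static void configureHighlighting(SolrQuery q, boolean documentIsJapanese, String pre, String post) {
        if (documentIsJapanese) {
            // Original highlighter: FVH can return no snippet for Japanese phrase queries
            // when a stop word (e.g. が) sits between the matched terms.
            q.setParam("hl.useFastVectorHighlighter", "off");
            q.setParam("hl.simple.pre", pre);
            q.setParam("hl.simple.post", post);
        } else {
            // FastVectorHighlighter scales better on large chunks.
            q.setParam("hl.useFastVectorHighlighter", "on");
            q.setParam("hl.tag.pre", pre);
            q.setParam("hl.tag.post", post);
            q.setParam("hl.fragListBuilder", "single");
        }
        q.setHighlightFragsize(0); // return the whole highlighted chunk, not fragments
    }
}
```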
@@ -39,7 +39,7 @@ class IndexFinder {
private static final String KWS_DATA_FOLDER_NAME = "data";
private static final String INDEX_FOLDER_NAME = "index";
private static final String CURRENT_SOLR_VERSION = "4";
private static final String CURRENT_SOLR_SCHEMA_VERSION = "2.1";
private static final String CURRENT_SOLR_SCHEMA_VERSION = "2.2";

static String getCurrentSolrVersion() {
return CURRENT_SOLR_VERSION;
@@ -20,8 +20,10 @@ package org.sleuthkit.autopsy.keywordsearch;

import java.io.BufferedReader;
import java.io.Reader;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.logging.Level;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.solr.client.solrj.SolrServerException;
@@ -59,6 +61,8 @@ class Ingester {
private final Server solrServer = KeywordSearch.getServer();
private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR = new SolrFieldsVisitor();
private static Ingester instance;
private final LanguageSpecificContentIndexingHelper languageSpecificContentIndexingHelper
= new LanguageSpecificContentIndexingHelper();

private Ingester() {
}
@@ -93,7 +97,7 @@
* file, but the Solr server is probably fine.
*/
void indexMetaDataOnly(AbstractFile file) throws IngesterException {
indexChunk("", file.getName().toLowerCase(), getContentFields(file));
indexChunk("", file.getName().toLowerCase(), new HashMap<>(getContentFields(file)));
}

/**
@@ -107,7 +111,7 @@
* artifact, but the Solr server is probably fine.
*/
void indexMetaDataOnly(BlackboardArtifact artifact, String sourceName) throws IngesterException {
indexChunk("", sourceName, getContentFields(artifact));
indexChunk("", sourceName, new HashMap<>(getContentFields(artifact)));
}

/**
@@ -143,21 +147,30 @@
< T extends SleuthkitVisitableItem> boolean indexText(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context) throws Ingester.IngesterException {
int numChunks = 0; //unknown until chunking is done

Map<String, String> fields = getContentFields(source);
Map<String, String> contentFields = Collections.unmodifiableMap(getContentFields(source));
//Get a reader for the content of the given source
try (BufferedReader reader = new BufferedReader(sourceReader)) {
Chunker chunker = new Chunker(reader);
for (Chunk chunk : chunker) {
while (chunker.hasNext()) {
if (context != null && context.fileIngestIsCancelled()) {
logger.log(Level.INFO, "File ingest cancelled. Cancelling keyword search indexing of {0}", sourceName);
return false;
}

Chunk chunk = chunker.next();
Map<String, Object> fields = new HashMap<>(contentFields);
String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
fields.put(Server.Schema.ID.toString(), chunkId);
fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));
Optional<Language> language = languageSpecificContentIndexingHelper.detectLanguageIfNeeded(chunk);
language.ifPresent(lang -> languageSpecificContentIndexingHelper.updateLanguageSpecificFields(fields, chunk, lang));
try {
//add the chunk text to Solr index
indexChunk(chunk.toString(), sourceName, fields);
// add mini chunk when there's a language specific field
if (chunker.hasNext() && language.isPresent()) {
languageSpecificContentIndexingHelper.indexMiniChunk(chunk, sourceName, new HashMap<>(contentFields), chunkId, language.get());
}
numChunks++;
} catch (Ingester.IngesterException ingEx) {
logger.log(Level.WARNING, "Ingester had a problem with extracted string from file '" //NON-NLS
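Two changes drive the rewritten loop above: the base field map becomes an unmodifiable snapshot that is copied into a fresh HashMap for every chunk, so per-chunk values such as the language fields cannot leak into the next chunk, and the for-each is replaced with an explicit hasNext()/next() pair so the code can still ask whether more chunks follow before deciding to index a mini chunk. A stripped-down sketch of that shape, with a plain Iterator standing in for Autopsy's Chunker (names and field keys are illustrative):

```java
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

final class ChunkLoopSketch {
    static int indexAll(Iterator<String> chunker, Map<String, String> baseFields) {
        // Immutable snapshot: per-chunk writes can never bleed into later chunks.
        final Map<String, String> contentFields = Collections.unmodifiableMap(baseFields);
        int numChunks = 0;
        while (chunker.hasNext()) {
            String chunk = chunker.next();
            Map<String, Object> fields = new HashMap<>(contentFields); // fresh copy per chunk
            fields.put("chunk_size", Integer.toString(chunk.length()));
            // Unlike a for-each loop, hasNext() is still usable mid-iteration,
            // which is what lets the real code index a trailing mini chunk
            // only when another chunk follows.
            if (chunker.hasNext()) {
                fields.put("has_following_chunk", "true"); // illustrative marker only
            }
            numChunks++;
        }
        return numChunks;
    }
}
```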
@@ -171,12 +184,13 @@
return false;
}
} catch (Exception ex) {
logger.log(Level.WARNING, "Unexpected error while indexing content from " + sourceID + ": " + sourceName, ex);//NON-NLS
logger.log(Level.WARNING, "Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS
return false;
} finally {
if (context != null && context.fileIngestIsCancelled()) {
return false;
} else {
Map<String, Object> fields = new HashMap<>(contentFields);
//after all chunks, index just the meta data, including the numChunks, of the parent file
fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
//reset id field to base document id
@@ -202,7 +216,7 @@
*
* @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException
*/
private void indexChunk(String chunk, String sourceName, Map<String, String> fields) throws IngesterException {
private void indexChunk(String chunk, String sourceName, Map<String, Object> fields) throws IngesterException {
if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
//JMTODO: actually if the we couldn't get the image id it is set to -1,
// but does this really mean we don't want to index it?
@@ -134,6 +134,7 @@ class LuceneQuery implements KeywordSearchQuery {
String cursorMark = CursorMarkParams.CURSOR_MARK_START;
boolean allResultsProcessed = false;
List<KeywordHit> matches = new ArrayList<>();
LanguageSpecificContentQueryHelper.QueryResults languageSpecificQueryResults = new LanguageSpecificContentQueryHelper.QueryResults();
while (!allResultsProcessed) {
solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
QueryResponse response = solrServer.query(solrQuery, SolrRequest.METHOD.POST);
@@ -141,7 +142,18 @@ class LuceneQuery implements KeywordSearchQuery {
// objectId_chunk -> "text" -> List of previews
Map<String, Map<String, List<String>>> highlightResponse = response.getHighlighting();

if (2.2 <= indexSchemaVersion) {
languageSpecificQueryResults.highlighting.putAll(response.getHighlighting());
}

for (SolrDocument resultDoc : resultList) {
if (2.2 <= indexSchemaVersion) {
Object language = resultDoc.getFieldValue(Server.Schema.LANGUAGE.toString());
if (language != null) {
LanguageSpecificContentQueryHelper.updateQueryResults(languageSpecificQueryResults, resultDoc);
}
}

try {
/*
* for each result doc, check that the first occurence of
@@ -153,6 +165,11 @@ class LuceneQuery implements KeywordSearchQuery {
final Integer chunkSize = (Integer) resultDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString());
final Collection<Object> content = resultDoc.getFieldValues(Server.Schema.CONTENT_STR.toString());

// if the document has language, it should be hit in language specific content fields. So skip here.
if (resultDoc.containsKey(Server.Schema.LANGUAGE.toString())) {
continue;
}

if (indexSchemaVersion < 2.0) {
//old schema versions don't support chunk_size or the content_str fields, so just accept hits
matches.add(createKeywordtHit(highlightResponse, docId));
@@ -179,9 +196,16 @@ class LuceneQuery implements KeywordSearchQuery {
cursorMark = nextCursorMark;
}

List<KeywordHit> mergedMatches;
if (2.2 <= indexSchemaVersion) {
mergedMatches = LanguageSpecificContentQueryHelper.mergeKeywordHits(matches, originalKeyword, languageSpecificQueryResults);
} else {
mergedMatches = matches;
}

QueryResults results = new QueryResults(this);
//in case of single term literal query there is only 1 term
results.addResult(new Keyword(originalKeyword.getSearchTerm(), true, true, originalKeyword.getListName(), originalKeyword.getOriginalTerm()), matches);
results.addResult(new Keyword(originalKeyword.getSearchTerm(), true, true, originalKeyword.getListName(), originalKeyword.getOriginalTerm()), mergedMatches);

return results;
}
@@ -262,19 +286,25 @@ class LuceneQuery implements KeywordSearchQuery {
*
* @return
*/
private SolrQuery createAndConfigureSolrQuery(boolean snippets) {
private SolrQuery createAndConfigureSolrQuery(boolean snippets) throws NoOpenCoreException, KeywordSearchModuleException {
double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());

SolrQuery q = new SolrQuery();
q.setShowDebugInfo(DEBUG); //debug
// Wrap the query string in quotes if this is a literal search term.
String queryStr = originalKeyword.searchTermIsLiteral()
? KeywordSearchUtil.quoteQuery(keywordStringEscaped) : keywordStringEscaped;
? KeywordSearchUtil.quoteQuery(keywordStringEscaped) : keywordStringEscaped;

// Run the query against an optional alternative field.
if (field != null) {
//use the optional field
queryStr = field + ":" + queryStr;
q.setQuery(queryStr);
} else if (2.2 <= indexSchemaVersion && originalKeyword.searchTermIsLiteral()) {
q.setQuery(LanguageSpecificContentQueryHelper.expandQueryString(queryStr));
} else {
q.setQuery(queryStr);
}
q.setQuery(queryStr);
q.setRows(MAX_RESULTS_PER_CURSOR_MARK);
// Setting the sort order is necessary for cursor based paging to work.
q.setSort(SolrQuery.SortClause.asc(Server.Schema.ID.toString()));
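LanguageSpecificContentQueryHelper.expandQueryString() itself is not part of this commit view. Judging only from the call sites above, it presumably rewrites a literal query so that it also hits the language-specific content field; a purely hypothetical sketch of that kind of expansion (field names assumed, not taken from the actual helper):

```java
final class QueryExpansionSketch {
    /**
     * Hypothetical expansion: turn a quoted, escaped term such as "\"rainy day\""
     * into a query that matches both the default text field and content_ja.
     */
    static String expandQueryString(String quotedEscapedTerm) {
        return "text:" + quotedEscapedTerm + " OR content_ja:" + quotedEscapedTerm;
    }
}
```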
@@ -283,6 +313,11 @@ class LuceneQuery implements KeywordSearchQuery {
Server.Schema.CHUNK_SIZE.toString(),
Server.Schema.CONTENT_STR.toString());

if (2.2 <= indexSchemaVersion && originalKeyword.searchTermIsLiteral()) {
q.addField(Server.Schema.LANGUAGE.toString());
LanguageSpecificContentQueryHelper.configureTermfreqQuery(q, keywordStringEscaped);
}

for (KeywordQueryFilter filter : filters) {
q.addFilterQuery(filter.toString());
}
@@ -300,8 +335,16 @@ class LuceneQuery implements KeywordSearchQuery {
*
* @param q The SolrQuery to configure.
*/
private static void configurwQueryForHighlighting(SolrQuery q) {
q.addHighlightField(HIGHLIGHT_FIELD);
private static void configurwQueryForHighlighting(SolrQuery q) throws NoOpenCoreException {
double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());
if (2.2 <= indexSchemaVersion) {
for (Server.Schema field : LanguageSpecificContentQueryHelper.getQueryFields()) {
q.addHighlightField(field.toString());
}
} else {
q.addHighlightField(HIGHLIGHT_FIELD);
}

q.setHighlightSnippets(1);
q.setHighlightFragsize(SNIPPET_LENGTH);
@@ -404,7 +447,13 @@ class LuceneQuery implements KeywordSearchQuery {
if (responseHighlightID == null) {
return "";
}
List<String> contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
List<String> contentHighlights;
if (2.2 <= indexSchemaVersion) {
contentHighlights = LanguageSpecificContentQueryHelper.getHighlights(responseHighlightID).orElse(null);
} else {
contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
}
if (contentHighlights == null) {
return "";
} else {
@@ -130,6 +130,18 @@ public class Server {
return "content_ws"; //NON-NLS
}
},
CONTENT_JA {
@Override
public String toString() {
return "content_ja"; //NON-NLS
}
},
LANGUAGE {
@Override
public String toString() {
return "language"; //NON-NLS
}
},
FILE_NAME {
@Override
public String toString() {
@@ -175,7 +187,18 @@ public class Server {
public String toString() {
return "chunk_size"; //NON-NLS
}
}
},
/**
* termfreq is a function which returns the number of times the term appears.
* This is not an actual field defined in schema.xml, but can be gotten from returned documents
* in the same way as fields.
*/
TERMFREQ {
@Override
public String toString() {
return "termfreq"; //NON-NLS
}
}
};

public static final String HL_ANALYZE_CHARS_UNLIMITED = "500000"; //max 1MB in a chunk. use -1 for unlimited, but -1 option may not be supported (not documented)
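The TERMFREQ constant documented above leans on Solr's termfreq() function query: requested through the field list, it comes back on each result document just like a stored field, which is why the HighlightedText change earlier in this commit can read it with getFieldValue("termfreq") and cast it to a Float. A small SolrJ sketch of such a request; the field, term, and aliasing here are assumptions about what configureTermfreqQuery() likely builds, not code from this commit:

```java
import org.apache.solr.client.solrj.SolrQuery;

final class TermfreqQuerySketch {
    static SolrQuery build(String field, String escapedTerm) {
        SolrQuery q = new SolrQuery();
        q.setQuery(field + ":\"" + escapedTerm + "\"");
        // Alias the function result so it can be read back as getFieldValue("termfreq").
        q.addField("termfreq:termfreq(" + field + ",'" + escapedTerm + "')");
        return q;
    }
}
```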