Begin parsing term vectors in LuceneQuery

2025-07-16 09:47:42 +00:00 · 2017-01-11 23:56:54 +01:00 · 2017-01-11 23:56:54 +01:00 · c8c8f45687
commit c8c8f45687
parent 64b07cb4d2
1 changed file with 19 additions and 8 deletions
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java
@ -1,7 +1,7 @@
 /*
 * Autopsy Forensic Browser
 *
- * Copyright 2011-2015 Basis Technology Corp.
+ * Copyright 2011-2016 Basis Technology Corp.
 * Contact: carrier <at> sleuthkit <dot> org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
@ -20,18 +20,16 @@ package org.sleuthkit.autopsy.keywordsearch;

 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
-import java.util.TreeSet;
 import java.util.logging.Level;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrRequest.METHOD;
 import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.util.SimpleOrderedMap;
 import org.sleuthkit.autopsy.casemodule.Case;
 import org.sleuthkit.autopsy.coreutils.EscapeUtil;
 import org.sleuthkit.autopsy.coreutils.Logger;
@ -206,6 +204,7 @@ class LuceneQuery implements KeywordSearchQuery {
        response = solrServer.query(q, METHOD.POST);

        resultList = response.getResults();
+        SimpleOrderedMap termVectors = (SimpleOrderedMap) response.getResponse().get("termVectors");

        // objectId_chunk -> "text" -> List of previews
        highlightResponse = response.getHighlighting();
@ -225,8 +224,9 @@ class LuceneQuery implements KeywordSearchQuery {
            }
            for (SolrDocument resultDoc : resultList) {
                KeywordHit contentHit;
+
                try {
-                    contentHit = createKeywordtHit(resultDoc, highlightResponse, sleuthkitCase);
+                    contentHit = createKeywordtHit(resultDoc, highlightResponse, termVectors, sleuthkitCase);
                } catch (TskException ex) {
                    return matches;
                }
@ -247,7 +247,7 @@ class LuceneQuery implements KeywordSearchQuery {
        SolrQuery q = new SolrQuery();
        q.setShowDebugInfo(DEBUG); //debug
        // Wrap the query string in quotes if this is a literal search term.
-        String theQueryStr = keyword.searchTermIsLiteral() 
+        String theQueryStr = keyword.searchTermIsLiteral()
                ? KeywordSearchUtil.quoteQuery(keywordStringEscaped) : keywordStringEscaped;

        // Run the query against an optional alternative field. 
@ -260,12 +260,19 @@ class LuceneQuery implements KeywordSearchQuery {
        q.setQuery(theQueryStr);
        q.setRows(MAX_RESULTS);

-        q.setFields(Server.Schema.ID.toString());
+//        q.setFields(Server.Schema.ID.toString());
        q.addSort(Server.Schema.ID.toString(), SolrQuery.ORDER.asc);
        for (KeywordQueryFilter filter : filters) {
            q.addFilterQuery(filter.toString());
        }

+        q.setRequestHandler("/tvrh");
+        q.setParam("tv", true);
+        if (field != null) {
+            q.setParam("tv.fl", field);
+        }
+        q.setParam("tv.all", true);
+
        if (snippets) {
            q.addHighlightField(Server.Schema.TEXT.toString());
            //q.setHighlightSimplePre("&laquo;"); //original highlighter only
@ -290,12 +297,15 @@ class LuceneQuery implements KeywordSearchQuery {
        return q;
    }

-    private KeywordHit createKeywordtHit(SolrDocument solrDoc, Map<String, Map<String, List<String>>> highlightResponse, SleuthkitCase caseDb) throws TskException {
+    private KeywordHit createKeywordtHit(SolrDocument solrDoc, Map<String, Map<String, List<String>>> highlightResponse, SimpleOrderedMap termVectors, SleuthkitCase caseDb) throws TskException {
        /**
         * Get the first snippet from the document if keyword search is
         * configured to use snippets.
         */
        final String docId = solrDoc.getFieldValue(Server.Schema.ID.toString()).toString();
+
+        SimpleOrderedMap<?> get = (SimpleOrderedMap<?>) termVectors.get(docId);
+
        String snippet = "";
        if (KeywordSearchSettings.getShowSnippets()) {
            List<String> snippetList = highlightResponse.get(docId).get(Server.Schema.TEXT.toString());
@ -304,6 +314,7 @@ class LuceneQuery implements KeywordSearchQuery {
                snippet = EscapeUtil.unEscapeHtml(snippetList.get(0)).trim();
            }
        }
+
        return new KeywordHit(docId, snippet);
    }