diff --git a/Core/src/org/sleuthkit/autopsy/modules/encryptiondetection/Bundle.properties-MERGED b/Core/src/org/sleuthkit/autopsy/modules/encryptiondetection/Bundle.properties-MERGED index 36198317db..905c5b3d84 100755 --- a/Core/src/org/sleuthkit/autopsy/modules/encryptiondetection/Bundle.properties-MERGED +++ b/Core/src/org/sleuthkit/autopsy/modules/encryptiondetection/Bundle.properties-MERGED @@ -1,7 +1,6 @@ EncryptionDetectionDataSourceIngestModule.artifactComment.bitlocker=Bitlocker encryption detected. EncryptionDetectionDataSourceIngestModule.artifactComment.suspected=Suspected encryption due to high entropy (%f). EncryptionDetectionDataSourceIngestModule.processing.message=Checking image for encryption. -EncryptionDetectionFileIngestModule.artifactComment.location=High entropy and known location/extension. EncryptionDetectionFileIngestModule.artifactComment.password=Password protection detected. EncryptionDetectionFileIngestModule.artifactComment.suspected=Suspected encryption due to high entropy (%f). EncryptionDetectionFileIngestModule.getDesc.text=Looks for files with the specified minimum entropy. diff --git a/Core/src/org/sleuthkit/autopsy/modules/encryptiondetection/EncryptionDetectionFileIngestModule.java b/Core/src/org/sleuthkit/autopsy/modules/encryptiondetection/EncryptionDetectionFileIngestModule.java index 20252d4eb4..9afafb7831 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/encryptiondetection/EncryptionDetectionFileIngestModule.java +++ b/Core/src/org/sleuthkit/autopsy/modules/encryptiondetection/EncryptionDetectionFileIngestModule.java @@ -29,8 +29,6 @@ import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.nio.BufferUnderflowException; -import java.util.HashMap; -import java.util.Map; import java.util.logging.Level; import org.apache.tika.exception.EncryptedDocumentException; import org.apache.tika.exception.TikaException; @@ -77,11 +75,6 @@ final class EncryptionDetectionFileIngestModule extends FileIngestModuleAdapter private static final String MIME_TYPE_PDF = "application/pdf"; private static final String[] FILE_IGNORE_LIST = {"hiberfile.sys", "pagefile.sys"}; - - /** - * This maps file locations to file extensions that are known to be encrypted - */ - private static final Map knownEncryptedLocationExtensions = createLocationExtensionMap(); private final IngestServices services = IngestServices.getInstance(); private final Logger logger = services.getLogger(EncryptionDetectionModuleFactory.getModuleName()); @@ -113,7 +106,7 @@ final class EncryptionDetectionFileIngestModule extends FileIngestModuleAdapter public void startUp(IngestJobContext context) throws IngestModule.IngestModuleException { try { validateSettings(); - this.context = context; + this.context = context; blackboard = Case.getCurrentCaseThrows().getSleuthkitCase().getBlackboard(); fileTypeDetector = new FileTypeDetector(); @@ -126,7 +119,6 @@ final class EncryptionDetectionFileIngestModule extends FileIngestModuleAdapter @Messages({ "EncryptionDetectionFileIngestModule.artifactComment.password=Password protection detected.", - "EncryptionDetectionFileIngestModule.artifactComment.location=High entropy and known location/extension.", "EncryptionDetectionFileIngestModule.artifactComment.suspected=Suspected encryption due to high entropy (%f)." }) @Override @@ -138,12 +130,12 @@ final class EncryptionDetectionFileIngestModule extends FileIngestModuleAdapter * verify the file hasn't been deleted. 
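* (Unallocated/unused blocks, virtual and local directories, slack files
* -- unless slackFilesAllowed -- known files, and files with the
* unallocated meta flag are all skipped by the check below.)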
*/ if (!file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) - && !file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS) - && !file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.VIRTUAL_DIR) - && !file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.LOCAL_DIR) - && (!file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.SLACK) || slackFilesAllowed) - && !file.getKnown().equals(TskData.FileKnown.KNOWN) - && !file.isMetaFlagSet(TskData.TSK_FS_META_FLAG_ENUM.UNALLOC)) { + && !file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS) + && !file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.VIRTUAL_DIR) + && !file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.LOCAL_DIR) + && (!file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.SLACK) || slackFilesAllowed) + && !file.getKnown().equals(TskData.FileKnown.KNOWN) + && !file.isMetaFlagSet(TskData.TSK_FS_META_FLAG_ENUM.UNALLOC)) { /* * Is the file in FILE_IGNORE_LIST? */ @@ -163,9 +155,6 @@ final class EncryptionDetectionFileIngestModule extends FileIngestModuleAdapter */ String mimeType = fileTypeDetector.getMIMEType(file); if (mimeType.equals("application/octet-stream") && isFileEncryptionSuspected(file)) { - if (checkFileLocationExtension(file)) { - return flagFile(file, BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED, Bundle.EncryptionDetectionFileIngestModule_artifactComment_location()); - } return flagFile(file, BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_SUSPECTED, String.format(Bundle.EncryptionDetectionFileIngestModule_artifactComment_suspected(), calculatedEntropy)); } else if (isFilePasswordProtected(file)) { @@ -209,7 +198,7 @@ final class EncryptionDetectionFileIngestModule extends FileIngestModuleAdapter if (context.fileIngestIsCancelled()) { return IngestModule.ProcessResult.OK; } - + BlackboardArtifact artifact = file.newArtifact(artifactType); artifact.addAttribute(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_COMMENT, EncryptionDetectionModuleFactory.getModuleName(), comment)); @@ -336,7 +325,7 @@ final class EncryptionDetectionFileIngestModule extends FileIngestModuleAdapter try { accessDatabase = databaseBuilder.open(); } catch (IOException | BufferUnderflowException | IndexOutOfBoundsException ignored) { - return passwordProtected; + return passwordProtected; } /* * No exception has been thrown at this point, so the file @@ -417,36 +406,4 @@ final class EncryptionDetectionFileIngestModule extends FileIngestModuleAdapter return possiblyEncrypted; } - - /** - * This method checks if the AbstractFile input is in a location that is - * known to hold encrypted files. It must meet the requirements and location - * of known encrypted file(s) - * - * @param file AbstractFile to be checked. - * - * @return True if file extension and location match known values. 
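* e.g. an "asec" file whose parent path is ".android_secure", the one
* entry in knownEncryptedLocationExtensions.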
- * - */ - private boolean checkFileLocationExtension(AbstractFile file) { - String filePath = file.getParentPath().replace("/", ""); - if ((knownEncryptedLocationExtensions.containsKey(filePath)) - && (knownEncryptedLocationExtensions.get(filePath).equals(file.getNameExtension()))) - { - return true; - } - return false; - } - - /* - * This method creates the map of paths and extensions that are known to - * have encrypted files - * - * @return Map of path and extension of files - */ - private static Map createLocationExtensionMap() { - Map locationExtensionMap = new HashMap(); - locationExtensionMap.put(".android_secure", "asec"); - return locationExtensionMap; - } } diff --git a/KeywordSearch/ivy.xml b/KeywordSearch/ivy.xml index 9f39e52d7c..2d7bb8b4a5 100644 --- a/KeywordSearch/ivy.xml +++ b/KeywordSearch/ivy.xml @@ -21,7 +21,6 @@ - diff --git a/KeywordSearch/nbproject/project.properties b/KeywordSearch/nbproject/project.properties index 86b4519538..1041bdd524 100644 --- a/KeywordSearch/nbproject/project.properties +++ b/KeywordSearch/nbproject/project.properties @@ -29,7 +29,6 @@ file.reference.jericho-html-3.3.jar=release/modules/ext/jericho-html-3.3.jar file.reference.joda-time-2.2.jar=release/modules/ext/joda-time-2.2.jar file.reference.json-simple-1.1.1.jar=release/modules/ext/json-simple-1.1.1.jar file.reference.juniversalchardet-1.0.3.jar=release/modules/ext/juniversalchardet-1.0.3.jar -file.reference.language-detector-0.6.jar=release/modules/ext/language-detector-0.6.jar file.reference.libsvm-3.1.jar=release/modules/ext/libsvm-3.1.jar file.reference.log4j-1.2.17.jar=release/modules/ext/log4j-1.2.17.jar file.reference.lucene-core-4.0.0.jar=release/modules/ext/lucene-core-4.0.0.jar diff --git a/KeywordSearch/nbproject/project.xml b/KeywordSearch/nbproject/project.xml index 96bbdef0c5..3f40ab3ace 100644 --- a/KeywordSearch/nbproject/project.xml +++ b/KeywordSearch/nbproject/project.xml @@ -467,10 +467,6 @@ ext/vorbis-java-tika-0.8.jar release/modules/ext/vorbis-java-tika-0.8.jar - - ext/language-detector-0.6.jar - release/modules/ext/language-detector-0.6.jar - diff --git a/KeywordSearch/solr/solr/configsets/AutopsyConfig/conf/lang/stoptags_ja.txt b/KeywordSearch/solr/solr/configsets/AutopsyConfig/conf/lang/stoptags_ja.txt deleted file mode 100644 index 71b750845e..0000000000 --- a/KeywordSearch/solr/solr/configsets/AutopsyConfig/conf/lang/stoptags_ja.txt +++ /dev/null @@ -1,420 +0,0 @@ -# -# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter. -# -# Any token with a part-of-speech tag that exactly matches those defined in this -# file are removed from the token stream. -# -# Set your own stoptags by uncommenting the lines below. Note that comments are -# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists, -# etc. that can be useful for building you own stoptag set. -# -# The entire possible tagset is provided below for convenience. -# -##### -# noun: unclassified nouns -#名詞 -# -# noun-common: Common nouns or nouns where the sub-classification is undefined -#名詞-一般 -# -# noun-proper: Proper nouns where the sub-classification is undefined -#名詞-固有名詞 -# -# noun-proper-misc: miscellaneous proper nouns -#名詞-固有名詞-一般 -# -# noun-proper-person: Personal names where the sub-classification is undefined -#名詞-固有名詞-人名 -# -# noun-proper-person-misc: names that cannot be divided into surname and -# given name; foreign names; names where the surname or given name is unknown. -# e.g. 
お市の方 -#名詞-固有名詞-人名-一般 -# -# noun-proper-person-surname: Mainly Japanese surnames. -# e.g. 山田 -#名詞-固有名詞-人名-姓 -# -# noun-proper-person-given_name: Mainly Japanese given names. -# e.g. 太郎 -#名詞-固有名詞-人名-名 -# -# noun-proper-organization: Names representing organizations. -# e.g. 通産省, NHK -#名詞-固有名詞-組織 -# -# noun-proper-place: Place names where the sub-classification is undefined -#名詞-固有名詞-地域 -# -# noun-proper-place-misc: Place names excluding countries. -# e.g. アジア, バルセロナ, 京都 -#名詞-固有名詞-地域-一般 -# -# noun-proper-place-country: Country names. -# e.g. 日本, オーストラリア -#名詞-固有名詞-地域-国 -# -# noun-pronoun: Pronouns where the sub-classification is undefined -#名詞-代名詞 -# -# noun-pronoun-misc: miscellaneous pronouns: -# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ -#名詞-代名詞-一般 -# -# noun-pronoun-contraction: Spoken language contraction made by combining a -# pronoun and the particle 'wa'. -# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ -#名詞-代名詞-縮約 -# -# noun-adverbial: Temporal nouns such as names of days or months that behave -# like adverbs. Nouns that represent amount or ratios and can be used adverbially, -# e.g. 金曜, 一月, 午後, 少量 -#名詞-副詞可能 -# -# noun-verbal: Nouns that take arguments with case and can appear followed by -# 'suru' and related verbs (する, できる, なさる, くださる) -# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り -#名詞-サ変接続 -# -# noun-adjective-base: The base form of adjectives, words that appear before な ("na") -# e.g. 健康, 安易, 駄目, だめ -#名詞-形容動詞語幹 -# -# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数. -# e.g. 0, 1, 2, 何, 数, 幾 -#名詞-数 -# -# noun-affix: noun affixes where the sub-classification is undefined -#名詞-非自立 -# -# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that -# attach to the base form of inflectional words, words that cannot be classified -# into any of the other categories below. This category includes indefinite nouns. -# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第, -# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み, -# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳, -# わり, 割り, 割, ん-口語/, もん-口語/ -#名詞-非自立-一般 -# -# noun-affix-adverbial: noun affixes that that can behave as adverbs. -# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ, -# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか, -# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所, -# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま, -# 儘, 侭, みぎり, 矢先 -#名詞-非自立-副詞可能 -# -# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars -# with the stem よう(だ) ("you(da)"). -# e.g. よう, やう, 様 (よう) -#名詞-非自立-助動詞語幹 -# -# noun-affix-adjective-base: noun affixes that can connect to the indeclinable -# connection form な (aux "da"). -# e.g. みたい, ふう -#名詞-非自立-形容動詞語幹 -# -# noun-special: special nouns where the sub-classification is undefined. -#名詞-特殊 -# -# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is -# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base -# form of inflectional words. -# e.g. そう -#名詞-特殊-助動詞語幹 -# -# noun-suffix: noun suffixes where the sub-classification is undefined. -#名詞-接尾 -# -# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect -# to ガル or タイ and can combine into compound nouns, words that cannot be classified into -# any of the other categories below. In general, this category is more inclusive than -# 接尾語 ("suffix") and is usually the last element in a compound noun. -# e.g. 
おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み, -# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用 -#名詞-接尾-一般 -# -# noun-suffix-person: Suffixes that form nouns and attach to person names more often -# than other nouns. -# e.g. 君, 様, 著 -#名詞-接尾-人名 -# -# noun-suffix-place: Suffixes that form nouns and attach to place names more often -# than other nouns. -# e.g. 町, 市, 県 -#名詞-接尾-地域 -# -# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that -# can appear before スル ("suru"). -# e.g. 化, 視, 分け, 入り, 落ち, 買い -#名詞-接尾-サ変接続 -# -# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions, -# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the -# conjunctive form of inflectional words. -# e.g. そう -#名詞-接尾-助動詞語幹 -# -# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive -# form of inflectional words and appear before the copula だ ("da"). -# e.g. 的, げ, がち -#名詞-接尾-形容動詞語幹 -# -# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs. -# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ) -#名詞-接尾-副詞可能 -# -# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category -# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach -# to numbers. -# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半 -#名詞-接尾-助数詞 -# -# noun-suffix-special: Special suffixes that mainly attach to inflecting words. -# e.g. (楽し) さ, (考え) 方 -#名詞-接尾-特殊 -# -# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words -# together. -# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦) -#名詞-接続詞的 -# -# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are -# semantically verb-like. -# e.g. ごらん, ご覧, 御覧, 頂戴 -#名詞-動詞非自立的 -# -# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, -# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation") -# is いわく ("iwaku"). -#名詞-引用文字列 -# -# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and -# behave like an adjective. -# e.g. 申し訳, 仕方, とんでも, 違い -#名詞-ナイ形容詞語幹 -# -##### -# prefix: unclassified prefixes -#接頭詞 -# -# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) -# excluding numerical expressions. -# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派) -#接頭詞-名詞接続 -# -# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb -# in conjunctive form followed by なる/なさる/くださる. -# e.g. お (読みなさい), お (座り) -#接頭詞-動詞接続 -# -# prefix-adjectival: Prefixes that attach to adjectives. -# e.g. お (寒いですねえ), バカ (でかい) -#接頭詞-形容詞接続 -# -# prefix-numerical: Prefixes that attach to numerical expressions. -# e.g. 約, およそ, 毎時 -#接頭詞-数接続 -# -##### -# verb: unclassified verbs -#動詞 -# -# verb-main: -#動詞-自立 -# -# verb-auxiliary: -#動詞-非自立 -# -# verb-suffix: -#動詞-接尾 -# -##### -# adjective: unclassified adjectives -#形容詞 -# -# adjective-main: -#形容詞-自立 -# -# adjective-auxiliary: -#形容詞-非自立 -# -# adjective-suffix: -#形容詞-接尾 -# -##### -# adverb: unclassified adverbs -#副詞 -# -# adverb-misc: Words that can be segmented into one unit and where adnominal -# modification is not possible. -# e.g. あいかわらず, 多分 -#副詞-一般 -# -# adverb-particle_conjunction: Adverbs that can be followed by の, は, に, -# な, する, だ, etc. -# e.g. こんなに, そんなに, あんなに, なにか, なんでも -#副詞-助詞類接続 -# -##### -# adnominal: Words that only have noun-modifying forms. -# e.g. 
この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう, -# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした, -# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き -#連体詞 -# -##### -# conjunction: Conjunctions that can occur independently. -# e.g. が, けれども, そして, じゃあ, それどころか -接続詞 -# -##### -# particle: unclassified particles. -助詞 -# -# particle-case: case particles where the subclassification is undefined. -助詞-格助詞 -# -# particle-case-misc: Case particles. -# e.g. から, が, で, と, に, へ, より, を, の, にて -助詞-格助詞-一般 -# -# particle-case-quote: the "to" that appears after nouns, a person’s speech, -# quotation marks, expressions of decisions from a meeting, reasons, judgements, -# conjectures, etc. -# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...) -助詞-格助詞-引用 -# -# particle-case-compound: Compounds of particles and verbs that mainly behave -# like case particles. -# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って, -# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける, -# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し, -# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして, -# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって, -# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る, -# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる, -# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ -助詞-格助詞-連語 -# -# particle-conjunctive: -# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども, -# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/, -# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/ -助詞-接続助詞 -# -# particle-dependency: -# e.g. こそ, さえ, しか, すら, は, も, ぞ -助詞-係助詞 -# -# particle-adverbial: -# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/, -# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/, -# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに, -# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/, -# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」) -助詞-副助詞 -# -# particle-interjective: particles with interjective grammatical roles. -# e.g. (松島) や -助詞-間投助詞 -# -# particle-coordinate: -# e.g. と, たり, だの, だり, とか, なり, や, やら -助詞-並立助詞 -# -# particle-final: -# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ, -# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/ -助詞-終助詞 -# -# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is -# adverbial, conjunctive, or sentence final. For example: -# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」 -# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」 -# 「(祈りが届いたせい) か (, 試験に合格した.)」 -# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」 -# e.g. か -助詞-副助詞/並立助詞/終助詞 -# -# particle-adnominalizer: The "no" that attaches to nouns and modifies -# non-inflectional words. -助詞-連体化 -# -# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs -# that are giongo, giseigo, or gitaigo. -# e.g. に, と -助詞-副詞化 -# -# particle-special: A particle that does not fit into one of the above classifications. -# This includes particles that are used in Tanka, Haiku, and other poetry. -# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家) -助詞-特殊 -# -##### -# auxiliary-verb: -助動詞 -# -##### -# interjection: Greetings and other exclamations. -# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます, -# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい -#感動詞 -# -##### -# symbol: unclassified Symbols. -記号 -# -# symbol-misc: A general symbol not in one of the categories below. 
-# e.g. [○◎@$〒→+] -記号-一般 -# -# symbol-comma: Commas -# e.g. [,、] -記号-読点 -# -# symbol-period: Periods and full stops. -# e.g. [..。] -記号-句点 -# -# symbol-space: Full-width whitespace. -記号-空白 -# -# symbol-open_bracket: -# e.g. [({‘“『【] -記号-括弧開 -# -# symbol-close_bracket: -# e.g. [)}’”』」】] -記号-括弧閉 -# -# symbol-alphabetic: -#記号-アルファベット -# -##### -# other: unclassified other -#その他 -# -# other-interjection: Words that are hard to classify as noun-suffixes or -# sentence-final particles. -# e.g. (だ)ァ -その他-間投 -# -##### -# filler: Aizuchi that occurs during a conversation or sounds inserted as filler. -# e.g. あの, うんと, えと -フィラー -# -##### -# non-verbal: non-verbal sound. -非言語音 -# -##### -# fragment: -#語断片 -# -##### -# unknown: unknown part of speech. -#未知語 -# -##### End of file diff --git a/KeywordSearch/solr/solr/configsets/AutopsyConfig/conf/lang/stopwords_ja.txt b/KeywordSearch/solr/solr/configsets/AutopsyConfig/conf/lang/stopwords_ja.txt deleted file mode 100644 index d4321be6b1..0000000000 --- a/KeywordSearch/solr/solr/configsets/AutopsyConfig/conf/lang/stopwords_ja.txt +++ /dev/null @@ -1,127 +0,0 @@ -# -# This file defines a stopword set for Japanese. -# -# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia. -# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745 -# for frequency lists, etc. that can be useful for making your own set (if desired) -# -# Note that there is an overlap between these stopwords and the terms stopped when used -# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note -# that comments are not allowed on the same line as stopwords. -# -# Also note that stopping is done in a case-insensitive manner. Change your StopFilter -# configuration if you need case-sensitive stopping. Lastly, note that stopping is done -# using the same character width as the entries in this file. Since this StopFilter is -# normally done after a CJKWidthFilter in your chain, you would usually want your romaji -# entries to be in half-width and your kana entries to be in full-width. 
-# -の -に -は -を -た -が -で -て -と -し -れ -さ -ある -いる -も -する -から -な -こと -として -い -や -れる -など -なっ -ない -この -ため -その -あっ -よう -また -もの -という -あり -まで -られ -なる -へ -か -だ -これ -によって -により -おり -より -による -ず -なり -られる -において -ば -なかっ -なく -しかし -について -せ -だっ -その後 -できる -それ -う -ので -なお -のみ -でき -き -つ -における -および -いう -さらに -でも -ら -たり -その他 -に関する -たち -ます -ん -なら -に対して -特に -せる -及び -これら -とき -では -にて -ほか -ながら -うち -そして -とともに -ただし -かつて -それぞれ -または -お -ほど -ものの -に対する -ほとんど -と共に -といった -です -とも -ところ -ここ -##### End of file diff --git a/KeywordSearch/solr/solr/configsets/AutopsyConfig/conf/schema.xml b/KeywordSearch/solr/solr/configsets/AutopsyConfig/conf/schema.xml index bbc68fea00..05ea8891a5 100644 --- a/KeywordSearch/solr/solr/configsets/AutopsyConfig/conf/schema.xml +++ b/KeywordSearch/solr/solr/configsets/AutopsyConfig/conf/schema.xml @@ -45,7 +45,7 @@ that avoids logging every request --> - + @@ -244,18 +243,6 @@ - - - - - - - - - - - - - - - - - diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java index 99d4f40820..240c10e431 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java @@ -38,7 +38,6 @@ import org.apache.commons.lang3.math.NumberUtils; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrRequest.METHOD; import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.openide.util.NbBundle; import org.sleuthkit.autopsy.coreutils.Logger; @@ -347,8 +346,6 @@ class HighlightedText implements IndexedText { String chunkID = ""; String highlightField = ""; try { - double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion()); - loadPageInfo(); //inits once SolrQuery q = new SolrQuery(); q.setShowDebugInfo(DEBUG); //debug @@ -362,46 +359,22 @@ class HighlightedText implements IndexedText { highlightField = LuceneQuery.HIGHLIGHT_FIELD; if (isLiteral) { - if (2.2 <= indexSchemaVersion) { - //if the query is literal try to get solr to do the highlighting - final String highlightQuery = keywords.stream().map(s -> - LanguageSpecificContentQueryHelper.expandQueryString(KeywordSearchUtil.quoteQuery(KeywordSearchUtil.escapeLuceneQuery(s)))) - .collect(Collectors.joining(" OR ")); - q.setQuery(highlightQuery); - for (Server.Schema field : LanguageSpecificContentQueryHelper.getQueryFields()) { - q.addField(field.toString()); - q.addHighlightField(field.toString()); - } - q.addField(Server.Schema.LANGUAGE.toString()); - // in case of single term literal query there is only 1 term - LanguageSpecificContentQueryHelper.configureTermfreqQuery(q, keywords.iterator().next()); - q.addFilterQuery(filterQuery); - q.setHighlightFragsize(0); // don't fragment the highlight, works with original highlighter, or needs "single" list builder with FVH - } else { - //if the query is literal try to get solr to do the highlighting - final String highlightQuery = keywords.stream() - .map(HighlightedText::constructEscapedSolrQuery) - .collect(Collectors.joining(" ")); + //if the query is literal try to get solr to do the highlighting + final String highlightQuery = keywords.stream() + .map(HighlightedText::constructEscapedSolrQuery) + .collect(Collectors.joining(" ")); - q.setQuery(highlightQuery); - q.addField(highlightField); - q.addFilterQuery(filterQuery); - 
q.addHighlightField(highlightField); - q.setHighlightFragsize(0); // don't fragment the highlight, works with original highlighter, or needs "single" list builder with FVH - } + q.setQuery(highlightQuery); + q.addField(highlightField); + q.addFilterQuery(filterQuery); + q.addHighlightField(highlightField); + q.setHighlightFragsize(0); // don't fragment the highlight, works with original highlighter, or needs "single" list builder with FVH //tune the highlighter - if (shouldUseOriginalHighlighter(contentIdStr)) { - // use original highlighter - q.setParam("hl.useFastVectorHighlighter", "off"); - q.setParam("hl.simple.pre", HIGHLIGHT_PRE); - q.setParam("hl.simple.post", HIGHLIGHT_POST); - } else { - q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS - q.setParam("hl.tag.pre", HIGHLIGHT_PRE); //makes sense for FastVectorHighlighter only NON-NLS - q.setParam("hl.tag.post", HIGHLIGHT_POST); //makes sense for FastVectorHighlighter only NON-NLS - q.setParam("hl.fragListBuilder", "single"); //makes sense for FastVectorHighlighter only NON-NLS - } + q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS + q.setParam("hl.tag.pre", HIGHLIGHT_PRE); //makes sense for FastVectorHighlighter only NON-NLS + q.setParam("hl.tag.post", HIGHLIGHT_POST); //makes sense for FastVectorHighlighter only NON-NLS + q.setParam("hl.fragListBuilder", "single"); //makes sense for FastVectorHighlighter only NON-NLS //docs says makes sense for the original Highlighter only, but not really q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //NON-NLS @@ -433,40 +406,12 @@ class HighlightedText implements IndexedText { if (responseHighlightID == null) { highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords); } else { - SolrDocument document = response.getResults().get(0); - Object language = document.getFieldValue(Server.Schema.LANGUAGE.toString()); - if (2.2 <= indexSchemaVersion && language != null) { - List contentHighlights = LanguageSpecificContentQueryHelper.getHighlights(responseHighlightID).orElse(null); - if (contentHighlights == null) { - highlightedContent = ""; - } else { - int hitCountInMiniChunk = LanguageSpecificContentQueryHelper.queryChunkTermfreq(keywords, MiniChunkHelper.getChunkIdString(contentIdStr)); - String s = contentHighlights.get(0).trim(); - // If there is a mini-chunk, trim the content not to show highlighted text in it. 
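// Illustrative, with assumed counts: if the chunk reports 3 term hits and
// its mini-chunk reports 1, the preview is cut just before the 3rd opening
// highlight tag, keeping only the 2 hits that belong to the base chunk.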
- if (0 < hitCountInMiniChunk) { - int hitCountInChunk = ((Float) document.getFieldValue(Server.Schema.TERMFREQ.toString())).intValue(); - int idx = LanguageSpecificContentQueryHelper.findNthIndexOf( - s, - HIGHLIGHT_PRE, - // trim after the last hit in chunk - hitCountInChunk - hitCountInMiniChunk); - if (idx != -1) { - highlightedContent = s.substring(0, idx); - } else { - highlightedContent = s; - } - } else { - highlightedContent = s; - } - } + List contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD); + if (contentHighlights == null) { + highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords); } else { - List contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD); - if (contentHighlights == null) { - highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords); - } else { - // extracted content (minus highlight tags) is HTML-escaped - highlightedContent = contentHighlights.get(0).trim(); - } + // extracted content (minus highlight tags) is HTML-escaped + highlightedContent = contentHighlights.get(0).trim(); } } } @@ -606,37 +551,4 @@ class HighlightedText implements IndexedText { return buf.toString(); } - /** - * Return true if we should use original highlighter instead of FastVectorHighlighter. - * - * In the case Japanese text and phrase query, FastVectorHighlighter does not work well. - * - * Note about highlighters: - * If the query is "雨が降る" (phrase query), Solr divides it into 雨 and 降る. が is a stop word here. - * It seems that FastVector highlighter does not produce any snippet when there is a stop word between terms. - * On the other hand, original highlighter produces multiple matches, for example: - * > 降っています - * Unified highlighter (from Solr 6.4) handles the case as expected: - * > 雨が降っています。 - */ - private boolean shouldUseOriginalHighlighter(String contentID) throws NoOpenCoreException, KeywordSearchModuleException { - final SolrQuery q = new SolrQuery(); - q.setQuery("*:*"); - q.addFilterQuery(Server.Schema.ID.toString() + ":" + contentID); - q.setFields(Server.Schema.LANGUAGE.toString()); - - QueryResponse response = solrServer.query(q, METHOD.POST); - SolrDocumentList solrDocuments = response.getResults(); - - if (!solrDocuments.isEmpty()) { - SolrDocument solrDocument = solrDocuments.get(0); - if (solrDocument != null) { - Object languageField = solrDocument.getFieldValue(Server.Schema.LANGUAGE.toString()); - if (languageField != null) { - return languageField.equals("ja"); - } - } - } - return false; - } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/IndexFinder.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/IndexFinder.java index e2abde6eb0..e46791d270 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/IndexFinder.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/IndexFinder.java @@ -39,7 +39,7 @@ class IndexFinder { private static final String KWS_DATA_FOLDER_NAME = "data"; private static final String INDEX_FOLDER_NAME = "index"; private static final String CURRENT_SOLR_VERSION = "4"; - private static final String CURRENT_SOLR_SCHEMA_VERSION = "2.2"; + private static final String CURRENT_SOLR_SCHEMA_VERSION = "2.1"; static String getCurrentSolrVersion() { return CURRENT_SOLR_VERSION; diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java index be0b93088d..bcdf143697 100644 --- 
a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java @@ -20,10 +20,8 @@ package org.sleuthkit.autopsy.keywordsearch; import java.io.BufferedReader; import java.io.Reader; -import java.util.Collections; import java.util.HashMap; import java.util.Map; -import java.util.Optional; import java.util.logging.Level; import org.apache.commons.lang3.math.NumberUtils; import org.apache.solr.client.solrj.SolrServerException; @@ -61,8 +59,6 @@ class Ingester { private final Server solrServer = KeywordSearch.getServer(); private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR = new SolrFieldsVisitor(); private static Ingester instance; - private final LanguageSpecificContentIndexingHelper languageSpecificContentIndexingHelper - = new LanguageSpecificContentIndexingHelper(); private Ingester() { } @@ -97,7 +93,7 @@ class Ingester { * file, but the Solr server is probably fine. */ void indexMetaDataOnly(AbstractFile file) throws IngesterException { - indexChunk("", file.getName().toLowerCase(), new HashMap<>(getContentFields(file))); + indexChunk("", file.getName().toLowerCase(), getContentFields(file)); } /** @@ -111,7 +107,7 @@ class Ingester { * artifact, but the Solr server is probably fine. */ void indexMetaDataOnly(BlackboardArtifact artifact, String sourceName) throws IngesterException { - indexChunk("", sourceName, new HashMap<>(getContentFields(artifact))); + indexChunk("", sourceName, getContentFields(artifact)); } /** @@ -147,30 +143,21 @@ class Ingester { < T extends SleuthkitVisitableItem> boolean indexText(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context) throws Ingester.IngesterException { int numChunks = 0; //unknown until chunking is done - Map contentFields = Collections.unmodifiableMap(getContentFields(source)); + Map fields = getContentFields(source); //Get a reader for the content of the given source try (BufferedReader reader = new BufferedReader(sourceReader)) { Chunker chunker = new Chunker(reader); - while (chunker.hasNext()) { + for (Chunk chunk : chunker) { if (context != null && context.fileIngestIsCancelled()) { logger.log(Level.INFO, "File ingest cancelled. 
Cancelling keyword search indexing of {0}", sourceName); return false; } - - Chunk chunk = chunker.next(); - Map fields = new HashMap<>(contentFields); String chunkId = Server.getChunkIdString(sourceID, numChunks + 1); fields.put(Server.Schema.ID.toString(), chunkId); fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength())); - Optional language = languageSpecificContentIndexingHelper.detectLanguageIfNeeded(chunk); - language.ifPresent(lang -> languageSpecificContentIndexingHelper.updateLanguageSpecificFields(fields, chunk, lang)); try { //add the chunk text to Solr index indexChunk(chunk.toString(), sourceName, fields); - // add mini chunk when there's a language specific field - if (chunker.hasNext() && language.isPresent()) { - languageSpecificContentIndexingHelper.indexMiniChunk(chunk, sourceName, new HashMap<>(contentFields), chunkId, language.get()); - } numChunks++; } catch (Ingester.IngesterException ingEx) { logger.log(Level.WARNING, "Ingester had a problem with extracted string from file '" //NON-NLS @@ -190,7 +177,6 @@ class Ingester { if (context != null && context.fileIngestIsCancelled()) { return false; } else { - Map fields = new HashMap<>(contentFields); //after all chunks, index just the meta data, including the numChunks, of the parent file fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks)); //reset id field to base document id @@ -216,7 +202,7 @@ class Ingester { * * @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException */ - private void indexChunk(String chunk, String sourceName, Map fields) throws IngesterException { + private void indexChunk(String chunk, String sourceName, Map fields) throws IngesterException { if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) { //JMTODO: actually if the we couldn't get the image id it is set to -1, // but does this really mean we don't want to index it? diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Language.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Language.java deleted file mode 100644 index 5fb1f859d3..0000000000 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Language.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Autopsy Forensic Browser - * - * Copyright 2011-2019 Basis Technology Corp. - * Contact: carrier sleuthkit org - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.sleuthkit.autopsy.keywordsearch; - -import java.util.Arrays; -import java.util.Optional; - -/** - * Language. - * - * Contents which are detected to have these languages should be indexed to a corresponding language-specific field - * such as content_ja. 
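* e.g. a chunk whose text is detected as Japanese ("ja") is indexed into
* content_ja in addition to the default text field.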
- */ -public enum Language { - JAPANESE("ja"); - - private String value; - - String getValue() { - return value; - } - - static Optional fromValue(String value) { - return Arrays.stream(Language.values()).filter(x -> x.value.equals(value)).findFirst(); - } - - Language(String value) { - this.value = value; - } -} diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageDetector.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageDetector.java deleted file mode 100644 index f527a2fc0e..0000000000 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageDetector.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Autopsy Forensic Browser - * - * Copyright 2011-2019 Basis Technology Corp. - * Contact: carrier sleuthkit org - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.sleuthkit.autopsy.keywordsearch; - -import com.optimaize.langdetect.LanguageDetectorBuilder; -import com.optimaize.langdetect.i18n.LdLocale; -import com.optimaize.langdetect.ngram.NgramExtractors; -import com.optimaize.langdetect.profiles.LanguageProfileReader; -import com.optimaize.langdetect.text.CommonTextObjectFactories; -import com.optimaize.langdetect.text.TextObject; -import com.optimaize.langdetect.text.TextObjectFactory; - -import java.io.IOException; -import java.io.UncheckedIOException; -import java.util.Optional; - -/** - * Detects the language of the given contents. Only languages which should be indexed to a corresponding - * language-specific field are detected. - */ -class LanguageDetector { - - private com.optimaize.langdetect.LanguageDetector impl; - private TextObjectFactory textObjectFactory; - - LanguageDetector() { - try { - impl = LanguageDetectorBuilder.create(NgramExtractors.standard()) - .withProfiles(new LanguageProfileReader().readAllBuiltIn()) - .build(); - textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText(); - } catch (IOException e) { - // The IOException here could occur when failing to read the language profiles from the classpath. - // That can be considered to be a severe IO problem. Nothing can be done here. - throw new UncheckedIOException(e); - } - } - - Optional detect(String text) { - TextObject textObject = textObjectFactory.forText(text); - Optional localeOpt = impl.detect(textObject).transform(Optional::of).or(Optional.empty()); - return localeOpt.map(LdLocale::getLanguage).flatMap(Language::fromValue); - } -} diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentIndexingHelper.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentIndexingHelper.java deleted file mode 100644 index d0988c83f3..0000000000 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentIndexingHelper.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Autopsy Forensic Browser - * - * Copyright 2011-2019 Basis Technology Corp. 
- * Contact: carrier sleuthkit org - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.sleuthkit.autopsy.keywordsearch; - -import org.apache.commons.lang3.math.NumberUtils; -import org.apache.solr.common.SolrInputDocument; -import org.openide.util.NbBundle; -import org.sleuthkit.autopsy.healthmonitor.HealthMonitor; -import org.sleuthkit.autopsy.healthmonitor.TimingMetric; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Optional; - -/** - * A helper class to support indexing language-specific fields. - */ -class LanguageSpecificContentIndexingHelper { - - private final LanguageDetector languageDetector = new LanguageDetector(); - - Optional detectLanguageIfNeeded(Chunker.Chunk chunk) throws NoOpenCoreException { - double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion()); - if (2.2 <= indexSchemaVersion) { - return languageDetector.detect(chunk.toString()); - } else { - return Optional.empty(); - } - } - - void updateLanguageSpecificFields(Map fields, Chunker.Chunk chunk, Language language) { - List values = new ArrayList<>(); - values.add(chunk.toString()); - if (fields.containsKey(Server.Schema.FILE_NAME.toString())) { - values.add(fields.get(Server.Schema.FILE_NAME.toString()).toString()); - } - - // index the chunk to a language specific field - fields.put(Server.Schema.CONTENT_JA.toString(), values); - fields.put(Server.Schema.LANGUAGE.toString(), language.getValue()); - } - - void indexMiniChunk(Chunker.Chunk chunk, String sourceName, Map fields, String baseChunkID, Language language) - throws Ingester.IngesterException { - //Make a SolrInputDocument out of the field map - SolrInputDocument updateDoc = new SolrInputDocument(); - for (String key : fields.keySet()) { - updateDoc.addField(key, fields.get(key)); - } - - try { - updateDoc.setField(Server.Schema.ID.toString(), MiniChunkHelper.getChunkIdString(baseChunkID)); - - // index the chunk to a language specific field - updateDoc.addField(Server.Schema.CONTENT_JA.toString(), chunk.toString().substring(chunk.getBaseChunkLength())); - updateDoc.addField(Server.Schema.LANGUAGE.toString(), language.getValue()); - - TimingMetric metric = HealthMonitor.getTimingMetric("Solr: Index chunk"); - - KeywordSearch.getServer().addDocument(updateDoc); - HealthMonitor.submitTimingMetric(metric); - - } catch (KeywordSearchModuleException | NoOpenCoreException ex) { - throw new Ingester.IngesterException( - NbBundle.getMessage(Ingester.class, "Ingester.ingest.exception.err.msg", sourceName), ex); - } - } -} diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentQueryHelper.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentQueryHelper.java deleted file mode 100644 index a3ed8a7876..0000000000 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentQueryHelper.java +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Autopsy Forensic Browser - * - * 
Copyright 2011-2019 Basis Technology Corp. - * Contact: carrier sleuthkit org - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.sleuthkit.autopsy.keywordsearch; - -import org.apache.solr.client.solrj.SolrQuery; -import org.apache.solr.client.solrj.SolrRequest; -import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.common.SolrDocument; -import org.apache.solr.common.SolrDocumentList; -import org.sleuthkit.autopsy.coreutils.EscapeUtil; -import org.sleuthkit.autopsy.coreutils.Version; -import org.sleuthkit.datamodel.TskException; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import java.util.stream.Collectors; - -/** - * A helper class to support querying documents which have language-specific fields. - */ -final class LanguageSpecificContentQueryHelper { - - private LanguageSpecificContentQueryHelper() {} - - private static final List QUERY_FIELDS = new ArrayList<>(); - private static final List LANGUAGE_SPECIFIC_CONTENT_FIELDS - = Collections.singletonList(Server.Schema.CONTENT_JA); - private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT); - - static { - QUERY_FIELDS.add(Server.Schema.TEXT); - QUERY_FIELDS.addAll(LANGUAGE_SPECIFIC_CONTENT_FIELDS); - } - - /** - * Holds query response for later processes related to language-specific fields - */ - static class QueryResults { - List chunks = new ArrayList<>(); - Map miniChunks = new HashMap<>(); - // objectId_chunk -> "text" -> List of previews - Map>> highlighting = new HashMap<>(); - } - - /** - * Make a query string from the given one by applying it to the multiple query fields - * - * @param queryStr escaped query string - * @return query string - */ - static String expandQueryString(final String queryStr) { - List fieldQueries = new ArrayList<>(); - fieldQueries.add(Server.Schema.TEXT.toString() + ":" + queryStr); - fieldQueries.addAll(LANGUAGE_SPECIFIC_CONTENT_FIELDS.stream().map(field -> field.toString() + ":" + queryStr).collect(Collectors.toList())); - return String.join(" OR ", fieldQueries); - } - - static List getQueryFields() { - return QUERY_FIELDS; - } - - static void updateQueryResults(QueryResults results, SolrDocument document) { - String id = (String) document.getFieldValue(Server.Schema.ID.toString()); - if (MiniChunkHelper.isMiniChunkID(id)) { - results.miniChunks.put(MiniChunkHelper.getBaseChunkID(id), document); - } else { - results.chunks.add(document); - } - } - - /** - * Get snippets - * - * @param highlight field ID -> snippets - * @return snippets of appropriate fields. - * Note that this method returns {@code Optional.empty} if the result is empty for convenience to interact with the existing code. 
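* e.g. a highlight map that contains content_ja snippets yields those
* snippets; a map holding only the default text field yields Optional.empty.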
- */ - static Optional> getHighlights(Map> highlight) { - for (Server.Schema field : LANGUAGE_SPECIFIC_CONTENT_FIELDS) { - if (highlight.containsKey(field.toString())) { - return Optional.of(highlight.get(field.toString())); - } - } - return Optional.empty(); - } - - /** - * Merge KeywordHits from TEXT field and a language specific field - * - * Replace KeywordHits in the given {@code matches} if its chunk ID is same. - */ - static List mergeKeywordHits(List matches, Keyword originalKeyword, QueryResults queryResults) throws KeywordSearchModuleException { - Map map = findMatches(originalKeyword, queryResults).stream().collect(Collectors.toMap(KeywordHit::getSolrDocumentId, x -> x)); - List merged = new ArrayList<>(); - - // first, replace KeywordHit in matches - for (KeywordHit match : matches) { - String key = match.getSolrDocumentId(); - if (map.containsKey(key)) { - merged.add(map.get(key)); - map.remove(key); - } else { - merged.add(match); - } - } - // second, add rest of KeywordHits from queryResults - merged.addAll(map.values()); - - return merged; - } - - static void configureTermfreqQuery(SolrQuery query, String keyword) throws KeywordSearchModuleException, NoOpenCoreException { - // make a request to Solr to parse query. - QueryTermHelper.Result queryParserResult = QueryTermHelper.parse(keyword, LANGUAGE_SPECIFIC_CONTENT_FIELDS); - query.addField(buildTermfreqQuery(keyword, queryParserResult)); - } - - static String buildTermfreqQuery(String keyword, QueryTermHelper.Result result) { - List termfreqs = new ArrayList<>(); - for (Map.Entry> e : result.fieldTermsMap.entrySet()) { - String field = e.getKey(); - for (String term : e.getValue()) { - termfreqs.add(String.format("termfreq(\"%s\",\"%s\")", field, KeywordSearchUtil.escapeLuceneQuery(term))); - } - } - - // sum of all language specific query fields. - // only one of these fields could be non-zero. 
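// Illustrative, with hypothetical terms: a query parsed into the terms
// 雨 and 降る on content_ja produces
// termfreq:sum(termfreq("content_ja","雨"),termfreq("content_ja","降る")),
// which Solr evaluates per document and returns under the termfreq key.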
- return String.format("termfreq:sum(%s)", String.join(",", termfreqs)); - } - - static int queryChunkTermfreq(Set keywords, String contentID) throws KeywordSearchModuleException, NoOpenCoreException { - SolrQuery q = new SolrQuery(); - q.setShowDebugInfo(DEBUG); - - final String filterQuery = Server.Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(contentID); - final String highlightQuery = keywords.stream() - .map(s -> LanguageSpecificContentQueryHelper.expandQueryString( - KeywordSearchUtil.quoteQuery(KeywordSearchUtil.escapeLuceneQuery(s)))) - .collect(Collectors.joining(" ")); - - q.addFilterQuery(filterQuery); - q.setQuery(highlightQuery); - LanguageSpecificContentQueryHelper.configureTermfreqQuery(q, keywords.iterator().next()); - - QueryResponse response = KeywordSearch.getServer().query(q, SolrRequest.METHOD.POST); - SolrDocumentList results = response.getResults(); - if (results.isEmpty()) { - return 0; - } - - SolrDocument document = results.get(0); - return ((Float) document.getFieldValue(Server.Schema.TERMFREQ.toString())).intValue(); - } - - static int findNthIndexOf(String s, String pattern, int n) { - int found = 0; - int idx = -1; - int len = s.length(); - while (idx < len && found <= n) { - idx = s.indexOf(pattern, idx + 1); - if (idx == -1) { - break; - } - found++; - } - - return idx; - } - - private static List findMatches(Keyword originalKeyword, QueryResults queryResults) throws KeywordSearchModuleException { - List matches = new ArrayList<>(); - for (SolrDocument document : queryResults.chunks) { - String docId = (String) document.getFieldValue(Server.Schema.ID.toString()); - - try { - int hitCountInChunk = ((Float) document.getFieldValue(Server.Schema.TERMFREQ.toString())).intValue(); - SolrDocument miniChunk = queryResults.miniChunks.get(docId); - if (miniChunk == null) { - // last chunk does not have mini chunk because there's no overlapped region with next one - matches.add(createKeywordHit(originalKeyword, queryResults.highlighting, docId)); - } else { - int hitCountInMiniChunk = ((Float) miniChunk.getFieldValue(Server.Schema.TERMFREQ.toString())).intValue(); - if (hitCountInMiniChunk < hitCountInChunk) { - // there are at least one hit in base chunk - matches.add(createKeywordHit(originalKeyword, queryResults.highlighting, docId)); - } - } - } catch (TskException ex) { - throw new KeywordSearchModuleException(ex); - } - } - return matches; - } - - /** - * copied from LuceneQuery and modified to use getHighlightFieldValue - */ - private static KeywordHit createKeywordHit(Keyword originalKeyword, Map>> highlightResponse, String docId) throws TskException { - /** - * Get the first snippet from the document if keyword search is - * configured to use snippets. 
- */ - String snippet = ""; - if (KeywordSearchSettings.getShowSnippets()) { - List snippetList = getHighlightFieldValue(highlightResponse.get(docId)).orElse(null); - // list is null if there wasn't a snippet - if (snippetList != null) { - snippet = EscapeUtil.unEscapeHtml(snippetList.get(0)).trim(); - } - } - - return new KeywordHit(docId, snippet, originalKeyword.getSearchTerm()); - } - - /** - * @return Optional.empty if empty - */ - private static Optional> getHighlightFieldValue(Map> highlight) { - for (Server.Schema field : LANGUAGE_SPECIFIC_CONTENT_FIELDS) { - if (highlight.containsKey(field.toString())) { - return Optional.of(highlight.get(field.toString())); - } - } - return Optional.empty(); - } -} diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java index a324c03324..70c6155d5f 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java @@ -134,7 +134,6 @@ class LuceneQuery implements KeywordSearchQuery { String cursorMark = CursorMarkParams.CURSOR_MARK_START; boolean allResultsProcessed = false; List matches = new ArrayList<>(); - LanguageSpecificContentQueryHelper.QueryResults languageSpecificQueryResults = new LanguageSpecificContentQueryHelper.QueryResults(); while (!allResultsProcessed) { solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark); QueryResponse response = solrServer.query(solrQuery, SolrRequest.METHOD.POST); @@ -142,18 +141,7 @@ class LuceneQuery implements KeywordSearchQuery { // objectId_chunk -> "text" -> List of previews Map>> highlightResponse = response.getHighlighting(); - if (2.2 <= indexSchemaVersion) { - languageSpecificQueryResults.highlighting.putAll(response.getHighlighting()); - } - for (SolrDocument resultDoc : resultList) { - if (2.2 <= indexSchemaVersion) { - Object language = resultDoc.getFieldValue(Server.Schema.LANGUAGE.toString()); - if (language != null) { - LanguageSpecificContentQueryHelper.updateQueryResults(languageSpecificQueryResults, resultDoc); - } - } - try { /* * for each result doc, check that the first occurence of @@ -165,11 +153,6 @@ class LuceneQuery implements KeywordSearchQuery { final Integer chunkSize = (Integer) resultDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString()); final Collection content = resultDoc.getFieldValues(Server.Schema.CONTENT_STR.toString()); - // if the document has language, it should be hit in language specific content fields. So skip here. 
- if (resultDoc.containsKey(Server.Schema.LANGUAGE.toString())) { - continue; - } - if (indexSchemaVersion < 2.0) { //old schema versions don't support chunk_size or the content_str fields, so just accept hits matches.add(createKeywordtHit(highlightResponse, docId)); @@ -196,16 +179,9 @@ class LuceneQuery implements KeywordSearchQuery { cursorMark = nextCursorMark; } - List mergedMatches; - if (2.2 <= indexSchemaVersion) { - mergedMatches = LanguageSpecificContentQueryHelper.mergeKeywordHits(matches, originalKeyword, languageSpecificQueryResults); - } else { - mergedMatches = matches; - } - QueryResults results = new QueryResults(this); //in case of single term literal query there is only 1 term - results.addResult(new Keyword(originalKeyword.getSearchTerm(), true, true, originalKeyword.getListName(), originalKeyword.getOriginalTerm()), mergedMatches); + results.addResult(new Keyword(originalKeyword.getSearchTerm(), true, true, originalKeyword.getListName(), originalKeyword.getOriginalTerm()), matches); return results; } @@ -286,25 +262,19 @@ class LuceneQuery implements KeywordSearchQuery { * * @return */ - private SolrQuery createAndConfigureSolrQuery(boolean snippets) throws NoOpenCoreException, KeywordSearchModuleException { - double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion()); - + private SolrQuery createAndConfigureSolrQuery(boolean snippets) { SolrQuery q = new SolrQuery(); q.setShowDebugInfo(DEBUG); //debug // Wrap the query string in quotes if this is a literal search term. String queryStr = originalKeyword.searchTermIsLiteral() - ? KeywordSearchUtil.quoteQuery(keywordStringEscaped) : keywordStringEscaped; + ? KeywordSearchUtil.quoteQuery(keywordStringEscaped) : keywordStringEscaped; // Run the query against an optional alternative field. if (field != null) { //use the optional field queryStr = field + ":" + queryStr; - q.setQuery(queryStr); - } else if (2.2 <= indexSchemaVersion && originalKeyword.searchTermIsLiteral()) { - q.setQuery(LanguageSpecificContentQueryHelper.expandQueryString(queryStr)); - } else { - q.setQuery(queryStr); } + q.setQuery(queryStr); q.setRows(MAX_RESULTS_PER_CURSOR_MARK); // Setting the sort order is necessary for cursor based paging to work. q.setSort(SolrQuery.SortClause.asc(Server.Schema.ID.toString())); @@ -313,11 +283,6 @@ class LuceneQuery implements KeywordSearchQuery { Server.Schema.CHUNK_SIZE.toString(), Server.Schema.CONTENT_STR.toString()); - if (2.2 <= indexSchemaVersion && originalKeyword.searchTermIsLiteral()) { - q.addField(Server.Schema.LANGUAGE.toString()); - LanguageSpecificContentQueryHelper.configureTermfreqQuery(q, keywordStringEscaped); - } - for (KeywordQueryFilter filter : filters) { q.addFilterQuery(filter.toString()); } @@ -335,16 +300,8 @@ class LuceneQuery implements KeywordSearchQuery { * * @param q The SolrQuery to configure. 
@@ -335,16 +300,8 @@ class LuceneQuery implements KeywordSearchQuery {
      *
      * @param q The SolrQuery to configure.
      */
-    private static void configurwQueryForHighlighting(SolrQuery q) throws NoOpenCoreException {
-        double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());
-        if (2.2 <= indexSchemaVersion) {
-            for (Server.Schema field : LanguageSpecificContentQueryHelper.getQueryFields()) {
-                q.addHighlightField(field.toString());
-            }
-        } else {
-            q.addHighlightField(HIGHLIGHT_FIELD);
-        }
-
+    private static void configurwQueryForHighlighting(SolrQuery q) {
+        q.addHighlightField(HIGHLIGHT_FIELD);
 
         q.setHighlightSnippets(1);
         q.setHighlightFragsize(SNIPPET_LENGTH);
@@ -447,13 +404,7 @@ class LuceneQuery implements KeywordSearchQuery {
         if (responseHighlightID == null) {
             return "";
         }
-        double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
-        List<String> contentHighlights;
-        if (2.2 <= indexSchemaVersion) {
-            contentHighlights = LanguageSpecificContentQueryHelper.getHighlights(responseHighlightID).orElse(null);
-        } else {
-            contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
-        }
+        List<String> contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
         if (contentHighlights == null) {
             return "";
         } else {
diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/MiniChunkHelper.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/MiniChunkHelper.java
deleted file mode 100644
index 9e958587cd..0000000000
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/MiniChunkHelper.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Autopsy Forensic Browser
- *
- * Copyright 2011-2019 Basis Technology Corp.
- * Contact: carrier <at> sleuthkit <dot> org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.sleuthkit.autopsy.keywordsearch;
-
-/**
- * Mini-chunk related methods.
- */
-final class MiniChunkHelper {
-
-    private MiniChunkHelper() {}
-
-    static String SUFFIX = "_mini";
-
-    static String getChunkIdString(String baseChunkID) {
-        return baseChunkID + SUFFIX;
-    }
-
-    static boolean isMiniChunkID(String chunkID) {
-        return chunkID.endsWith(SUFFIX);
-    }
-
-    static String getBaseChunkID(String miniChunkID) {
-        return miniChunkID.replaceFirst(SUFFIX + "$", "");
-    }
-}
diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/QueryTermHelper.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/QueryTermHelper.java
deleted file mode 100644
index 39a050c47f..0000000000
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/QueryTermHelper.java
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Autopsy Forensic Browser
- *
- * Copyright 2011-2019 Basis Technology Corp.
- * Contact: carrier <at> sleuthkit <dot> org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.sleuthkit.autopsy.keywordsearch;
-
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.request.FieldAnalysisRequest;
-import org.apache.solr.client.solrj.response.AnalysisResponseBase;
-import org.apache.solr.client.solrj.response.FieldAnalysisResponse;
-
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.stream.Collectors;
-
-/**
- * Get terms from a query using Solr.
- *
- * This class is used to find the matched terms from query results.
- */
-final class QueryTermHelper {
-
-    private QueryTermHelper() {}
-
-    /**
-     * Result of the {@link #parse} method
-     */
-    static class Result {
-        /**
-         * field name -> [term]
-         */
-        final Map<String, List<String>> fieldTermsMap = new HashMap<>();
-    }
-
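    // Editor's illustration, not part of the deleted file: a hypothetical call to the
    // parse() method defined just below, assuming an open Solr core. The search term,
    // the chosen field, and the printed output are placeholders.
    //
    //     QueryTermHelper.Result result = QueryTermHelper.parse("term", Arrays.asList(Server.Schema.CONTENT_JA));
    //     for (Map.Entry<String, List<String>> entry : result.fieldTermsMap.entrySet()) {
    //         // e.g. "content_ja" -> the tokens Solr's query analyzer produced for "term"
    //         System.out.println(entry.getKey() + " -> " + entry.getValue());
    //     }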
-    /**
-     * Parse the given query string on Solr and return the result
-     *
-     * @param query  query to parse
-     * @param fields field names to use for parsing
-     */
-    static Result parse(String query, List<Server.Schema> fields) throws KeywordSearchModuleException, NoOpenCoreException {
-        Server server = KeywordSearch.getServer();
-
-        FieldAnalysisRequest request = new FieldAnalysisRequest();
-        for (Server.Schema field : fields) {
-            request.addFieldName(field.toString());
-        }
-        // FieldAnalysisRequest requires its field value property to be set,
-        // even though the corresponding analysis.fieldvalue parameter is not needed here.
-        // Setting an empty value does not affect the result.
-        request.setFieldValue("");
-        request.setQuery(query);
-
-        FieldAnalysisResponse response = new FieldAnalysisResponse();
-        try {
-            response.setResponse(server.request(request));
-        } catch (SolrServerException e) {
-            throw new KeywordSearchModuleException(e);
-        }
-
-        Result result = new Result();
-        for (Map.Entry<String, FieldAnalysisResponse.Analysis> entry : response.getAllFieldNameAnalysis()) {
-            Iterator<AnalysisResponseBase.AnalysisPhase> it = entry.getValue().getQueryPhases().iterator();
-
-            // The last phase is the one used in the search process.
-            AnalysisResponseBase.AnalysisPhase lastPhase = null;
-            while (it.hasNext()) {
-                lastPhase = it.next();
-            }
-
-            if (lastPhase != null) {
-                List<String> tokens = lastPhase.getTokens().stream().map(AnalysisResponseBase.TokenInfo::getText).collect(Collectors.toList());
-                result.fieldTermsMap.put(entry.getKey(), tokens);
-            }
-        }
-
-        return result;
-    }
-}
diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java
index 2db4bd6638..0e047456f6 100644
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java
@@ -130,18 +130,6 @@ public class Server {
                 return "content_ws"; //NON-NLS
             }
         },
-        CONTENT_JA {
-            @Override
-            public String toString() {
-                return "content_ja"; //NON-NLS
-            }
-        },
-        LANGUAGE {
-            @Override
-            public String toString() {
-                return "language"; //NON-NLS
-            }
-        },
         FILE_NAME {
             @Override
             public String toString() {
@@ -187,17 +175,6 @@ public class Server {
             public String toString() {
                 return "chunk_size"; //NON-NLS
             }
-        },
-        /**
-         * termfreq is a function which returns the number of times the term appears.
-         * This is not an actual field defined in schema.xml, but can be gotten from returned documents
-         * in the same way as fields.
-         */
-        TERMFREQ {
-            @Override
-            public String toString() {
-                return "termfreq"; //NON-NLS
-            }
-        }
+        }
     };
@@ -1658,8 +1635,7 @@ public class Server {
     private int queryNumFileChunks(long contentID) throws SolrServerException, IOException {
         String id = KeywordSearchUtil.escapeLuceneQuery(Long.toString(contentID));
         final SolrQuery q
-                = new SolrQuery(Server.Schema.ID + ":" + id + Server.CHUNK_ID_SEPARATOR + "*"
-                        + " NOT " + Server.Schema.ID + ":*" + MiniChunkHelper.SUFFIX);
+                = new SolrQuery(Server.Schema.ID + ":" + id + Server.CHUNK_ID_SEPARATOR + "*");
         q.setRows(0);
         return (int) query(q).getResults().getNumFound();
     }
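A note on the TERMFREQ pseudo-field removed above: Solr allows a function query to be aliased inside the fl field list, and the computed value then comes back on each document exactly like a stored field. Here is a minimal SolrJ sketch of that pattern, using the same sum(termfreq(...)) shape as the deleted buildTermfreqQuery test below; the collection name, field, and term are placeholders.

    import org.apache.solr.client.solrj.SolrClient;
    import org.apache.solr.client.solrj.SolrQuery;
    import org.apache.solr.client.solrj.response.QueryResponse;
    import org.apache.solr.common.SolrDocument;

    class TermfreqSketch {
        // Prints per-document term frequencies via an fl alias over the termfreq() function.
        static void printTermFrequencies(SolrClient client, String collection) throws Exception {
            SolrQuery q = new SolrQuery("content_ja:\"term1\"");
            q.addField("id");
            // Alias a function query in the field list; Solr evaluates it per matching document.
            q.addField("termfreq:sum(termfreq(\"content_ja\",\"term1\"))");
            QueryResponse rsp = client.query(collection, q);
            for (SolrDocument doc : rsp.getResults()) {
                System.out.println(doc.getFieldValue("id") + " termfreq=" + doc.getFieldValue("termfreq"));
            }
        }
    }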
diff --git a/KeywordSearch/test/unit/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentQueryHelperTest.java b/KeywordSearch/test/unit/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentQueryHelperTest.java
deleted file mode 100644
index d8c876592e..0000000000
--- a/KeywordSearch/test/unit/src/org/sleuthkit/autopsy/keywordsearch/LanguageSpecificContentQueryHelperTest.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Autopsy Forensic Browser
- *
- * Copyright 2011-2019 Basis Technology Corp.
- * Contact: carrier <at> sleuthkit <dot> org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.sleuthkit.autopsy.keywordsearch;
-
-import org.junit.Test;
-
-import java.util.Arrays;
-
-import static org.junit.Assert.assertEquals;
-
-/**
- * tests for LanguageSpecificContentQueryHelper
- */
-public class LanguageSpecificContentQueryHelperTest {
-
-    @Test
-    public void makeQueryString() {
-        assertEquals("text:query OR content_ja:query", LanguageSpecificContentQueryHelper.expandQueryString("query"));
-    }
-
-    @Test
-    public void findNthIndexOf() {
-        assertEquals(-1, LanguageSpecificContentQueryHelper.findNthIndexOf("A1AA45", "_", 0));
-        assertEquals(0, LanguageSpecificContentQueryHelper.findNthIndexOf("A1AA45", "A", 0));
-        assertEquals(2, LanguageSpecificContentQueryHelper.findNthIndexOf("A1AA45", "A", 1));
-        assertEquals(3, LanguageSpecificContentQueryHelper.findNthIndexOf("A1AA45", "A", 2));
-        assertEquals(-1, LanguageSpecificContentQueryHelper.findNthIndexOf("A1AA45", "A", 3));
-        assertEquals(0, LanguageSpecificContentQueryHelper.findNthIndexOf("A1AA45", "", 0));
-        assertEquals(-1, LanguageSpecificContentQueryHelper.findNthIndexOf("", "A", 0));
-        assertEquals(-1, LanguageSpecificContentQueryHelper.findNthIndexOf("A1AA45", "A", -1));
-        assertEquals(-1, LanguageSpecificContentQueryHelper.findNthIndexOf("A1AA45", "A", 999));
-    }
-
-    @Test
-    public void buildTermfreqQuery() {
-        QueryTermHelper.Result result = new QueryTermHelper.Result();
-        result.fieldTermsMap.put("field1", Arrays.asList("term1"));
-        result.fieldTermsMap.put("field2", Arrays.asList("term1", "term2"));
-        assertEquals(
-                "termfreq:sum(termfreq(\"field1\",\"term1\"),termfreq(\"field2\",\"term1\"),termfreq(\"field2\",\"term2\"))",
-                LanguageSpecificContentQueryHelper.buildTermfreqQuery("query", result));
-    }
-}
diff --git a/KeywordSearch/test/unit/src/org/sleuthkit/autopsy/keywordsearch/MiniChunkHelperTest.java b/KeywordSearch/test/unit/src/org/sleuthkit/autopsy/keywordsearch/MiniChunkHelperTest.java
deleted file mode 100644
index 27336b8297..0000000000
--- a/KeywordSearch/test/unit/src/org/sleuthkit/autopsy/keywordsearch/MiniChunkHelperTest.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Autopsy Forensic Browser
- *
- * Copyright 2011-2019 Basis Technology Corp.
- * Contact: carrier <at> sleuthkit <dot> org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.sleuthkit.autopsy.keywordsearch;
-
-import org.junit.Assert;
-import org.junit.Test;
-
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-/**
- * tests for MiniChunkHelper
- */
-public class MiniChunkHelperTest {
-
-    @Test
-    public void isMiniChunkID() {
-        assertTrue(MiniChunkHelper.isMiniChunkID("1_1_mini"));
-        assertFalse(MiniChunkHelper.isMiniChunkID("1_1"));
-        assertFalse(MiniChunkHelper.isMiniChunkID("1"));
-    }
-
-    @Test
-    public void getBaseChunkID() {
-        Assert.assertEquals("1_1", MiniChunkHelper.getBaseChunkID("1_1_mini"));
-        Assert.assertEquals("1_1", MiniChunkHelper.getBaseChunkID("1_1"));
-        Assert.assertEquals("1", MiniChunkHelper.getBaseChunkID("1"));
-    }
-
-}
\ No newline at end of file
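Postscript: the deleted findNthIndexOf tests above fully specify that helper's contract. For reference, here is a reconstruction that satisfies every assertion in the deleted test; this is a sketch inferred from the tests, not the removed implementation itself.

    // Returns the index of the (n+1)-th occurrence of sub in s, or -1 if there is no
    // such occurrence or n is negative. Matches all nine assertions in the deleted test.
    static int findNthIndexOf(String s, String sub, int n) {
        if (n < 0) {
            return -1;
        }
        int index = -1;
        int from = 0;
        for (int i = 0; i <= n; i++) {
            index = s.indexOf(sub, from);
            if (index == -1) {
                return -1;
            }
            from = index + 1;
        }
        return index;
    }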