Revert "5537-file-encryption-handle-asec-files-better"

Richard Cordovano 2019-09-25 11:53:55 -04:00 committed by GitHub
parent 25aedbbb02
commit ee03b898cb
21 changed files with 42 additions and 1512 deletions

View File

@ -1,7 +1,6 @@
EncryptionDetectionDataSourceIngestModule.artifactComment.bitlocker=Bitlocker encryption detected.
EncryptionDetectionDataSourceIngestModule.artifactComment.suspected=Suspected encryption due to high entropy (%f).
EncryptionDetectionDataSourceIngestModule.processing.message=Checking image for encryption.
EncryptionDetectionFileIngestModule.artifactComment.location=High entropy and known location/extension.
EncryptionDetectionFileIngestModule.artifactComment.password=Password protection detected.
EncryptionDetectionFileIngestModule.artifactComment.suspected=Suspected encryption due to high entropy (%f).
EncryptionDetectionFileIngestModule.getDesc.text=Looks for files with the specified minimum entropy.

View File

@ -29,8 +29,6 @@ import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.BufferUnderflowException;
import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.TikaException;
@ -77,11 +75,6 @@ final class EncryptionDetectionFileIngestModule extends FileIngestModuleAdapter
private static final String MIME_TYPE_PDF = "application/pdf";
private static final String[] FILE_IGNORE_LIST = {"hiberfile.sys", "pagefile.sys"};
/**
* This maps file locations to file extensions that are known to be encrypted
*/
private static final Map<String, String> knownEncryptedLocationExtensions = createLocationExtensionMap();
private final IngestServices services = IngestServices.getInstance();
private final Logger logger = services.getLogger(EncryptionDetectionModuleFactory.getModuleName());
@ -113,7 +106,7 @@ final class EncryptionDetectionFileIngestModule extends FileIngestModuleAdapter
public void startUp(IngestJobContext context) throws IngestModule.IngestModuleException {
try {
validateSettings();
this.context = context;
this.context = context;
blackboard = Case.getCurrentCaseThrows().getSleuthkitCase().getBlackboard();
fileTypeDetector = new FileTypeDetector();
@ -126,7 +119,6 @@ final class EncryptionDetectionFileIngestModule extends FileIngestModuleAdapter
@Messages({
"EncryptionDetectionFileIngestModule.artifactComment.password=Password protection detected.",
"EncryptionDetectionFileIngestModule.artifactComment.location=High entropy and known location/extension.",
"EncryptionDetectionFileIngestModule.artifactComment.suspected=Suspected encryption due to high entropy (%f)."
})
@Override
@ -138,12 +130,12 @@ final class EncryptionDetectionFileIngestModule extends FileIngestModuleAdapter
* verify the file hasn't been deleted.
*/
if (!file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)
&& !file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS)
&& !file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.VIRTUAL_DIR)
&& !file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.LOCAL_DIR)
&& (!file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.SLACK) || slackFilesAllowed)
&& !file.getKnown().equals(TskData.FileKnown.KNOWN)
&& !file.isMetaFlagSet(TskData.TSK_FS_META_FLAG_ENUM.UNALLOC)) {
&& !file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS)
&& !file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.VIRTUAL_DIR)
&& !file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.LOCAL_DIR)
&& (!file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.SLACK) || slackFilesAllowed)
&& !file.getKnown().equals(TskData.FileKnown.KNOWN)
&& !file.isMetaFlagSet(TskData.TSK_FS_META_FLAG_ENUM.UNALLOC)) {
/*
* Is the file in FILE_IGNORE_LIST?
*/
@ -163,9 +155,6 @@ final class EncryptionDetectionFileIngestModule extends FileIngestModuleAdapter
*/
String mimeType = fileTypeDetector.getMIMEType(file);
if (mimeType.equals("application/octet-stream") && isFileEncryptionSuspected(file)) {
if (checkFileLocationExtension(file)) {
return flagFile(file, BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED, Bundle.EncryptionDetectionFileIngestModule_artifactComment_location());
}
return flagFile(file, BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_SUSPECTED,
String.format(Bundle.EncryptionDetectionFileIngestModule_artifactComment_suspected(), calculatedEntropy));
} else if (isFilePasswordProtected(file)) {
@ -209,7 +198,7 @@ final class EncryptionDetectionFileIngestModule extends FileIngestModuleAdapter
if (context.fileIngestIsCancelled()) {
return IngestModule.ProcessResult.OK;
}
BlackboardArtifact artifact = file.newArtifact(artifactType);
artifact.addAttribute(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_COMMENT,
EncryptionDetectionModuleFactory.getModuleName(), comment));
@ -336,7 +325,7 @@ final class EncryptionDetectionFileIngestModule extends FileIngestModuleAdapter
try {
accessDatabase = databaseBuilder.open();
} catch (IOException | BufferUnderflowException | IndexOutOfBoundsException ignored) {
return passwordProtected;
return passwordProtected;
}
/*
* No exception has been thrown at this point, so the file
@ -417,36 +406,4 @@ final class EncryptionDetectionFileIngestModule extends FileIngestModuleAdapter
return possiblyEncrypted;
}
/**
* This method checks whether the AbstractFile input is in a location that is
* known to hold encrypted files. Both the file's parent location and its
* extension must match a known encrypted file type.
*
* @param file AbstractFile to be checked.
*
* @return True if file extension and location match known values.
*
*/
private boolean checkFileLocationExtension(AbstractFile file) {
String filePath = file.getParentPath().replace("/", "");
if ((knownEncryptedLocationExtensions.containsKey(filePath))
&& (knownEncryptedLocationExtensions.get(filePath).equals(file.getNameExtension())))
{
return true;
}
return false;
}
/*
* This method creates the map of paths and extensions that are known to
* have encrypted files
*
* @return Map of parent path to file extension for known encrypted files
*/
private static Map<String, String> createLocationExtensionMap() {
Map<String, String> locationExtensionMap = new HashMap<String, String>();
locationExtensionMap.put(".android_secure", "asec");
return locationExtensionMap;
}
}
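
For reference, here is a minimal standalone sketch of the location/extension check that this revert removes: a file is flagged when its parent directory and extension both match a known encrypted location, as with Android's ".android_secure" directory holding ".asec" containers. The class and method names below are illustrative only, not part of the Autopsy codebase; the map contents and lookup mirror the deleted code above.

import java.util.HashMap;
import java.util.Map;

// Illustrative sketch (hypothetical names): a file is treated as a known encrypted
// container when its parent directory and extension both match an entry in the map.
public class KnownEncryptedLocationSketch {

    // Parent path (with "/" separators stripped, as in the deleted code) -> extension.
    private static final Map<String, String> KNOWN_ENCRYPTED_LOCATION_EXTENSIONS = createLocationExtensionMap();

    private static Map<String, String> createLocationExtensionMap() {
        Map<String, String> locationExtensionMap = new HashMap<>();
        locationExtensionMap.put(".android_secure", "asec");
        return locationExtensionMap;
    }

    static boolean isKnownEncryptedLocation(String parentPathWithoutSlashes, String extension) {
        // Map.get returns null for unknown locations; equals(null) is simply false.
        return extension.equals(KNOWN_ENCRYPTED_LOCATION_EXTENSIONS.get(parentPathWithoutSlashes));
    }

    public static void main(String[] args) {
        System.out.println(isKnownEncryptedLocation(".android_secure", "asec")); // true
        System.out.println(isKnownEncryptedLocation("Download", "zip"));         // false
    }
}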

View File

@ -21,7 +21,6 @@
<dependency conf="autopsy->*" org="org.apache.solr" name="solr-solrj" rev="4.9.1"/>
<dependency conf="autopsy->*" org="commons-lang" name="commons-lang" rev="2.4"/>
<dependency conf="autopsy->*" org="commons-validator" name="commons-validator" rev="1.5.1"/>
<dependency conf="autopsy->*" org="com.optimaize.languagedetector" name="language-detector" rev="0.6"/>
<!-- Exclude the version of cxf-rt-rs-client from Tika 1.20, because one of its dependencies breaks Ivy -->
<dependency conf="autopsy->*" org="org.apache.tika" name="tika-parsers" rev="1.20">
<exclude module="cxf-rt-rs-client"/>

View File

@ -29,7 +29,6 @@ file.reference.jericho-html-3.3.jar=release/modules/ext/jericho-html-3.3.jar
file.reference.joda-time-2.2.jar=release/modules/ext/joda-time-2.2.jar
file.reference.json-simple-1.1.1.jar=release/modules/ext/json-simple-1.1.1.jar
file.reference.juniversalchardet-1.0.3.jar=release/modules/ext/juniversalchardet-1.0.3.jar
file.reference.language-detector-0.6.jar=release/modules/ext/language-detector-0.6.jar
file.reference.libsvm-3.1.jar=release/modules/ext/libsvm-3.1.jar
file.reference.log4j-1.2.17.jar=release/modules/ext/log4j-1.2.17.jar
file.reference.lucene-core-4.0.0.jar=release/modules/ext/lucene-core-4.0.0.jar

View File

@ -467,10 +467,6 @@
<runtime-relative-path>ext/vorbis-java-tika-0.8.jar</runtime-relative-path>
<binary-origin>release/modules/ext/vorbis-java-tika-0.8.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/language-detector-0.6.jar</runtime-relative-path>
<binary-origin>release/modules/ext/language-detector-0.6.jar</binary-origin>
</class-path-extension>
</data>
</configuration>
</project>

View File

@ -1,420 +0,0 @@
#
# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
#
# Any token with a part-of-speech tag that exactly matches those defined in this
# file are removed from the token stream.
#
# Set your own stoptags by uncommenting the lines below. Note that comments are
# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists,
# etc. that can be useful for building you own stoptag set.
#
# The entire possible tagset is provided below for convenience.
#
#####
# noun: unclassified nouns
#名詞
#
# noun-common: Common nouns or nouns where the sub-classification is undefined
#名詞-一般
#
# noun-proper: Proper nouns where the sub-classification is undefined
#名詞-固有名詞
#
# noun-proper-misc: miscellaneous proper nouns
#名詞-固有名詞-一般
#
# noun-proper-person: Personal names where the sub-classification is undefined
#名詞-固有名詞-人名
#
# noun-proper-person-misc: names that cannot be divided into surname and
# given name; foreign names; names where the surname or given name is unknown.
# e.g. お市の方
#名詞-固有名詞-人名-一般
#
# noun-proper-person-surname: Mainly Japanese surnames.
# e.g. 山田
#名詞-固有名詞-人名-姓
#
# noun-proper-person-given_name: Mainly Japanese given names.
# e.g. 太郎
#名詞-固有名詞-人名-名
#
# noun-proper-organization: Names representing organizations.
# e.g. 通産省, NHK
#名詞-固有名詞-組織
#
# noun-proper-place: Place names where the sub-classification is undefined
#名詞-固有名詞-地域
#
# noun-proper-place-misc: Place names excluding countries.
# e.g. アジア, バルセロナ, 京都
#名詞-固有名詞-地域-一般
#
# noun-proper-place-country: Country names.
# e.g. 日本, オーストラリア
#名詞-固有名詞-地域-国
#
# noun-pronoun: Pronouns where the sub-classification is undefined
#名詞-代名詞
#
# noun-pronoun-misc: miscellaneous pronouns:
# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ
#名詞-代名詞-一般
#
# noun-pronoun-contraction: Spoken language contraction made by combining a
# pronoun and the particle 'wa'.
# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ
#名詞-代名詞-縮約
#
# noun-adverbial: Temporal nouns such as names of days or months that behave
# like adverbs. Nouns that represent amount or ratios and can be used adverbially,
# e.g. 金曜, 一月, 午後, 少量
#名詞-副詞可能
#
# noun-verbal: Nouns that take arguments with case and can appear followed by
# 'suru' and related verbs (する, できる, なさる, くださる)
# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り
#名詞-サ変接続
#
# noun-adjective-base: The base form of adjectives, words that appear before な ("na")
# e.g. 健康, 安易, 駄目, だめ
#名詞-形容動詞語幹
#
# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
# e.g. 0, 1, 2, 何, 数, 幾
#名詞-数
#
# noun-affix: noun affixes where the sub-classification is undefined
#名詞-非自立
#
# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that
# attach to the base form of inflectional words, words that cannot be classified
# into any of the other categories below. This category includes indefinite nouns.
# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第,
# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み,
# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳,
# わり, 割り, 割, ん-口語/, もん-口語/
#名詞-非自立-一般
#
# noun-affix-adverbial: noun affixes that can behave as adverbs.
# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ,
# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか,
# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所,
# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま,
# 儘, 侭, みぎり, 矢先
#名詞-非自立-副詞可能
#
# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars
# with the stem よう(だ) ("you(da)").
# e.g. よう, やう, 様 (よう)
#名詞-非自立-助動詞語幹
#
# noun-affix-adjective-base: noun affixes that can connect to the indeclinable
# connection form な (aux "da").
# e.g. みたい, ふう
#名詞-非自立-形容動詞語幹
#
# noun-special: special nouns where the sub-classification is undefined.
#名詞-特殊
#
# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is
# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base
# form of inflectional words.
# e.g. そう
#名詞-特殊-助動詞語幹
#
# noun-suffix: noun suffixes where the sub-classification is undefined.
#名詞-接尾
#
# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect
# to ガル or タイ and can combine into compound nouns, words that cannot be classified into
# any of the other categories below. In general, this category is more inclusive than
# 接尾語 ("suffix") and is usually the last element in a compound noun.
# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み,
# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用
#名詞-接尾-一般
#
# noun-suffix-person: Suffixes that form nouns and attach to person names more often
# than other nouns.
# e.g. 君, 様, 著
#名詞-接尾-人名
#
# noun-suffix-place: Suffixes that form nouns and attach to place names more often
# than other nouns.
# e.g. 町, 市, 県
#名詞-接尾-地域
#
# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that
# can appear before スル ("suru").
# e.g. 化, 視, 分け, 入り, 落ち, 買い
#名詞-接尾-サ変接続
#
# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions,
# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the
# conjunctive form of inflectional words.
# e.g. そう
#名詞-接尾-助動詞語幹
#
# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive
# form of inflectional words and appear before the copula だ ("da").
# e.g. 的, げ, がち
#名詞-接尾-形容動詞語幹
#
# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ)
#名詞-接尾-副詞可能
#
# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category
# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach
# to numbers.
# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半
#名詞-接尾-助数詞
#
# noun-suffix-special: Special suffixes that mainly attach to inflecting words.
# e.g. (楽し) さ, (考え) 方
#名詞-接尾-特殊
#
# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words
# together.
# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
#名詞-接続詞的
#
# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are
# semantically verb-like.
# e.g. ごらん, ご覧, 御覧, 頂戴
#名詞-動詞非自立的
#
# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,
# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation")
# is いわく ("iwaku").
#名詞-引用文字列
#
# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and
# behave like an adjective.
# e.g. 申し訳, 仕方, とんでも, 違い
#名詞-ナイ形容詞語幹
#
#####
# prefix: unclassified prefixes
#接頭詞
#
# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)
# excluding numerical expressions.
# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派)
#接頭詞-名詞接続
#
# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
# in conjunctive form followed by なる/なさる/くださる.
# e.g. お (読みなさい), お (座り)
#接頭詞-動詞接続
#
# prefix-adjectival: Prefixes that attach to adjectives.
# e.g. お (寒いですねえ), バカ (でかい)
#接頭詞-形容詞接続
#
# prefix-numerical: Prefixes that attach to numerical expressions.
# e.g. 約, およそ, 毎時
#接頭詞-数接続
#
#####
# verb: unclassified verbs
#動詞
#
# verb-main:
#動詞-自立
#
# verb-auxiliary:
#動詞-非自立
#
# verb-suffix:
#動詞-接尾
#
#####
# adjective: unclassified adjectives
#形容詞
#
# adjective-main:
#形容詞-自立
#
# adjective-auxiliary:
#形容詞-非自立
#
# adjective-suffix:
#形容詞-接尾
#
#####
# adverb: unclassified adverbs
#副詞
#
# adverb-misc: Words that can be segmented into one unit and where adnominal
# modification is not possible.
# e.g. あいかわらず, 多分
#副詞-一般
#
# adverb-particle_conjunction: Adverbs that can be followed by の, は, に,
# な, する, だ, etc.
# e.g. こんなに, そんなに, あんなに, なにか, なんでも
#副詞-助詞類接続
#
#####
# adnominal: Words that only have noun-modifying forms.
# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう,
# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした,
# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き
#連体詞
#
#####
# conjunction: Conjunctions that can occur independently.
# e.g. が, けれども, そして, じゃあ, それどころか
接続詞
#
#####
# particle: unclassified particles.
助詞
#
# particle-case: case particles where the subclassification is undefined.
助詞-格助詞
#
# particle-case-misc: Case particles.
# e.g. から, が, で, と, に, へ, より, を, の, にて
助詞-格助詞-一般
#
# particle-case-quote: the "to" that appears after nouns, a persons speech,
# quotation marks, expressions of decisions from a meeting, reasons, judgements,
# conjectures, etc.
# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...)
助詞-格助詞-引用
#
# particle-case-compound: Compounds of particles and verbs that mainly behave
# like case particles.
# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って,
# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける,
# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し,
# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして,
# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって,
# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る,
# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる,
# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ
助詞-格助詞-連語
#
# particle-conjunctive:
# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども,
# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/,
# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/
助詞-接続助詞
#
# particle-dependency:
# e.g. こそ, さえ, しか, すら, は, も, ぞ
助詞-係助詞
#
# particle-adverbial:
# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/,
# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/,
# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに,
# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/,
# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」)
助詞-副助詞
#
# particle-interjective: particles with interjective grammatical roles.
# e.g. (松島) や
助詞-間投助詞
#
# particle-coordinate:
# e.g. と, たり, だの, だり, とか, なり, や, やら
助詞-並立助詞
#
# particle-final:
# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ,
# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/
助詞-終助詞
#
# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is
# adverbial, conjunctive, or sentence final. For example:
# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」
# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」
# 「(祈りが届いたせい) か (, 試験に合格した.)」
# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」
# e.g. か
助詞-副助詞/並立助詞/終助詞
#
# particle-adnominalizer: The "no" that attaches to nouns and modifies
# non-inflectional words.
助詞-連体化
#
# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs
# that are giongo, giseigo, or gitaigo.
# e.g. に, と
助詞-副詞化
#
# particle-special: A particle that does not fit into one of the above classifications.
# This includes particles that are used in Tanka, Haiku, and other poetry.
# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家)
助詞-特殊
#
#####
# auxiliary-verb:
助動詞
#
#####
# interjection: Greetings and other exclamations.
# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます,
# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい
#感動詞
#
#####
# symbol: unclassified Symbols.
記号
#
# symbol-misc: A general symbol not in one of the categories below.
# e.g. [○◎@$〒→+]
記号-一般
#
# symbol-comma: Commas
# e.g. [,、]
記号-読点
#
# symbol-period: Periods and full stops.
# e.g. [..。]
記号-句点
#
# symbol-space: Full-width whitespace.
記号-空白
#
# symbol-open_bracket:
# e.g. [({‘“『【]
記号-括弧開
#
# symbol-close_bracket:
# e.g. [)}’”』」】]
記号-括弧閉
#
# symbol-alphabetic:
#記号-アルファベット
#
#####
# other: unclassified other
#その他
#
# other-interjection: Words that are hard to classify as noun-suffixes or
# sentence-final particles.
# e.g. (だ)ァ
その他-間投
#
#####
# filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
# e.g. あの, うんと, えと
フィラー
#
#####
# non-verbal: non-verbal sound.
非言語音
#
#####
# fragment:
#語断片
#
#####
# unknown: unknown part of speech.
#未知語
#
##### End of file

View File

@ -1,127 +0,0 @@
#
# This file defines a stopword set for Japanese.
#
# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745
# for frequency lists, etc. that can be useful for making your own set (if desired)
#
# Note that there is an overlap between these stopwords and the terms stopped when used
# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note
# that comments are not allowed on the same line as stopwords.
#
# Also note that stopping is done in a case-insensitive manner. Change your StopFilter
# configuration if you need case-sensitive stopping. Lastly, note that stopping is done
# using the same character width as the entries in this file. Since this StopFilter is
# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
# entries to be in half-width and your kana entries to be in full-width.
#
ある
いる
する
から
こと
として
れる
など
なっ
ない
この
ため
その
あっ
よう
また
もの
という
あり
まで
られ
なる
これ
によって
により
おり
より
による
なり
られる
において
なかっ
なく
しかし
について
だっ
その後
できる
それ
ので
なお
のみ
でき
における
および
いう
さらに
でも
たり
その他
に関する
たち
ます
なら
に対して
特に
せる
及び
これら
とき
では
にて
ほか
ながら
うち
そして
とともに
ただし
かつて
それぞれ
または
ほど
ものの
に対する
ほとんど
と共に
といった
です
とも
ところ
ここ
##### End of file

View File

@ -45,7 +45,7 @@
that avoids logging every request
-->
<schema name="Autopsy Keyword Search" version="2.2">
<schema name="Autopsy Keyword Search" version="2.1">
<!-- attribute "name" is the name of this schema and is only used for display purposes.
Applications should change this to reflect the nature of the search collection.
version="1.4" is Solr's version number for the schema syntax and semantics. It should
@ -62,7 +62,6 @@
2.0 added chunk_size field
2.1 to facilitate case insensitive regex search, no longer copying content into content_str.
content_str will be populated with lowercase content by Autopsy.
2.2 added text_ja type, content_ja and language fields to support Japanese text search
-->
<types>
@ -244,18 +243,6 @@
</analyzer>
</fieldType>
<fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
<analyzer>
<tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
<filter class="solr.JapaneseBaseFormFilterFactory"/>
<filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" />
<filter class="solr.CJKWidthFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" />
<filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<!-- A text field with defaults appropriate for English: it
tokenizes with StandardTokenizer, removes English stop words
(stopwords_en.txt), down cases, protects words from protwords.txt, and
@ -570,11 +557,6 @@
via copyField further on in this schema -->
<field name="text" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" multiValued="true"/>
<!-- Store language detection result. Only parents of text documents have this -->
<field name="language" type="string" indexed="false" stored="true" required="false"/>
<field name="content_ja" type="text_ja" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" multiValued="true"/>
<!-- catchall text field that indexes tokens both normally and in reverse for efficient
leading wildcard queries. -->
<!--<field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>-->

View File

@ -38,7 +38,6 @@ import org.apache.commons.lang3.math.NumberUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrRequest.METHOD;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.coreutils.Logger;
@ -347,8 +346,6 @@ class HighlightedText implements IndexedText {
String chunkID = "";
String highlightField = "";
try {
double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
loadPageInfo(); //inits once
SolrQuery q = new SolrQuery();
q.setShowDebugInfo(DEBUG); //debug
@ -362,46 +359,22 @@ class HighlightedText implements IndexedText {
highlightField = LuceneQuery.HIGHLIGHT_FIELD;
if (isLiteral) {
if (2.2 <= indexSchemaVersion) {
//if the query is literal try to get solr to do the highlighting
final String highlightQuery = keywords.stream().map(s ->
LanguageSpecificContentQueryHelper.expandQueryString(KeywordSearchUtil.quoteQuery(KeywordSearchUtil.escapeLuceneQuery(s))))
.collect(Collectors.joining(" OR "));
q.setQuery(highlightQuery);
for (Server.Schema field : LanguageSpecificContentQueryHelper.getQueryFields()) {
q.addField(field.toString());
q.addHighlightField(field.toString());
}
q.addField(Server.Schema.LANGUAGE.toString());
// in case of single term literal query there is only 1 term
LanguageSpecificContentQueryHelper.configureTermfreqQuery(q, keywords.iterator().next());
q.addFilterQuery(filterQuery);
q.setHighlightFragsize(0); // don't fragment the highlight, works with original highlighter, or needs "single" list builder with FVH
} else {
//if the query is literal try to get solr to do the highlighting
final String highlightQuery = keywords.stream()
.map(HighlightedText::constructEscapedSolrQuery)
.collect(Collectors.joining(" "));
//if the query is literal try to get solr to do the highlighting
final String highlightQuery = keywords.stream()
.map(HighlightedText::constructEscapedSolrQuery)
.collect(Collectors.joining(" "));
q.setQuery(highlightQuery);
q.addField(highlightField);
q.addFilterQuery(filterQuery);
q.addHighlightField(highlightField);
q.setHighlightFragsize(0); // don't fragment the highlight, works with original highlighter, or needs "single" list builder with FVH
}
q.setQuery(highlightQuery);
q.addField(highlightField);
q.addFilterQuery(filterQuery);
q.addHighlightField(highlightField);
q.setHighlightFragsize(0); // don't fragment the highlight, works with original highlighter, or needs "single" list builder with FVH
//tune the highlighter
if (shouldUseOriginalHighlighter(contentIdStr)) {
// use original highlighter
q.setParam("hl.useFastVectorHighlighter", "off");
q.setParam("hl.simple.pre", HIGHLIGHT_PRE);
q.setParam("hl.simple.post", HIGHLIGHT_POST);
} else {
q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS
q.setParam("hl.tag.pre", HIGHLIGHT_PRE); //makes sense for FastVectorHighlighter only NON-NLS
q.setParam("hl.tag.post", HIGHLIGHT_POST); //makes sense for FastVectorHighlighter only NON-NLS
q.setParam("hl.fragListBuilder", "single"); //makes sense for FastVectorHighlighter only NON-NLS
}
q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS
q.setParam("hl.tag.pre", HIGHLIGHT_PRE); //makes sense for FastVectorHighlighter only NON-NLS
q.setParam("hl.tag.post", HIGHLIGHT_POST); //makes sense for FastVectorHighlighter only NON-NLS
q.setParam("hl.fragListBuilder", "single"); //makes sense for FastVectorHighlighter only NON-NLS
//docs says makes sense for the original Highlighter only, but not really
q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //NON-NLS
@ -433,40 +406,12 @@ class HighlightedText implements IndexedText {
if (responseHighlightID == null) {
highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
} else {
SolrDocument document = response.getResults().get(0);
Object language = document.getFieldValue(Server.Schema.LANGUAGE.toString());
if (2.2 <= indexSchemaVersion && language != null) {
List<String> contentHighlights = LanguageSpecificContentQueryHelper.getHighlights(responseHighlightID).orElse(null);
if (contentHighlights == null) {
highlightedContent = "";
} else {
int hitCountInMiniChunk = LanguageSpecificContentQueryHelper.queryChunkTermfreq(keywords, MiniChunkHelper.getChunkIdString(contentIdStr));
String s = contentHighlights.get(0).trim();
// If there is a mini-chunk, trim the content so that highlighted text in it is not shown.
if (0 < hitCountInMiniChunk) {
int hitCountInChunk = ((Float) document.getFieldValue(Server.Schema.TERMFREQ.toString())).intValue();
int idx = LanguageSpecificContentQueryHelper.findNthIndexOf(
s,
HIGHLIGHT_PRE,
// trim after the last hit in chunk
hitCountInChunk - hitCountInMiniChunk);
if (idx != -1) {
highlightedContent = s.substring(0, idx);
} else {
highlightedContent = s;
}
} else {
highlightedContent = s;
}
}
List<String> contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
if (contentHighlights == null) {
highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
} else {
List<String> contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
if (contentHighlights == null) {
highlightedContent = attemptManualHighlighting(response.getResults(), highlightField, keywords);
} else {
// extracted content (minus highlight tags) is HTML-escaped
highlightedContent = contentHighlights.get(0).trim();
}
// extracted content (minus highlight tags) is HTML-escaped
highlightedContent = contentHighlights.get(0).trim();
}
}
}
@ -606,37 +551,4 @@ class HighlightedText implements IndexedText {
return buf.toString();
}
/**
* Return true if we should use original highlighter instead of FastVectorHighlighter.
*
* In the case of Japanese text and a phrase query, FastVectorHighlighter does not work well.
*
* Note about highlighters:
* If the query is "雨が降る" (phrase query), Solr divides it into 雨 and 降る. が is a stop word here.
* It seems that the FastVector highlighter does not produce any snippet when there is a stop word between terms.
* On the other hand, the original highlighter produces multiple matches, for example:
* > <em>雨</em>が<em>降っ</em>ています
* Unified highlighter (from Solr 6.4) handles the case as expected:
* > <em>雨が降っ</em>ています
*/
private boolean shouldUseOriginalHighlighter(String contentID) throws NoOpenCoreException, KeywordSearchModuleException {
final SolrQuery q = new SolrQuery();
q.setQuery("*:*");
q.addFilterQuery(Server.Schema.ID.toString() + ":" + contentID);
q.setFields(Server.Schema.LANGUAGE.toString());
QueryResponse response = solrServer.query(q, METHOD.POST);
SolrDocumentList solrDocuments = response.getResults();
if (!solrDocuments.isEmpty()) {
SolrDocument solrDocument = solrDocuments.get(0);
if (solrDocument != null) {
Object languageField = solrDocument.getFieldValue(Server.Schema.LANGUAGE.toString());
if (languageField != null) {
return languageField.equals("ja");
}
}
}
return false;
}
}

View File

@ -39,7 +39,7 @@ class IndexFinder {
private static final String KWS_DATA_FOLDER_NAME = "data";
private static final String INDEX_FOLDER_NAME = "index";
private static final String CURRENT_SOLR_VERSION = "4";
private static final String CURRENT_SOLR_SCHEMA_VERSION = "2.2";
private static final String CURRENT_SOLR_SCHEMA_VERSION = "2.1";
static String getCurrentSolrVersion() {
return CURRENT_SOLR_VERSION;

View File

@ -20,10 +20,8 @@ package org.sleuthkit.autopsy.keywordsearch;
import java.io.BufferedReader;
import java.io.Reader;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.logging.Level;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.solr.client.solrj.SolrServerException;
@ -61,8 +59,6 @@ class Ingester {
private final Server solrServer = KeywordSearch.getServer();
private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR = new SolrFieldsVisitor();
private static Ingester instance;
private final LanguageSpecificContentIndexingHelper languageSpecificContentIndexingHelper
= new LanguageSpecificContentIndexingHelper();
private Ingester() {
}
@ -97,7 +93,7 @@ class Ingester {
* file, but the Solr server is probably fine.
*/
void indexMetaDataOnly(AbstractFile file) throws IngesterException {
indexChunk("", file.getName().toLowerCase(), new HashMap<>(getContentFields(file)));
indexChunk("", file.getName().toLowerCase(), getContentFields(file));
}
/**
@ -111,7 +107,7 @@ class Ingester {
* artifact, but the Solr server is probably fine.
*/
void indexMetaDataOnly(BlackboardArtifact artifact, String sourceName) throws IngesterException {
indexChunk("", sourceName, new HashMap<>(getContentFields(artifact)));
indexChunk("", sourceName, getContentFields(artifact));
}
/**
@ -147,30 +143,21 @@ class Ingester {
< T extends SleuthkitVisitableItem> boolean indexText(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context) throws Ingester.IngesterException {
int numChunks = 0; //unknown until chunking is done
Map<String, String> contentFields = Collections.unmodifiableMap(getContentFields(source));
Map<String, String> fields = getContentFields(source);
//Get a reader for the content of the given source
try (BufferedReader reader = new BufferedReader(sourceReader)) {
Chunker chunker = new Chunker(reader);
while (chunker.hasNext()) {
for (Chunk chunk : chunker) {
if (context != null && context.fileIngestIsCancelled()) {
logger.log(Level.INFO, "File ingest cancelled. Cancelling keyword search indexing of {0}", sourceName);
return false;
}
Chunk chunk = chunker.next();
Map<String, Object> fields = new HashMap<>(contentFields);
String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
fields.put(Server.Schema.ID.toString(), chunkId);
fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));
Optional<Language> language = languageSpecificContentIndexingHelper.detectLanguageIfNeeded(chunk);
language.ifPresent(lang -> languageSpecificContentIndexingHelper.updateLanguageSpecificFields(fields, chunk, lang));
try {
//add the chunk text to Solr index
indexChunk(chunk.toString(), sourceName, fields);
// add mini chunk when there's a language specific field
if (chunker.hasNext() && language.isPresent()) {
languageSpecificContentIndexingHelper.indexMiniChunk(chunk, sourceName, new HashMap<>(contentFields), chunkId, language.get());
}
numChunks++;
} catch (Ingester.IngesterException ingEx) {
logger.log(Level.WARNING, "Ingester had a problem with extracted string from file '" //NON-NLS
@ -190,7 +177,6 @@ class Ingester {
if (context != null && context.fileIngestIsCancelled()) {
return false;
} else {
Map<String, Object> fields = new HashMap<>(contentFields);
//after all chunks, index just the meta data, including the numChunks, of the parent file
fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
//reset id field to base document id
@ -216,7 +202,7 @@ class Ingester {
*
* @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException
*/
private void indexChunk(String chunk, String sourceName, Map<String, Object> fields) throws IngesterException {
private void indexChunk(String chunk, String sourceName, Map<String, String> fields) throws IngesterException {
if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
//JMTODO: actually, if we couldn't get the image id it is set to -1,
// but does this really mean we don't want to index it?

View File

@ -1,46 +0,0 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011-2019 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.keywordsearch;
import java.util.Arrays;
import java.util.Optional;
/**
* Language.
*
* Contents which are detected to have these languages should be indexed to a corresponding language-specific field
* such as content_ja.
*/
public enum Language {
JAPANESE("ja");
private String value;
String getValue() {
return value;
}
static Optional<Language> fromValue(String value) {
return Arrays.stream(Language.values()).filter(x -> x.value.equals(value)).findFirst();
}
Language(String value) {
this.value = value;
}
}

View File

@ -1,60 +0,0 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011-2019 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.keywordsearch;
import com.optimaize.langdetect.LanguageDetectorBuilder;
import com.optimaize.langdetect.i18n.LdLocale;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
import com.optimaize.langdetect.text.CommonTextObjectFactories;
import com.optimaize.langdetect.text.TextObject;
import com.optimaize.langdetect.text.TextObjectFactory;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Optional;
/**
* Detects the language of the given contents. Only languages which should be indexed to a corresponding
* language-specific field are detected.
*/
class LanguageDetector {
private com.optimaize.langdetect.LanguageDetector impl;
private TextObjectFactory textObjectFactory;
LanguageDetector() {
try {
impl = LanguageDetectorBuilder.create(NgramExtractors.standard())
.withProfiles(new LanguageProfileReader().readAllBuiltIn())
.build();
textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();
} catch (IOException e) {
// The IOException here could occur when failing to read the language profiles from the classpath.
// That can be considered to be a severe IO problem. Nothing can be done here.
throw new UncheckedIOException(e);
}
}
Optional<Language> detect(String text) {
TextObject textObject = textObjectFactory.forText(text);
Optional<LdLocale> localeOpt = impl.detect(textObject).transform(Optional::of).or(Optional.empty());
return localeOpt.map(LdLocale::getLanguage).flatMap(Language::fromValue);
}
}
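
As a rough usage sketch of the detection flow that this revert removes, the following uses the optimaize language-detector API exactly as the deleted LanguageDetector class above does. The class name and sample text are illustrative, and detection can return an absent result for short or ambiguous text.

import com.google.common.base.Optional;
import com.optimaize.langdetect.LanguageDetectorBuilder;
import com.optimaize.langdetect.i18n.LdLocale;
import com.optimaize.langdetect.ngram.NgramExtractors;
import com.optimaize.langdetect.profiles.LanguageProfileReader;
import com.optimaize.langdetect.text.CommonTextObjectFactories;
import com.optimaize.langdetect.text.TextObject;
import com.optimaize.langdetect.text.TextObjectFactory;
import java.io.IOException;

public class LanguageDetectionSketch {
    public static void main(String[] args) throws IOException {
        // Build the detector the same way the deleted LanguageDetector class does.
        com.optimaize.langdetect.LanguageDetector detector = LanguageDetectorBuilder
                .create(NgramExtractors.standard())
                .withProfiles(new LanguageProfileReader().readAllBuiltIn())
                .build();
        TextObjectFactory textObjectFactory = CommonTextObjectFactories.forDetectingOnLargeText();

        TextObject text = textObjectFactory.forText("今日は雨が降っています。明日は晴れるでしょう。");
        Optional<LdLocale> locale = detector.detect(text); // Guava Optional; may be absent
        if (locale.isPresent() && "ja".equals(locale.get().getLanguage())) {
            // A Japanese chunk would be indexed into the language-specific content_ja field.
            System.out.println("Detected Japanese");
        }
    }
}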

View File

@ -1,85 +0,0 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011-2019 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.keywordsearch;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.solr.common.SolrInputDocument;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.healthmonitor.HealthMonitor;
import org.sleuthkit.autopsy.healthmonitor.TimingMetric;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
/**
* A helper class to support indexing language-specific fields.
*/
class LanguageSpecificContentIndexingHelper {
private final LanguageDetector languageDetector = new LanguageDetector();
Optional<Language> detectLanguageIfNeeded(Chunker.Chunk chunk) throws NoOpenCoreException {
double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());
if (2.2 <= indexSchemaVersion) {
return languageDetector.detect(chunk.toString());
} else {
return Optional.empty();
}
}
void updateLanguageSpecificFields(Map<String, Object> fields, Chunker.Chunk chunk, Language language) {
List<String> values = new ArrayList<>();
values.add(chunk.toString());
if (fields.containsKey(Server.Schema.FILE_NAME.toString())) {
values.add(fields.get(Server.Schema.FILE_NAME.toString()).toString());
}
// index the chunk to a language specific field
fields.put(Server.Schema.CONTENT_JA.toString(), values);
fields.put(Server.Schema.LANGUAGE.toString(), language.getValue());
}
void indexMiniChunk(Chunker.Chunk chunk, String sourceName, Map<String, Object> fields, String baseChunkID, Language language)
throws Ingester.IngesterException {
//Make a SolrInputDocument out of the field map
SolrInputDocument updateDoc = new SolrInputDocument();
for (String key : fields.keySet()) {
updateDoc.addField(key, fields.get(key));
}
try {
updateDoc.setField(Server.Schema.ID.toString(), MiniChunkHelper.getChunkIdString(baseChunkID));
// index the chunk to a language specific field
updateDoc.addField(Server.Schema.CONTENT_JA.toString(), chunk.toString().substring(chunk.getBaseChunkLength()));
updateDoc.addField(Server.Schema.LANGUAGE.toString(), language.getValue());
TimingMetric metric = HealthMonitor.getTimingMetric("Solr: Index chunk");
KeywordSearch.getServer().addDocument(updateDoc);
HealthMonitor.submitTimingMetric(metric);
} catch (KeywordSearchModuleException | NoOpenCoreException ex) {
throw new Ingester.IngesterException(
NbBundle.getMessage(Ingester.class, "Ingester.ingest.exception.err.msg", sourceName), ex);
}
}
}

View File

@ -1,248 +0,0 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011-2019 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.keywordsearch;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.sleuthkit.autopsy.coreutils.EscapeUtil;
import org.sleuthkit.autopsy.coreutils.Version;
import org.sleuthkit.datamodel.TskException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
/**
* A helper class to support querying documents which have language-specific fields.
*/
final class LanguageSpecificContentQueryHelper {
private LanguageSpecificContentQueryHelper() {}
private static final List<Server.Schema> QUERY_FIELDS = new ArrayList<>();
private static final List<Server.Schema> LANGUAGE_SPECIFIC_CONTENT_FIELDS
= Collections.singletonList(Server.Schema.CONTENT_JA);
private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
static {
QUERY_FIELDS.add(Server.Schema.TEXT);
QUERY_FIELDS.addAll(LANGUAGE_SPECIFIC_CONTENT_FIELDS);
}
/**
* Holds query response for later processes related to language-specific fields
*/
static class QueryResults {
List<SolrDocument> chunks = new ArrayList<>();
Map</* ID */ String, SolrDocument> miniChunks = new HashMap<>();
// objectId_chunk -> "text" -> List of previews
Map<String, Map<String, List<String>>> highlighting = new HashMap<>();
}
/**
* Make a query string from the given one by applying it to the multiple query fields
*
* @param queryStr escaped query string
* @return query string
*/
static String expandQueryString(final String queryStr) {
List<String> fieldQueries = new ArrayList<>();
fieldQueries.add(Server.Schema.TEXT.toString() + ":" + queryStr);
fieldQueries.addAll(LANGUAGE_SPECIFIC_CONTENT_FIELDS.stream().map(field -> field.toString() + ":" + queryStr).collect(Collectors.toList()));
return String.join(" OR ", fieldQueries);
}
static List<Server.Schema> getQueryFields() {
return QUERY_FIELDS;
}
static void updateQueryResults(QueryResults results, SolrDocument document) {
String id = (String) document.getFieldValue(Server.Schema.ID.toString());
if (MiniChunkHelper.isMiniChunkID(id)) {
results.miniChunks.put(MiniChunkHelper.getBaseChunkID(id), document);
} else {
results.chunks.add(document);
}
}
/**
* Get snippets
*
* @param highlight field ID -> snippets
* @return snippets of appropriate fields.
* Note that this method returns {@code Optional.empty} if the result is empty, for convenience when interacting with the existing code.
*/
static Optional<List<String>> getHighlights(Map<String, List<String>> highlight) {
for (Server.Schema field : LANGUAGE_SPECIFIC_CONTENT_FIELDS) {
if (highlight.containsKey(field.toString())) {
return Optional.of(highlight.get(field.toString()));
}
}
return Optional.empty();
}
/**
* Merge KeywordHits from TEXT field and a language specific field
*
* Replace KeywordHits in the given {@code matches} when the chunk IDs are the same.
*/
static List<KeywordHit> mergeKeywordHits(List<KeywordHit> matches, Keyword originalKeyword, QueryResults queryResults) throws KeywordSearchModuleException {
Map<String, KeywordHit> map = findMatches(originalKeyword, queryResults).stream().collect(Collectors.toMap(KeywordHit::getSolrDocumentId, x -> x));
List<KeywordHit> merged = new ArrayList<>();
// first, replace KeywordHit in matches
for (KeywordHit match : matches) {
String key = match.getSolrDocumentId();
if (map.containsKey(key)) {
merged.add(map.get(key));
map.remove(key);
} else {
merged.add(match);
}
}
// second, add rest of KeywordHits from queryResults
merged.addAll(map.values());
return merged;
}
static void configureTermfreqQuery(SolrQuery query, String keyword) throws KeywordSearchModuleException, NoOpenCoreException {
// make a request to Solr to parse query.
QueryTermHelper.Result queryParserResult = QueryTermHelper.parse(keyword, LANGUAGE_SPECIFIC_CONTENT_FIELDS);
query.addField(buildTermfreqQuery(keyword, queryParserResult));
}
static String buildTermfreqQuery(String keyword, QueryTermHelper.Result result) {
List<String> termfreqs = new ArrayList<>();
for (Map.Entry<String, List<String>> e : result.fieldTermsMap.entrySet()) {
String field = e.getKey();
for (String term : e.getValue()) {
termfreqs.add(String.format("termfreq(\"%s\",\"%s\")", field, KeywordSearchUtil.escapeLuceneQuery(term)));
}
}
// sum of all language specific query fields.
// only one of these fields could be non-zero.
return String.format("termfreq:sum(%s)", String.join(",", termfreqs));
}
static int queryChunkTermfreq(Set<String> keywords, String contentID) throws KeywordSearchModuleException, NoOpenCoreException {
SolrQuery q = new SolrQuery();
q.setShowDebugInfo(DEBUG);
final String filterQuery = Server.Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(contentID);
final String highlightQuery = keywords.stream()
.map(s -> LanguageSpecificContentQueryHelper.expandQueryString(
KeywordSearchUtil.quoteQuery(KeywordSearchUtil.escapeLuceneQuery(s))))
.collect(Collectors.joining(" "));
q.addFilterQuery(filterQuery);
q.setQuery(highlightQuery);
LanguageSpecificContentQueryHelper.configureTermfreqQuery(q, keywords.iterator().next());
QueryResponse response = KeywordSearch.getServer().query(q, SolrRequest.METHOD.POST);
SolrDocumentList results = response.getResults();
if (results.isEmpty()) {
return 0;
}
SolrDocument document = results.get(0);
return ((Float) document.getFieldValue(Server.Schema.TERMFREQ.toString())).intValue();
}
static int findNthIndexOf(String s, String pattern, int n) {
int found = 0;
int idx = -1;
int len = s.length();
while (idx < len && found <= n) {
idx = s.indexOf(pattern, idx + 1);
if (idx == -1) {
break;
}
found++;
}
return idx;
}
private static List<KeywordHit> findMatches(Keyword originalKeyword, QueryResults queryResults) throws KeywordSearchModuleException {
List<KeywordHit> matches = new ArrayList<>();
for (SolrDocument document : queryResults.chunks) {
String docId = (String) document.getFieldValue(Server.Schema.ID.toString());
try {
int hitCountInChunk = ((Float) document.getFieldValue(Server.Schema.TERMFREQ.toString())).intValue();
SolrDocument miniChunk = queryResults.miniChunks.get(docId);
if (miniChunk == null) {
// the last chunk does not have a mini chunk because there is no overlapping region with the next one
matches.add(createKeywordHit(originalKeyword, queryResults.highlighting, docId));
} else {
int hitCountInMiniChunk = ((Float) miniChunk.getFieldValue(Server.Schema.TERMFREQ.toString())).intValue();
if (hitCountInMiniChunk < hitCountInChunk) {
// there is at least one hit in the base chunk
matches.add(createKeywordHit(originalKeyword, queryResults.highlighting, docId));
}
}
} catch (TskException ex) {
throw new KeywordSearchModuleException(ex);
}
}
return matches;
}
/**
* copied from LuceneQuery and modified to use getHighlightFieldValue
*/
private static KeywordHit createKeywordHit(Keyword originalKeyword, Map<String, Map<String, List<String>>> highlightResponse, String docId) throws TskException {
/**
* Get the first snippet from the document if keyword search is
* configured to use snippets.
*/
String snippet = "";
if (KeywordSearchSettings.getShowSnippets()) {
List<String> snippetList = getHighlightFieldValue(highlightResponse.get(docId)).orElse(null);
// list is null if there wasn't a snippet
if (snippetList != null) {
snippet = EscapeUtil.unEscapeHtml(snippetList.get(0)).trim();
}
}
return new KeywordHit(docId, snippet, originalKeyword.getSearchTerm());
}
/**
* @return Optional.empty if empty
*/
private static Optional<List<String>> getHighlightFieldValue(Map<String, List<String>> highlight) {
for (Server.Schema field : LANGUAGE_SPECIFIC_CONTENT_FIELDS) {
if (highlight.containsKey(field.toString())) {
return Optional.of(highlight.get(field.toString()));
}
}
return Optional.empty();
}
}
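
As a rough illustration of how expandQueryString above composes a query, the snippet below assumes the Server.Schema TEXT and CONTENT_JA constants render as the "text" and "content_ja" field names seen in the schema diff, and that the call is made from within the org.sleuthkit.autopsy.keywordsearch package, since the helper is package-private.

// Hypothetical call site with an escaped, quoted keyword:
String expanded = LanguageSpecificContentQueryHelper.expandQueryString("\"大阪\"");
// Expected value, assuming the field names from the schema above:
//   text:"大阪" OR content_ja:"大阪"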

View File

@ -134,7 +134,6 @@ class LuceneQuery implements KeywordSearchQuery {
String cursorMark = CursorMarkParams.CURSOR_MARK_START;
boolean allResultsProcessed = false;
List<KeywordHit> matches = new ArrayList<>();
LanguageSpecificContentQueryHelper.QueryResults languageSpecificQueryResults = new LanguageSpecificContentQueryHelper.QueryResults();
while (!allResultsProcessed) {
solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
QueryResponse response = solrServer.query(solrQuery, SolrRequest.METHOD.POST);
@ -142,18 +141,7 @@ class LuceneQuery implements KeywordSearchQuery {
// objectId_chunk -> "text" -> List of previews
Map<String, Map<String, List<String>>> highlightResponse = response.getHighlighting();
if (2.2 <= indexSchemaVersion) {
languageSpecificQueryResults.highlighting.putAll(response.getHighlighting());
}
for (SolrDocument resultDoc : resultList) {
if (2.2 <= indexSchemaVersion) {
Object language = resultDoc.getFieldValue(Server.Schema.LANGUAGE.toString());
if (language != null) {
LanguageSpecificContentQueryHelper.updateQueryResults(languageSpecificQueryResults, resultDoc);
}
}
try {
/*
* for each result doc, check that the first occurrence of
@ -165,11 +153,6 @@ class LuceneQuery implements KeywordSearchQuery {
final Integer chunkSize = (Integer) resultDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString());
final Collection<Object> content = resultDoc.getFieldValues(Server.Schema.CONTENT_STR.toString());
// if the document has language, it should be hit in language specific content fields. So skip here.
if (resultDoc.containsKey(Server.Schema.LANGUAGE.toString())) {
continue;
}
if (indexSchemaVersion < 2.0) {
//old schema versions don't support chunk_size or the content_str fields, so just accept hits
matches.add(createKeywordtHit(highlightResponse, docId));
@ -196,16 +179,9 @@ class LuceneQuery implements KeywordSearchQuery {
cursorMark = nextCursorMark;
}
List<KeywordHit> mergedMatches;
if (2.2 <= indexSchemaVersion) {
mergedMatches = LanguageSpecificContentQueryHelper.mergeKeywordHits(matches, originalKeyword, languageSpecificQueryResults);
} else {
mergedMatches = matches;
}
QueryResults results = new QueryResults(this);
//in case of single term literal query there is only 1 term
results.addResult(new Keyword(originalKeyword.getSearchTerm(), true, true, originalKeyword.getListName(), originalKeyword.getOriginalTerm()), mergedMatches);
results.addResult(new Keyword(originalKeyword.getSearchTerm(), true, true, originalKeyword.getListName(), originalKeyword.getOriginalTerm()), matches);
return results;
}
@ -286,25 +262,19 @@ class LuceneQuery implements KeywordSearchQuery {
*
* @return
*/
private SolrQuery createAndConfigureSolrQuery(boolean snippets) throws NoOpenCoreException, KeywordSearchModuleException {
double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());
private SolrQuery createAndConfigureSolrQuery(boolean snippets) {
SolrQuery q = new SolrQuery();
q.setShowDebugInfo(DEBUG); //debug
// Wrap the query string in quotes if this is a literal search term.
String queryStr = originalKeyword.searchTermIsLiteral()
? KeywordSearchUtil.quoteQuery(keywordStringEscaped) : keywordStringEscaped;
? KeywordSearchUtil.quoteQuery(keywordStringEscaped) : keywordStringEscaped;
// Run the query against an optional alternative field.
if (field != null) {
//use the optional field
queryStr = field + ":" + queryStr;
q.setQuery(queryStr);
} else if (2.2 <= indexSchemaVersion && originalKeyword.searchTermIsLiteral()) {
q.setQuery(LanguageSpecificContentQueryHelper.expandQueryString(queryStr));
} else {
q.setQuery(queryStr);
}
q.setQuery(queryStr);
q.setRows(MAX_RESULTS_PER_CURSOR_MARK);
// Setting the sort order is necessary for cursor based paging to work.
q.setSort(SolrQuery.SortClause.asc(Server.Schema.ID.toString()));
@ -313,11 +283,6 @@ class LuceneQuery implements KeywordSearchQuery {
Server.Schema.CHUNK_SIZE.toString(),
Server.Schema.CONTENT_STR.toString());
if (2.2 <= indexSchemaVersion && originalKeyword.searchTermIsLiteral()) {
q.addField(Server.Schema.LANGUAGE.toString());
LanguageSpecificContentQueryHelper.configureTermfreqQuery(q, keywordStringEscaped);
}
for (KeywordQueryFilter filter : filters) {
q.addFilterQuery(filter.toString());
}
@ -335,16 +300,8 @@ class LuceneQuery implements KeywordSearchQuery {
*
* @param q The SolrQuery to configure.
*/
private static void configurwQueryForHighlighting(SolrQuery q) throws NoOpenCoreException {
double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());
if (2.2 <= indexSchemaVersion) {
for (Server.Schema field : LanguageSpecificContentQueryHelper.getQueryFields()) {
q.addHighlightField(field.toString());
}
} else {
q.addHighlightField(HIGHLIGHT_FIELD);
}
private static void configurwQueryForHighlighting(SolrQuery q) {
q.addHighlightField(HIGHLIGHT_FIELD);
q.setHighlightSnippets(1);
q.setHighlightFragsize(SNIPPET_LENGTH);
@ -447,13 +404,7 @@ class LuceneQuery implements KeywordSearchQuery {
if (responseHighlightID == null) {
return "";
}
double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
List<String> contentHighlights;
if (2.2 <= indexSchemaVersion) {
contentHighlights = LanguageSpecificContentQueryHelper.getHighlights(responseHighlightID).orElse(null);
} else {
contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
}
List<String> contentHighlights = responseHighlightID.get(LuceneQuery.HIGHLIGHT_FIELD);
if (contentHighlights == null) {
return "";
} else {

View File

@ -1,41 +0,0 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011-2019 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.keywordsearch;
/**
* Mini-chunk related methods.
*/
final class MiniChunkHelper {
private MiniChunkHelper() {}
static String SUFFIX = "_mini";
static String getChunkIdString(String baseChunkID) {
return baseChunkID + SUFFIX;
}
static boolean isMiniChunkID(String chunkID) {
return chunkID.endsWith(SUFFIX);
}
static String getBaseChunkID(String miniChunkID) {
return miniChunkID.replaceFirst(SUFFIX + "$", "");
}
}
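The behavior of the removed helper is easiest to see as a round trip; the chunk IDs below are made up for illustration:

String miniId = MiniChunkHelper.getChunkIdString("12_3"); // "12_3_mini"
boolean isMini = MiniChunkHelper.isMiniChunkID(miniId);   // true
String baseId = MiniChunkHelper.getBaseChunkID(miniId);   // "12_3"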

View File

@ -1,95 +0,0 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011-2019 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.keywordsearch;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.request.FieldAnalysisRequest;
import org.apache.solr.client.solrj.response.AnalysisResponseBase;
import org.apache.solr.client.solrj.response.FieldAnalysisResponse;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
* Get terms from a query using Solr.
*
* This class is used to find the matched terms in query results.
*/
final class QueryTermHelper {
private QueryTermHelper() {}
/**
* Result of {@link #parse} method
*/
static class Result {
/**
* field name -> [term]
*/
final Map<String, List<String>> fieldTermsMap = new HashMap<>();
}
/**
* Parse the given query string on Solr and return the result
*
* @param query query to parse
* @param fields field names to use for parsing
*/
static Result parse(String query, List<Server.Schema> fields) throws KeywordSearchModuleException, NoOpenCoreException {
Server server = KeywordSearch.getServer();
FieldAnalysisRequest request = new FieldAnalysisRequest();
for (Server.Schema field : fields) {
request.addFieldName(field.toString());
}
// FieldAnalysisRequest requires its field value property to be set,
// even though the corresponding analysis.fieldvalue parameter is not needed by the API.
// Setting an empty value does not affect the result.
request.setFieldValue("");
request.setQuery(query);
FieldAnalysisResponse response = new FieldAnalysisResponse();
try {
response.setResponse(server.request(request));
} catch (SolrServerException e) {
throw new KeywordSearchModuleException(e);
}
Result result = new Result();
for (Map.Entry<String, FieldAnalysisResponse.Analysis> entry : response.getAllFieldNameAnalysis()) {
Iterator<AnalysisResponseBase.AnalysisPhase> it = entry.getValue().getQueryPhases().iterator();
// The last phase is the one which is used in the search process.
AnalysisResponseBase.AnalysisPhase lastPhase = null;
while (it.hasNext()) {
lastPhase = it.next();
}
if (lastPhase != null) {
List<String> tokens = lastPhase.getTokens().stream().map(AnalysisResponseBase.TokenInfo::getText).collect(Collectors.toList());
result.fieldTermsMap.put(entry.getKey(), tokens);
}
}
return result;
}
}
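A hedged usage sketch of the removed helper: ask Solr to analyze a query against one or more schema fields and read back the analyzed terms per field. The query string and field choice are illustrative; the checked exceptions are the ones declared on parse above:

import java.util.Arrays;
import java.util.List;
import java.util.Map;

class QueryTermSketch {
    static void printAnalyzedTerms() throws KeywordSearchModuleException, NoOpenCoreException {
        QueryTermHelper.Result result = QueryTermHelper.parse(
                "sample query",
                Arrays.asList(Server.Schema.CONTENT_JA));        // illustrative field choice
        for (Map.Entry<String, List<String>> entry : result.fieldTermsMap.entrySet()) {
            System.out.println(entry.getKey() + " -> " + entry.getValue());
        }
    }
}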

View File

@ -130,18 +130,6 @@ public class Server {
return "content_ws"; //NON-NLS
}
},
CONTENT_JA {
@Override
public String toString() {
return "content_ja"; //NON-NLS
}
},
LANGUAGE {
@Override
public String toString() {
return "language"; //NON-NLS
}
},
FILE_NAME {
@Override
public String toString() {
@ -187,17 +175,6 @@ public class Server {
public String toString() {
return "chunk_size"; //NON-NLS
}
},
/**
* termfreq is a function which returns the number of times a term appears.
* It is not an actual field defined in schema.xml, but it can be retrieved from returned documents
* in the same way as fields.
*/
TERMFREQ {
@Override
public String toString() {
return "termfreq"; //NON-NLS
}
}
};
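The removed TERMFREQ constant names Solr's termfreq() function, which is requested through the field list rather than stored in the schema. A hedged sketch of requesting and reading such a pseudo-field (the field and term are illustrative):

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;

class TermfreqSketch {
    static void printTermFrequencies(SolrClient solr) throws Exception {
        SolrQuery q = new SolrQuery("text:keyword");
        q.addField("id");
        q.addField("termfreq:termfreq(text,'keyword')");         // alias:function, returned like a stored field
        QueryResponse rsp = solr.query(q);
        for (SolrDocument doc : rsp.getResults()) {
            System.out.println(doc.getFieldValue("id") + " -> " + doc.getFieldValue("termfreq"));
        }
    }
}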
@ -1658,8 +1635,7 @@ public class Server {
private int queryNumFileChunks(long contentID) throws SolrServerException, IOException {
String id = KeywordSearchUtil.escapeLuceneQuery(Long.toString(contentID));
final SolrQuery q
= new SolrQuery(Server.Schema.ID + ":" + id + Server.CHUNK_ID_SEPARATOR + "*"
+ " NOT " + Server.Schema.ID + ":*" + MiniChunkHelper.SUFFIX);
= new SolrQuery(Server.Schema.ID + ":" + id + Server.CHUNK_ID_SEPARATOR + "*");
q.setRows(0);
return (int) query(q).getResults().getNumFound();
}
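With the mini-chunk exclusion reverted, the chunk-count query reduces to a plain prefix match on the chunk ID. For a hypothetical content ID of 42 (and assuming Schema.ID renders as "id" with an underscore as the chunk separator), the query changes from id:42_* NOT id:*_mini to just id:42_*.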

View File

@ -1,59 +0,0 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011-2019 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.keywordsearch;
import org.junit.Test;
import java.util.Arrays;
import static org.junit.Assert.assertEquals;
/**
* Tests for LanguageSpecificContentQueryHelper.
*/
public class LanguageSpecificContentQueryHelperTest {
@Test
public void makeQueryString() {
assertEquals("text:query OR content_ja:query", LanguageSpecificContentQueryHelper.expandQueryString("query"));
}
@Test
public void findNthIndexOf() {
assertEquals(-1, LanguageSpecificContentQueryHelper.findNthIndexOf("A1AA45", "_", 0));
assertEquals(0, LanguageSpecificContentQueryHelper.findNthIndexOf("A1AA45", "A", 0));
assertEquals(2, LanguageSpecificContentQueryHelper.findNthIndexOf("A1AA45", "A", 1));
assertEquals(3, LanguageSpecificContentQueryHelper.findNthIndexOf("A1AA45", "A", 2));
assertEquals(-1, LanguageSpecificContentQueryHelper.findNthIndexOf("A1AA45", "A", 3));
assertEquals(0, LanguageSpecificContentQueryHelper.findNthIndexOf("A1AA45", "", 0));
assertEquals(-1, LanguageSpecificContentQueryHelper.findNthIndexOf("", "A", 0));
assertEquals(-1, LanguageSpecificContentQueryHelper.findNthIndexOf("A1AA45", "A", -1));
assertEquals(-1, LanguageSpecificContentQueryHelper.findNthIndexOf("A1AA45", "A", 999));
}
@Test
public void buildTermfreqQuery() {
QueryTermHelper.Result result = new QueryTermHelper.Result();
result.fieldTermsMap.put("field1", Arrays.asList("term1"));
result.fieldTermsMap.put("field2", Arrays.asList("term1", "term2"));
assertEquals(
"termfreq:sum(termfreq(\"field1\",\"term1\"),termfreq(\"field2\",\"term1\"),termfreq(\"field2\",\"term2\"))",
LanguageSpecificContentQueryHelper.buildTermfreqQuery("query", result));
}
}

View File

@ -1,46 +0,0 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011-2019 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.keywordsearch;
import org.junit.Assert;
import org.junit.Test;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
/**
* Tests for MiniChunkHelper.
*/
public class MiniChunkHelperTest {
@Test
public void isMiniChunkID() {
assertTrue(MiniChunkHelper.isMiniChunkID("1_1_mini"));
assertFalse(MiniChunkHelper.isMiniChunkID("1_1"));
assertFalse(MiniChunkHelper.isMiniChunkID("1"));
}
@Test
public void getBaseChunkID() {
Assert.assertEquals("1_1", MiniChunkHelper.getBaseChunkID("1_1_mini"));
Assert.assertEquals("1_1", MiniChunkHelper.getBaseChunkID("1_1"));
Assert.assertEquals("1", MiniChunkHelper.getBaseChunkID("1"));
}
}