Added link to TIKA story about IBM500 issue

This commit is contained in:
U-BASIS\dsmyda 2020-07-20 11:43:16 -04:00
parent 4e7695dc12
commit 8532d1dc4c

View File

@@ -89,7 +89,7 @@ public class EncodingUtils {
detector.setText(stream); detector.setText(stream);
CharsetMatch[] tikaResults = detector.detectAll(); CharsetMatch[] tikaResults = detector.detectAll();
// Get all guesses by Tika. These CharsetMatch's are ordered // Get all guesses by Tika. These matches are ordered
// by descending confidence (largest first). // by descending confidence (largest first).
if (tikaResults.length > 0) { if (tikaResults.length > 0) {
CharsetMatch topPick = tikaResults[0]; CharsetMatch topPick = tikaResults[0];
@@ -98,7 +98,8 @@ public class EncodingUtils {
// Legacy encoding, let's discard this one in favor // Legacy encoding, let's discard this one in favor
// of the second pick. Tika has some problems with // of the second pick. Tika has some problems with
// mistakenly identifying text as IBM500. See JIRA-6600 // mistakenly identifying text as IBM500. See JIRA-6600
// for more details. // and https://issues.apache.org/jira/browse/TIKA-2771 for
// more details.
topPick = tikaResults[1]; topPick = tikaResults[1];
} }