regex fix for mbox parsing

This commit is contained in:
Greg DiCristofaro 2022-04-21 14:15:55 -04:00
parent f417f5ac3a
commit e4d8742968

View File

@@ -133,7 +133,12 @@ class MboxParser extends MimeJ4MessageParser implements Iterator<EmailMessage> {
// That will usually be one of the first ones. // That will usually be one of the first ones.
for (CharsetEncoder encoder : encoders) { for (CharsetEncoder encoder : encoders) {
try { try {
mboxIterable = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build(); mboxIterable = MboxIterator
.fromFile(mboxFile)
// use more permissive from line from mbox iterator 0.8.0, but handling CRLF/LF
.fromLine("^From .*\r?\n")
.charset(encoder.charset())
.build();
if (mboxIterable != null) { if (mboxIterable != null) {
emailIterator = new MBoxEmailIterator(mboxIterable.iterator(), encoder, fileID, wholeMsg); emailIterator = new MBoxEmailIterator(mboxIterable.iterator(), encoder, fileID, wholeMsg);
} }