Merge pull request #6664 from gdicristofaro/7215b-externalLibNonPrintableRemove

7215b remove erroneous rows
This commit is contained in:
Richard Cordovano 2021-01-22 07:26:26 -05:00 committed by GitHub
commit c3a0dbc794
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -314,9 +314,10 @@ public final class LeappFileProcessor {
idx -> idx, idx -> idx,
(val1, val2) -> val1)); (val1, val2) -> val1));
int lineNum = 1; int lineNum = 2;
while (iterator.hasNext()) { while (iterator.hasNext()) {
Collection<BlackboardAttribute> bbattributes = processReadLine(iterator.next(), columnIndexes, attrList, fileName, lineNum++); List<String> columnItems = iterator.next();
Collection<BlackboardAttribute> bbattributes = processReadLine(columnItems, columnIndexes, attrList, fileName, lineNum);
if (!bbattributes.isEmpty()) { if (!bbattributes.isEmpty()) {
BlackboardArtifact bbartifact = createArtifactWithAttributes(artifactType.getTypeID(), dataSource, bbattributes); BlackboardArtifact bbartifact = createArtifactWithAttributes(artifactType.getTypeID(), dataSource, bbattributes);
@ -324,6 +325,8 @@ public final class LeappFileProcessor {
bbartifacts.add(bbartifact); bbartifacts.add(bbartifact);
} }
} }
lineNum++;
} }
} }
} }
@ -334,7 +337,8 @@ public final class LeappFileProcessor {
* *
* @param lineValues List of column values. * @param lineValues List of column values.
* @param columnIndexes Mapping of column headers (trimmed; to lower case) * @param columnIndexes Mapping of column headers (trimmed; to lower case)
* to column index. * to column index. Every header column, and only header columns, should be
* present.
* @param attrList The list of attributes as specified for the schema of * @param attrList The list of attributes as specified for the schema of
* this file. * this file.
* @param fileName The name of the file being processed. * @param fileName The name of the file being processed.
@ -349,25 +353,38 @@ public final class LeappFileProcessor {
if (MapUtils.isEmpty(columnIndexes) || CollectionUtils.isEmpty(lineValues) if (MapUtils.isEmpty(columnIndexes) || CollectionUtils.isEmpty(lineValues)
|| (lineValues.size() == 1 && StringUtils.isEmpty(lineValues.get(0)))) { || (lineValues.size() == 1 && StringUtils.isEmpty(lineValues.get(0)))) {
return Collections.emptyList(); return Collections.emptyList();
} else if (lineValues.size() != columnIndexes.size()) {
logger.log(Level.WARNING, String.format(
"Row at line number %d in file %s has %d columns when %d were expected based on the header row.",
lineNum, fileName, lineValues.size(), columnIndexes.size()));
return Collections.emptyList();
} }
List<BlackboardAttribute> attrsToRet = new ArrayList<>(); List<BlackboardAttribute> attrsToRet = new ArrayList<>();
for (TsvColumn colAttr : attrList) { for (TsvColumn colAttr : attrList) {
if (colAttr.getAttributeType() == null) { if (colAttr.getAttributeType() == null) {
// this handles columns that are currently ignored.
continue; continue;
} }
Integer columnIdx = columnIndexes.get(colAttr.getColumnName()); Integer columnIdx = columnIndexes.get(colAttr.getColumnName());
String value = (columnIdx == null || columnIdx >= lineValues.size() || columnIdx < 0) ? null : lineValues.get(columnIdx); if (columnIdx == null) {
if (value == null) { logger.log(Level.WARNING, String.format("No column mapping found for %s in file %s. Omitting column.", colAttr.getColumnName(), fileName));
logger.log(Level.WARNING, String.format("No value found for column %s at line %d in file %s.", colAttr.getColumnName(), lineNum, fileName));
continue; continue;
} }
BlackboardAttribute attr = (value == null) ? null : getAttribute(colAttr.getAttributeType(), value, fileName); String value = (columnIdx >= lineValues.size() || columnIdx < 0) ? null : lineValues.get(columnIdx);
if (attr != null) { if (value == null) {
attrsToRet.add(attr); logger.log(Level.WARNING, String.format("No value found for column %s at line %d in file %s. Omitting row.", colAttr.getColumnName(), lineNum, fileName));
return Collections.emptyList();
} }
BlackboardAttribute attr = (value == null) ? null : getAttribute(colAttr.getAttributeType(), value, fileName);
if (attr == null) {
logger.log(Level.WARNING, String.format("Blackboard attribute could not be parsed column %s at line %d in file %s. Omitting row.", colAttr.getColumnName(), lineNum, fileName));
return Collections.emptyList();
}
attrsToRet.add(attr);
} }
if (tsvFileArtifactComments.containsKey(fileName)) { if (tsvFileArtifactComments.containsKey(fileName)) {
@ -458,6 +475,10 @@ public final class LeappFileProcessor {
* @return The generated blackboard attribute or null if not determined. * @return The generated blackboard attribute or null if not determined.
*/ */
private BlackboardAttribute parseAttrValue(String value, BlackboardAttribute.Type attrType, String fileName, boolean blankIsNull, boolean zeroIsNull, ParseExceptionFunction valueConverter) { private BlackboardAttribute parseAttrValue(String value, BlackboardAttribute.Type attrType, String fileName, boolean blankIsNull, boolean zeroIsNull, ParseExceptionFunction valueConverter) {
// remove non-printable characters from tsv input
// https://stackoverflow.com/a/6199346
value = value.replaceAll("\\p{C}", "");
if (blankIsNull && StringUtils.isBlank(value)) { if (blankIsNull && StringUtils.isBlank(value)) {
return null; return null;
} }