From 44f02903040bc7f16c834d26ed324e96b2c463a9 Mon Sep 17 00:00:00 2001 From: rnveach Date: Thu, 22 Oct 2020 20:27:57 -0400 Subject: [PATCH] Issue #344: add multi-thread support to CheckstyleReportsParser --- .../main/java/com/github/checkstyle/Main.java | 7 +- .../github/checkstyle/data/DiffReport.java | 109 +++++++- .../github/checkstyle/data/Statistics.java | 12 + .../parser/CheckstyleReportsParser.java | 257 ++++++++++-------- .../parser/CheckstyleTextParser.java | 2 +- 5 files changed, 263 insertions(+), 124 deletions(-) diff --git a/patch-diff-report-tool/src/main/java/com/github/checkstyle/Main.java b/patch-diff-report-tool/src/main/java/com/github/checkstyle/Main.java index 04f10a48..2e0c22cd 100644 --- a/patch-diff-report-tool/src/main/java/com/github/checkstyle/Main.java +++ b/patch-diff-report-tool/src/main/java/com/github/checkstyle/Main.java @@ -67,11 +67,6 @@ public final class Main { + "if this folder exists its content will be purged;\n" + "\t-h - simply shows help message."; - /** - * Number of "file" xml tags parsed at one iteration of parser. - */ - public static final int XML_PARSE_PORTION_SIZE = 50; - /** * Name for the site file. */ @@ -162,7 +157,7 @@ public static void main(final String... args) throws Exception { // XML parsing stage System.out.println("XML parsing is started."); diffReport = CheckstyleReportsParser.parse(paths.getBaseReportPath(), - paths.getPatchReportPath(), XML_PARSE_PORTION_SIZE); + paths.getPatchReportPath()); } else { // file parsing stage diff --git a/patch-diff-report-tool/src/main/java/com/github/checkstyle/data/DiffReport.java b/patch-diff-report-tool/src/main/java/com/github/checkstyle/data/DiffReport.java index 57260be0..25e0dc2b 100644 --- a/patch-diff-report-tool/src/main/java/com/github/checkstyle/data/DiffReport.java +++ b/patch-diff-report-tool/src/main/java/com/github/checkstyle/data/DiffReport.java @@ -24,7 +24,13 @@ import java.util.Comparator; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.TreeMap; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import com.github.checkstyle.parser.CheckstyleReportsParser; @@ -38,6 +44,9 @@ */ public final class DiffReport { + /** Number of records to process at a time when looking for differences. */ + private static final int SPLIT_SIZE = 100; + /** * Container for parsed data, * note it is a TreeMap for memory keeping purposes. @@ -63,6 +72,22 @@ public Statistics getStatistics() { return statistics; } + /** + * Utility to merge the patch related contents of the {@code other} class to the current. + * + * @param other The other class to merge. + */ + public void mergePatch(DiffReport other) { + mergeRecords(other.records); + statistics.copyPatch(other.statistics); + } + + private void mergeRecords(Map> other) { + for (Entry> item : other.entrySet()) { + addRecords(item.getValue(), item.getKey()); + } + } + /** * Adds new records to the diff report, * when there are records with this filename, comparison @@ -103,16 +128,44 @@ public void addRecords(List newRecords, */ private static List produceDiff( List list1, List list2) { + final List diff; + try { + diff = produceDiffEx(list1, list2); + diff.addAll(produceDiffEx(list2, list1)); + } + catch (InterruptedException | ExecutionException ex) { + throw new IllegalStateException("Multi-threading failure reported", ex); + } + + return diff; + } + + private static List produceDiffEx( + List list1, List list2) + throws InterruptedException, ExecutionException { final List diff = new ArrayList<>(); - for (CheckstyleRecord rec1 : list1) { - if (!isInList(list2, rec1)) { - diff.add(rec1); + if (list1.size() < SPLIT_SIZE) { + for (CheckstyleRecord rec1 : list1) { + if (!isInList(list2, rec1)) { + diff.add(rec1); + } } } - for (CheckstyleRecord rec2 : list2) { - if (!isInList(list1, rec2)) { - diff.add(rec2); + else { + final ExecutorService executor = + Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + final List>> futures = new ArrayList<>(); + final int size = list1.size(); + for (int i = 0; i < size; i += SPLIT_SIZE) { + futures.add(executor.submit(new MultiThreadedDiff(list1, list2, i, Math.min(size, i + + SPLIT_SIZE)))); + } + + for (Future> future : futures) { + diff.addAll(future.get()); } + + executor.shutdown(); } return diff; } @@ -141,7 +194,7 @@ private static boolean isInList(List list, /** * Generates statistical information and puts in in the accumulator. */ - public void getDiffStatistics() { + public void generateDiffStatistics() { statistics.setFileNumDiff(records.size()); for (Map.Entry> entry : records.entrySet()) { @@ -183,4 +236,46 @@ public int compare(final CheckstyleRecord arg0, } } + /** Separate class to multi-thread 2 lists checking if items from 1 is in the other. */ + private static final class MultiThreadedDiff implements Callable> { + /** First list to examine. */ + private List list1; + /** Second list to examine. */ + private List list2; + /** Inclusive start position of the first list. */ + private int list1Start; + /** Non-inclusive End position of the first list. */ + private int list1End; + + /** + * Default constructor. + * + * @param list1 First list to examine. + * @param list2 Second list to examine. + * @param list1Start Inclusive start position of the first list. + * @param list1End Non-inclusive End position of the first list. + */ + private MultiThreadedDiff(List list1, List list2, + int list1Start, int list1End) { + this.list1 = list1; + this.list2 = list2; + this.list1Start = list1Start; + this.list1End = list1End; + } + + @Override + public List call() throws Exception { + final List diff = new ArrayList<>(); + + for (int i = list1Start; i < list1End; i++) { + final CheckstyleRecord rec1 = list1.get(i); + + if (!isInList(list2, rec1)) { + diff.add(rec1); + } + } + return diff; + } + } + } diff --git a/patch-diff-report-tool/src/main/java/com/github/checkstyle/data/Statistics.java b/patch-diff-report-tool/src/main/java/com/github/checkstyle/data/Statistics.java index 58a0f05a..30311a95 100644 --- a/patch-diff-report-tool/src/main/java/com/github/checkstyle/data/Statistics.java +++ b/patch-diff-report-tool/src/main/java/com/github/checkstyle/data/Statistics.java @@ -113,6 +113,18 @@ public final Map getSeverityNumDiff() { return severityNumDiff; } + /** + * Utility to merge the patch related contents of the {@code other} class to the current. + * + * @param other The other class to merge. + */ + public void copyPatch(Statistics other) { + severityNumPatch = other.severityNumPatch; + moduleNumPatch = other.moduleNumPatch; + fileNumPatch = other.fileNumPatch; + uniqueMessagesPatch = other.uniqueMessagesPatch; + } + /** * Getter for total number of severity records for difference. * diff --git a/patch-diff-report-tool/src/main/java/com/github/checkstyle/parser/CheckstyleReportsParser.java b/patch-diff-report-tool/src/main/java/com/github/checkstyle/parser/CheckstyleReportsParser.java index c4a1cee3..d7a73c04 100644 --- a/patch-diff-report-tool/src/main/java/com/github/checkstyle/parser/CheckstyleReportsParser.java +++ b/patch-diff-report-tool/src/main/java/com/github/checkstyle/parser/CheckstyleReportsParser.java @@ -24,6 +24,11 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import javax.xml.stream.XMLEventReader; import javax.xml.stream.XMLStreamException; @@ -116,136 +121,168 @@ private CheckstyleReportsParser() { * path to base XML file. * @param patchXml * path to patch XML file. - * @param portionSize - * single portion of XML file processed at once by any parser. * @return parsed content. * @throws FileNotFoundException * if files not found. * @throws XMLStreamException * on internal parser error. */ - public static DiffReport parse(Path baseXml, Path patchXml, int portionSize) - throws FileNotFoundException, XMLStreamException { - final DiffReport content = new DiffReport(); - final XMLEventReader baseReader = StaxUtils.createReader(baseXml); - final XMLEventReader patchReader = StaxUtils.createReader(patchXml); - while (baseReader.hasNext() || patchReader.hasNext()) { - parseXmlPortion(content, baseReader, portionSize, BASE_REPORT_INDEX); - parseXmlPortion(content, patchReader, portionSize, PATCH_REPORT_INDEX); + public static DiffReport parse(Path baseXml, Path patchXml) + throws XMLStreamException { + final ExecutorService executor = Executors.newCachedThreadPool(); + + final Future baseFuture = + executor.submit(new MultiThreadedParser(baseXml, BASE_REPORT_INDEX)); + final Future patchFuture = + executor.submit(new MultiThreadedParser(patchXml, PATCH_REPORT_INDEX)); + + final DiffReport content; + final DiffReport patchContent; + try { + content = baseFuture.get(); + patchContent = patchFuture.get(); } - content.getDiffStatistics(); + catch (InterruptedException | ExecutionException ex) { + throw new XMLStreamException("Multi-threading failure reported", ex); + } + + executor.shutdown(); + + content.mergePatch(patchContent); + content.generateDiffStatistics(); + return content; } - /** - * Parses portion of the XML report. - * - * @param diffReport - * container for parsed data. - * @param reader - * StAX parser interface. - * @param numOfFilenames - * number of "file" tags to parse. - * @param index - * internal index of the parsed file. - * @throws XMLStreamException - * on internal parser error. - */ - private static void parseXmlPortion(DiffReport diffReport, - XMLEventReader reader, int numOfFilenames, int index) - throws XMLStreamException { - int counter = numOfFilenames; - String filename = null; - List records = null; - while (reader.hasNext()) { - final XMLEvent event = reader.nextEvent(); - if (event.isStartElement()) { - final StartElement startElement = event.asStartElement(); - final String startElementName = startElement.getName() - .getLocalPart(); - // file tag encounter - if (startElementName.equals(FILE_TAG)) { - counter--; - diffReport.getStatistics().incrementFileCount(index); - final Iterator attributes = startElement - .getAttributes(); - while (attributes.hasNext()) { - final Attribute attribute = attributes.next(); - if (attribute.getName().toString() - .equals(FILENAME_ATTR)) { - filename = attribute.getValue(); + /** Separate class to multi-thread parsing of XML files. */ + private static final class MultiThreadedParser implements Callable { + /** Path of XML file to parse. */ + private Path xml; + /** Internal index of the parsed file. */ + private int readerIndex; + + /** + * Default constructor. + * + * @param xml Path of XML file to parse. + * @param readerIndex Internal index of the parsed file. + */ + private MultiThreadedParser(Path xml, int readerIndex) { + this.xml = xml; + this.readerIndex = readerIndex; + } + + @Override + public DiffReport call() throws Exception { + final DiffReport output = new DiffReport(); + final XMLEventReader reader = StaxUtils.createReader(xml); + parseXmlPortion(output, reader, readerIndex); + return output; + } + + /** + * Parses portion of the XML report. + * + * @param diffReport + * container for parsed data. + * @param reader + * StAX parser interface. + * @param index + * internal index of the parsed file. + * @throws XMLStreamException + * on internal parser error. + */ + private static void parseXmlPortion(DiffReport diffReport, + XMLEventReader reader, int index) + throws XMLStreamException { + String filename = null; + List records = null; + while (reader.hasNext()) { + final XMLEvent event = reader.nextEvent(); + if (event.isStartElement()) { + final StartElement startElement = event.asStartElement(); + final String startElementName = startElement.getName() + .getLocalPart(); + // file tag encounter + if (startElementName.equals(FILE_TAG)) { + diffReport.getStatistics().incrementFileCount(index); + final Iterator attributes = startElement + .getAttributes(); + while (attributes.hasNext()) { + final Attribute attribute = attributes.next(); + if (attribute.getName().toString() + .equals(FILENAME_ATTR)) { + filename = attribute.getValue(); + } } + records = new ArrayList<>(); + } + // error tag encounter + else if (startElementName.equals(ERROR_TAG)) { + records.add(parseErrorTag(startElement, diffReport.getStatistics(), index, + filename)); } - records = new ArrayList<>(); - } - // error tag encounter - else if (startElementName.equals(ERROR_TAG)) { - records.add(parseErrorTag(startElement, diffReport.getStatistics(), index, - filename)); } - } - if (event.isEndElement()) { - final EndElement endElement = event.asEndElement(); - if (endElement.getName().getLocalPart().equals(FILE_TAG)) { - diffReport.addRecords(records, filename); - if (counter == 0) { - break; + if (event.isEndElement()) { + final EndElement endElement = event.asEndElement(); + if (endElement.getName().getLocalPart().equals(FILE_TAG)) { + diffReport.addRecords(records, filename); } } } } - } - /** - * Parses "error" XML tag. - * - * @param startElement - * cursor of StAX parser pointed on the tag. - * @param statistics - * container accumulating statistics. - * @param index - * internal index of the parsed file. - * @param filename - * file name. - * @return parsed data as CheckstyleRecord instance. - */ - private static CheckstyleRecord parseErrorTag(StartElement startElement, - Statistics statistics, int index, String filename) { - int line = -1; - int column = -1; - String source = null; - String message = null; - String severity = null; - final Iterator attributes = startElement - .getAttributes(); - while (attributes.hasNext()) { - final Attribute attribute = attributes.next(); - final String attrName = attribute.getName().toString(); - switch (attrName) { - case LINE_ATTR: - line = Integer.parseInt(attribute.getValue()); - break; - case COLUMN_ATTR: - column = Integer.parseInt(attribute.getValue()); - break; - case SEVERITY_ATTR: - severity = attribute.getValue(); - statistics.addSeverityRecord(severity, index); - break; - case MESSAGE_ATTR: - message = attribute.getValue(); - break; - case SOURCE_ATTR: - source = attribute.getValue(); - statistics.addModuleRecord(source, index); - break; - default: - break; + /** + * Parses "error" XML tag. + * + * @param startElement + * cursor of StAX parser pointed on the tag. + * @param statistics + * container accumulating statistics. + * @param index + * internal index of the parsed file. + * @param filename + * file name. + * @return parsed data as CheckstyleRecord instance. + */ + private static CheckstyleRecord parseErrorTag(StartElement startElement, + Statistics statistics, int index, String filename) { + int line = -1; + int column = -1; + String source = null; + String message = null; + String severity = null; + final Iterator attributes = startElement + .getAttributes(); + while (attributes.hasNext()) { + final Attribute attribute = attributes.next(); + final String attrName = attribute.getName().toString(); + switch (attrName) { + case LINE_ATTR: + line = Integer.parseInt(attribute.getValue()); + break; + case COLUMN_ATTR: + column = Integer.parseInt(attribute.getValue()); + break; + case SEVERITY_ATTR: + severity = attribute.getValue(); + statistics.addSeverityRecord(severity, index); + break; + case MESSAGE_ATTR: + message = attribute.getValue(); + break; + case SOURCE_ATTR: + source = attribute.getValue(); + statistics.addModuleRecord(source, index); + break; + default: + break; + } } - } - return new CheckstyleRecord(index, - line, column, severity, source, message, filename); + return new CheckstyleRecord(index, + line, column, severity, source, message, filename); + } } } diff --git a/patch-diff-report-tool/src/main/java/com/github/checkstyle/parser/CheckstyleTextParser.java b/patch-diff-report-tool/src/main/java/com/github/checkstyle/parser/CheckstyleTextParser.java index cb597195..94014854 100644 --- a/patch-diff-report-tool/src/main/java/com/github/checkstyle/parser/CheckstyleTextParser.java +++ b/patch-diff-report-tool/src/main/java/com/github/checkstyle/parser/CheckstyleTextParser.java @@ -99,7 +99,7 @@ else if (baseNext != patchNext) { break; } } - content.getDiffStatistics(); + content.generateDiffStatistics(); return content; }