|
| 1 | +using System; |
| 2 | +using System.Collections; |
| 3 | +using System.Collections.Generic; |
| 4 | +using Microsoft.VisualStudio.TestTools.UnitTesting; |
| 5 | +using System.IO; |
| 6 | +using System.Linq; |
| 7 | +using System.Threading; |
| 8 | +using System.Threading.Tasks; |
| 9 | +using Irony.Parsing; |
| 10 | + |
namespace XLParser.Tests
{
    /// <summary>
    /// Bulk parsing tests: every formula in a CSV data set is fed to
    /// <c>ExcelFormulaParser.Parse</c>, and the test fails if any formula that is not
    /// listed in the accompanying "known fails" file is rejected by the parser.
    /// </summary>
    /// <remarks>
    /// Visual Studio standard data sources were tried for this class, but they were found
    /// to be very slow, so the CSV files are read by hand instead.
    /// </remarks>
    [TestClass]
    public class DatasetTests
    {
        /// <summary>
        /// Test context used to write diagnostic output for failing formulas.
        /// Populated by the MSTest runner through the public setter.
        /// </summary>
        public TestContext TestContext { get; set; }

        /// <summary>
        /// Once more than this many unexpected parse failures have been recorded,
        /// the remaining formulas of the data set are no longer parsed.
        /// </summary>
        private const int MaxParseErrors = 10;

        /// <summary>Parses every formula of the Enron data set.</summary>
        [TestMethod]
        [TestCategory("Slow")]
        // Uncomment the [Ignore] attribute below to skip this slow test.
        //[Ignore]
        public void EnronFormulasParseTest()
        {
            ParseCsvDataSet("data/enron/formulas.csv", "data/enron/knownfails.csv");
        }

        /// <summary>Parses every formula of the EUSES data set.</summary>
        [TestMethod]
        [TestCategory("Slow")]
        // Uncomment the [Ignore] attribute below to skip this slow test.
        //[Ignore]
        public void EusesFormulasParseTest()
        {
            ParseCsvDataSet("data/euses/formulas.csv", "data/euses/knownfails.csv");
        }

        /// <summary>
        /// Parses all formulas in <paramref name="filename"/> in parallel and fails the test
        /// if any formula that is not a known failure cannot be parsed.
        /// </summary>
        /// <param name="filename">Path of the CSV file holding one formula per line.</param>
        /// <param name="knownfailsfile">
        /// Optional path of a CSV file listing formulas that are expected to fail;
        /// <c>null</c> means no formula is expected to fail.
        /// </param>
        private void ParseCsvDataSet(string filename, string knownfailsfile = null)
        {
            // Only read (never modified) inside the parallel loop.
            ISet<string> knownFails = new HashSet<string>(ReadFormulaCsv(knownfailsfile));
            int parseErrors = 0;
            var gate = new object();

            // linenr is the zero-based index of the formula among the non-empty lines of the file.
            Parallel.ForEach(ReadFormulaCsv(filename), (formula, control, linenr) =>
            {
                // Read without taking the lock: a stale value only means that a few more
                // formulas get parsed before the loop is stopped.
                if (parseErrors > MaxParseErrors)
                {
                    control.Stop();
                    return;
                }

                try
                {
                    ExcelFormulaParser.Parse(formula);
                }
                catch (ArgumentException)
                {
                    if (!knownFails.Contains(formula))
                    {
                        lock (gate)
                        {
                            // Pass the formula as a format argument instead of pre-formatting it:
                            // formulas may contain '{' and '}' (array constants), which must not be
                            // re-interpreted as placeholders if the call binds to the
                            // (format, args) overload of WriteLine.
                            TestContext.WriteLine("Failed parsing line {0} <<{1}>>", linenr, formula);
                            parseErrors++;
                        }
                    }
                }
            });

            if (parseErrors > 0)
            {
                Assert.Fail("Parse Errors on file " + filename);
            }
        }

        /// <summary>
        /// Reads a CSV file with one formula per line, skipping empty lines and removing
        /// the CSV quoting from each remaining line.
        /// </summary>
        /// <param name="f">Path of the file to read, or <c>null</c>.</param>
        /// <returns>The unquoted formulas; an empty sequence when <paramref name="f"/> is <c>null</c>.</returns>
        private static IEnumerable<string> ReadFormulaCsv(string f)
        {
            if (f == null)
            {
                return Enumerable.Empty<string>();
            }

            // Using ReadAllLines instead of ReadLines shaves about 10s off the Enron test,
            // so it's worth the memory usage.
            return File.ReadAllLines(f)
                .Where(line => line != "")
                .Select(Unquote);
        }

        /// <summary>
        /// Removes CSV quoting from a single field: strips the enclosing double quotes and
        /// turns each doubled quote (<c>""</c>) back into a single one.
        /// </summary>
        /// <param name="line">The raw line as read from the file.</param>
        /// <returns>
        /// The unquoted text, or <paramref name="line"/> unchanged when it is not enclosed
        /// in a pair of double quotes.
        /// </returns>
        private static string Unquote(string line)
        {
            // Require both an opening and a closing quote. A lone '"' would otherwise ask for
            // a substring of length -1, and an unterminated field would lose its last character.
            bool isQuoted = line.Length >= 2
                            && line[0] == '"'
                            && line[line.Length - 1] == '"';

            return isQuoted
                ? line.Substring(1, line.Length - 2).Replace("\"\"", "\"")
                : line;
        }
    }
}
0 commit comments