Skip to content

Commit 2f447a4

Browse files
committed
Merge pull request #15 from PerfectXL/dataset_tests
Adds tests for EUSES and Enron data set
2 parents 267dd28 + c665a1d commit 2f447a4

File tree

6 files changed

+1035748
-0
lines changed

6 files changed

+1035748
-0
lines changed

src/XLParser.Tests/DatasetTests.cs

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
using System;
2+
using System.Collections;
3+
using System.Collections.Generic;
4+
using Microsoft.VisualStudio.TestTools.UnitTesting;
5+
using System.IO;
6+
using System.Linq;
7+
using System.Threading;
8+
using System.Threading.Tasks;
9+
using Irony.Parsing;
10+
11+
namespace XLParser.Tests
12+
{
13+
[TestClass]
14+
// Visual Studio's standard data sources were tried for this class, but they were found to be very slow
15+
public class DatasetTests
16+
{
17+
// Test context of the current run; this class uses it to log formulas that fail to parse.
// Presumably assigned by the MSTest runner before each test - confirm.
public TestContext TestContext { get; set; }
18+
19+
// Once more than this many unexpected parse failures were counted, the parallel parse loop is stopped early.
private const int MaxParseErrors = 10;
20+
21+
/// <summary>
/// Runs the formula parser over the Enron formula data set. Formulas listed in
/// the accompanying known-failures file are allowed to fail.
/// </summary>
[TestMethod]
[TestCategory("Slow")]
// This test is currently enabled; uncomment the attribute below to skip it.
//[Ignore]
public void EnronFormulasParseTest()
{
    const string formulasFile = "data/enron/formulas.csv";
    const string knownFailsFile = "data/enron/knownfails.csv";

    parseCSVDataSet(formulasFile, knownFailsFile);
}
29+
30+
/// <summary>
/// Runs the formula parser over the EUSES formula data set. Formulas listed in
/// the accompanying known-failures file are allowed to fail.
/// </summary>
[TestMethod]
[TestCategory("Slow")]
// This test is currently enabled; uncomment the attribute below to skip it.
//[Ignore]
public void EusesFormulasParseTest()
{
    const string formulasFile = "data/euses/formulas.csv";
    const string knownFailsFile = "data/euses/knownfails.csv";

    parseCSVDataSet(formulasFile, knownFailsFile);
}
38+
39+
/// <summary>
/// Parses the formulas of a CSV data set in parallel and fails the test when a
/// formula that is not a known failure cannot be parsed.
/// </summary>
/// <param name="filename">Path of the CSV file holding one formula per line.</param>
/// <param name="knownfailsfile">
/// Optional path of a CSV file listing the formulas that are expected to fail;
/// <c>null</c> means no failures are expected.
/// </param>
private void parseCSVDataSet(string filename, string knownfailsfile = null)
{
    // Only read inside the loop, never modified, so concurrent lookups are safe.
    ISet<string> knownfails = new HashSet<string>(readFormulaCSV(knownfailsfile));
    int parseErrors = 0;
    var gate = new object();

    // linenr is the zero-based index of the formula in the sequence returned by
    // readFormulaCSV (empty lines are already filtered out there).
    Parallel.ForEach(readFormulaCSV(filename), (formula, control, linenr) =>
    {
        // Best-effort early exit: the counter is read without the lock, so a stale
        // value merely lets a few more formulas be parsed before the loop stops.
        if (parseErrors > MaxParseErrors)
        {
            control.Stop();
            return;
        }

        try
        {
            ExcelFormulaParser.Parse(formula);
        }
        catch (ArgumentException)
        {
            if (knownfails.Contains(formula))
            {
                return;
            }

            lock (gate)
            {
                // Hand the values over as format arguments. Passing a pre-formatted
                // message would make WriteLine interpret braces inside the formula
                // (e.g. array constants such as {1,2}) as format placeholders.
                TestContext.WriteLine("Failed parsing line {0} <<{1}>>", linenr, formula);
                parseErrors++;
            }
        }
    });

    if (parseErrors > 0)
    {
        Assert.Fail("Parse Errors on file " + filename);
    }
}
70+
71+
/// <summary>
/// Reads the formulas stored in a file, one per line. Empty lines are skipped
/// and every remaining line is passed through <see cref="unQuote"/>.
/// </summary>
/// <param name="f">Path of the file to read, or <c>null</c> for "no file".</param>
/// <returns>
/// The unquoted non-empty lines of the file, or an empty sequence when
/// <paramref name="f"/> is <c>null</c>.
/// </returns>
private static IEnumerable<string> readFormulaCSV(string f)
{
    if (f == null)
    {
        return Enumerable.Empty<string>();
    }

    // NOTE(review): the previous comment here stated that using ReadAllLines instead of
    // ReadLines shaves about 10s off the Enron test and is worth the memory usage, yet the
    // code calls File.ReadLines - confirm which of the two is intended.
    IEnumerable<string> lines = File.ReadLines(f);
    return from line in lines
           where line != ""
           select unQuote(line);
}
80+
81+
/// <summary>
/// Removes CSV quoting from a line: when the line is enclosed in double quotes,
/// the enclosing quotes are stripped and every doubled quote (<c>""</c>) inside
/// is collapsed to a single quote.
/// </summary>
/// <param name="line">The raw line; must not be <c>null</c>.</param>
/// <returns>
/// The unquoted text, or <paramref name="line"/> unchanged when it is not
/// enclosed in a pair of double quotes.
/// </returns>
private static string unQuote(string line)
{
    const char Quote = '"';

    // Require both an opening and a closing quote. A lone quote character would
    // otherwise ask Substring for a negative length, and an unterminated quoted
    // line would silently lose its last character.
    bool isQuoted = line.Length >= 2
                    && line[0] == Quote
                    && line[line.Length - 1] == Quote;
    if (!isQuoted)
    {
        return line;
    }

    return line.Substring(1, line.Length - 2).Replace("\"\"", "\"");
}
87+
}
88+
}

src/XLParser.Tests/XLParser.Tests.csproj

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
<Reference Include="System.Core">
4848
<RequiredTargetFramework>3.5</RequiredTargetFramework>
4949
</Reference>
50+
<Reference Include="System.Data" />
5051
<Reference Include="System.Xml" />
5152
<Reference Include="System.Xml.Linq" />
5253
</ItemGroup>
@@ -60,13 +61,28 @@
6061
<Compile Include="Properties\AssemblyInfo.cs" />
6162
<Compile Include="ParserTests.cs" />
6263
<Compile Include="PrintTests.cs" />
64+
<Compile Include="DatasetTests.cs" />
6365
</ItemGroup>
6466
<ItemGroup>
6567
<ProjectReference Include="..\XLParser\XLParser.csproj">
6668
<Project>{fb048d20-29fd-4d2c-a336-59f93f9a68f3}</Project>
6769
<Name>XLParser</Name>
6870
</ProjectReference>
6971
</ItemGroup>
72+
<ItemGroup>
73+
<None Include="data\enron\formulas.csv">
74+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
75+
</None>
76+
<None Include="data\enron\knownfails.csv">
77+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
78+
</None>
79+
<None Include="data\euses\formulas.csv">
80+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
81+
</None>
82+
<None Include="data\euses\knownfails.csv">
83+
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
84+
</None>
85+
</ItemGroup>
7086
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
7187
<!-- To modify your build process, add your task inside one of the targets below and uncomment it.
7288
Other similar extension points exist, see Microsoft.Common.targets.

0 commit comments

Comments
 (0)