-
Notifications
You must be signed in to change notification settings - Fork 0
/
XPathNodeCounter.cs
56 lines (52 loc) · 1.9 KB
/
XPathNodeCounter.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Xml;
namespace XPathSearch
{
    /// <summary>
    /// Evaluates the configured XPath expression against every file in the configured
    /// directory and counts how often each selected node value occurs.
    /// </summary>
    public class XPathNodeCounter
    {
        /// <summary>Value reported for a file in which the XPath selects no nodes.</summary>
        private const string NotAvailable = "N/A";

        // Matches an XML namespace declaration attribute (xmlns="..." or xmlns:prefix="...").
        //  - (?<=\s)        only match at the start of an attribute, so names that merely
        //                   end in "xmlns" (e.g. data-xmlns) are left alone;
        //  - \s*=\s*        XML permits whitespace on both sides of '=';
        //  - "..." | '...'  XML permits either quote style for attribute values.
        // Matching is case-insensitive, so XMLNS="..." is stripped as well.
        // The instance is shared by all worker threads; Regex matching methods are thread-safe.
        private static readonly Regex NamespaceDeclaration = new Regex(
            @"(?<=\s)xmlns(?::[^\s=]*)?\s*=\s*(?:""[^""]*""|'[^']*')",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        private readonly ConfigProvider _configProvider;

        /// <summary>
        /// Creates a counter that reads its directory path, thread count and XPath
        /// expression from <paramref name="configProvider"/>.
        /// </summary>
        /// <param name="configProvider">Source of the settings; must not be null.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="configProvider"/> is null.
        /// </exception>
        public XPathNodeCounter(ConfigProvider configProvider)
        {
            // Fail at construction instead of with a NullReferenceException inside the parallel query.
            if (configProvider == null)
            {
                throw new System.ArgumentNullException("configProvider");
            }

            _configProvider = configProvider;
        }

        /// <summary>
        /// Processes every file returned by <see cref="Directory.GetFiles(string)"/> for the
        /// configured directory in parallel and tallies identical node values.
        /// </summary>
        /// <returns>
        /// One <c>WordCount</c> per distinct value, sorted by <c>Count</c> descending.
        /// A file without any matching node contributes a single "N/A" value.
        /// The parallel query is not ordered, so the relative order of entries with
        /// equal counts is not specified.
        /// </returns>
        public IOrderedEnumerable<WordCount> GetNodeValueCountsOrdered()
        {
            return Directory
                .GetFiles(_configProvider.DirectoryPath)
                .AsParallel()
                .WithDegreeOfParallelism(_configProvider.DesiredThreadCount)
                .SelectMany(ProcessFile)
                // ToLookup runs the parallel part eagerly; the grouping below is sequential.
                .ToLookup(word => word)
                .Select(wordGroup => new WordCount { Key = wordGroup.Key, Count = wordGroup.Count() })
                .OrderByDescending(arg => arg.Count);
        }

        /// <summary>
        /// Yields the inner text of every node the configured XPath selects in one file,
        /// or the single placeholder "N/A" when nothing is selected.
        /// </summary>
        /// <param name="filePath">Path of the XML file to evaluate.</param>
        private IEnumerable<string> ProcessFile(string filePath)
        {
            XmlDocument doc = LoadXmlDocument(filePath);
            XmlNodeList nodes = doc.SelectNodes(_configProvider.XPath);

            if (nodes == null || nodes.Count == 0)
            {
                yield return NotAvailable;
                yield break;
            }

            foreach (XmlNode node in nodes)
            {
                yield return node.InnerText;
            }
        }

        /// <summary>
        /// Loads a file into an <see cref="XmlDocument"/> after removing all namespace
        /// declarations from its text, so that XPath expressions without prefixes can
        /// address elements that would otherwise live in a default namespace.
        /// </summary>
        /// <param name="filePath">Path of the XML file to load.</param>
        /// <returns>The parsed document.</returns>
        private static XmlDocument LoadXmlDocument(string filePath)
        {
            // NOTE(review): ReadAllText picks the encoding from a BOM or falls back to UTF-8;
            // an encoding named only in the XML declaration is presumably not honoured -
            // confirm for non-UTF-8 inputs.
            string fileText = File.ReadAllText(filePath);

            // NOTE(review): a document that actually uses a prefix (e.g. xsi:type) loses that
            // prefix's declaration here and will presumably fail to load - confirm whether
            // such inputs occur.
            string withoutNamespaces = NamespaceDeclaration.Replace(fileText, "");

            var doc = new XmlDocument();
            doc.LoadXml(withoutNamespaces);
            return doc;
        }
    }
}