Skip to content

Commit 10a9b47

Browse files
Haleyo authored and Lewis Coates committed
adds parse xml test (#18)
* adds parse xml test * updates metadata.txt
1 parent 8b7d757 commit 10a9b47

File tree

9 files changed

+242
-0
lines changed

9 files changed

+242
-0
lines changed

regression-tests/datasets/metadata.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,10 @@ unicode01.csv, generated by hand
7070
vector.csv, generated by vector_generator.py
7171
weighted_median_negative.csv, lists integers
7272
weight_median.csv, generated by weight_median.py
73+
xml_comment.xml, generated by hand
74+
xml_dangle_tag.xml, generated by hand
75+
xml_doc.xml, generated by hand
76+
xml_overlap.xml, generated by hand
77+
xml_overlap_inner.xml, generated by hand
78+
xml_overlap_2level.xml, generated by hand
79+
xml_smoke.xml, generated by hand
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
<?xml version="2.0" encoding="UTF-8"?>
2+
<root>
3+
<nodeValues></nodeValues>
4+
<node attr="&lt;/node&gt;">
5+
<name>Node1</name>
6+
<value>100</value>
7+
<sub>
8+
<text>val1</text>
9+
<node>subNode1</node>
10+
<node>subNode2</node>
11+
</sub>
12+
</node>
13+
<node>
14+
<name>Node2</name>
15+
<value>200</value>
16+
<vassels />
17+
</node>
18+
<node>
19+
<name>Node3</name>
20+
<value>300</value>
21+
<sub>
22+
<node attr="not a value" type="alt" />
23+
<text>val2</text>
24+
<node attr="value">subNode3</node>
25+
<node>subNode4</node>
26+
</sub>
27+
</node>
28+
<!--
29+
<node attr="not a value" value="555" type="alt" name="Node5" />
30+
-->
31+
<node>
32+
<name>Node4</name>
33+
<value>400</value>
34+
<sub />
35+
</node>
36+
</root>
37+
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
<!-- Fail to complete the opening tag -->
2+
<node>
3+
<name>Prune</name>
4+
</node>
5+
6+
<node
7+
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<shapes>
3+
<square>
4+
<name>left</name>
5+
<size>3</size>
6+
</square>
7+
<triangle>
8+
<size>3</size>
9+
</triangle>
10+
<square color="blue">
11+
<name>right</name>
12+
<size>5</size>
13+
</square>
14+
</shapes>
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<!-- Top blocks overlap -->
2+
<root>
3+
<block1>
4+
</root>
5+
</block1>
6+
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
<!-- Multi-level overlap -->
2+
<root>
3+
<block1>
4+
<block2>
5+
</root>
6+
</block2>
7+
</block1>
8+
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
<?xml version="2.0" encoding="UTF-8"?>
2+
3+
<!-- Inner blocks overlap -->
4+
<root>
5+
<node1>
6+
<name>Sub1</name>
7+
<node2>
8+
<name>Sub2</name>
9+
</node1>
10+
</node2>
11+
</root>
12+
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
<?xml version="2.0" encoding="UTF-8"?>
2+
<root>
3+
<nodeValues></nodeValues>
4+
<node attr="&lt;/node&gt;">
5+
<name>Node1</name>
6+
<value>100</value>
7+
<sub>
8+
<text>val1</text>
9+
<node>subNode1</node>
10+
<node>subNode2</node>
11+
</sub>
12+
</node>
13+
<node>
14+
<name>Node2</name>
15+
<value>200</value>
16+
<vassels />
17+
</node>
18+
<node>
19+
<name>Node3</name>
20+
<value>300</value>
21+
<sub>
22+
<node attr="not a value" type="alt" />
23+
<text>val2</text>
24+
<node attr="value">subNode3</node>
25+
<node>subNode4</node>
26+
</sub>
27+
</node>
28+
<node>
29+
<name>Node4</name>
30+
<value>400</value>
31+
<sub />
32+
</node>
33+
</root>
34+
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
""" Test XML multi-line parsing. """
2+
3+
import unittest
4+
5+
from sparktkregtests.lib import sparktk_test
6+
7+
8+
class XMLReadTest(sparktk_test.SparkTKTestCase):
    """Regression tests for importing multi-line XML files with import_xml.

    Every test imports one XML fixture by element tag and inspects the
    resulting frame.  Row functions handed to ``add_columns`` keep their
    ``xml.etree.ElementTree`` import inside the function body; presumably
    this keeps them self-contained when they are shipped to the workers
    (NOTE(review): confirm before hoisting them to module level).
    """

    def setUp(self):
        """Import the files to be tested."""
        super(XMLReadTest, self).setUp()

        self.dangle_tag_xml = self.get_file("xml_dangle_tag.xml")
        self.overlap_2_xml = self.get_file("xml_overlap_2level.xml")
        self.overlap_inner_xml = self.get_file("xml_overlap_inner.xml")
        self.overlap_xml = self.get_file("xml_overlap.xml")
        self.comment_xml = self.get_file("xml_comment.xml")
        self.doc_xml = self.get_file("xml_doc.xml")
        self.smoke_xml = self.get_file("xml_smoke.xml")

    @staticmethod
    def _child_text(xml_text, tag):
        """Return the text of the first ``tag`` child of an XML fragment.

        Only used on the driver side to inspect rows returned by ``take``.
        """
        import xml.etree.ElementTree as eTree
        return eTree.fromstring(xml_text).find(tag).text

    @unittest.skip(
        "sparktk: nodes are dropped from import_xml when invalid values found")
    def test_xml_good_001(self):
        """ Check basic happy-path XML input """
        frame = self.context.frame.import_xml(self.smoke_xml, "node")

        take = frame.take(20)
        # Column 0 of a row is the raw XML text of one element (this is how
        # test_xml_square consumes it), so the value must be parsed out of
        # the row rather than compared against the row itself.
        self.assertEqual(
            self._child_text(take[2][0], "value"), "300",
            "Node3 value incorrect")

    @unittest.skip(
        "sparktk: nodes are dropped from import_xml when invalid values found")
    def test_xml_comment(self):
        """ Check XML input that contains a commented-out node """
        frame = self.context.frame.import_xml(self.comment_xml, "node")

        take = frame.take(20)
        # The commented-out node sits after Node3 in the fixture, so Node3
        # is still expected at index 2.
        self.assertEqual(
            self._child_text(take[2][0], "value"), "300",
            "Node3 value incorrect")

    def test_xml_square(self):
        """ Validate the example given in the user documentation. """
        frame = self.context.frame.import_xml(self.doc_xml, "square")

        # Parse the values out of the raw XML text of a row.  This test
        # applies the parser directly to a taken row; test_xml_add_columns
        # covers running the same parser through add_columns.

        def parse_square_xml(row):
            import xml.etree.ElementTree as eTree
            ele = eTree.fromstring(row)
            return (ele.get("color"),
                    ele.find("name").text,
                    ele.find("size").text)

        take = frame.take(20)
        self.assertEqual(
            parse_square_xml(take[0][0])[2], "3", "Square size incorrect")

    def test_xml_add_columns(self):
        """validate adding cols to xml frame"""
        frame = self.context.frame.import_xml(self.doc_xml, "square")

        def parse_square_xml(row):
            import xml.etree.ElementTree as eTree
            ele = eTree.fromstring(row[0])
            return [ele.get("color"),
                    ele.find("name").text,
                    ele.find("size").text]

        # The row function yields three values, so three columns are
        # declared (the schema previously declared a single column).
        frame.add_columns(
            parse_square_xml,
            [("color", str), ("name", str), ("size", str)])
        # xml_doc.xml contains exactly two <square> elements.
        self.assertEqual(frame.count(), 2)

    @unittest.skip(
        "sparktk: import_xml does not error when overlapped blocks at top level")
    def test_xml_overlap_outer(self):
        """ Reject overlapped blocks at top level """
        frame = self.context.frame.import_xml(self.overlap_xml, "block1")

        def parse_xml_1col(row):
            import xml.etree.ElementTree as eTree
            ele = eTree.fromstring(row[0])
            return ele.find("name").text

        # NOTE(review): "foo" looks like a placeholder pattern; replace it
        # with the real error text once import_xml rejects this input.
        with self.assertRaisesRegexp(Exception, "foo"):
            frame.add_columns(parse_xml_1col, [("name", str)])

    @unittest.skip(
        "sparktk: import_xml does not error when overlapped blocks at top level")
    def test_xml_overlap_inner(self):
        """Reject overlapped blocks nested within blocks, otherwise legal"""
        frame = self.context.frame.import_xml(self.overlap_inner_xml, "node1")

        def parse_xml_1col(row):
            import xml.etree.ElementTree as eTree
            ele = eTree.fromstring(row[0])
            return ele.find("name").text

        # NOTE(review): "foo" looks like a placeholder pattern; replace it
        # with the real error text once import_xml rejects this input.
        with self.assertRaisesRegexp(Exception, "foo"):
            frame.add_columns(parse_xml_1col, [("name", str)])

    @unittest.skip(
        "sparktk: import_xml does not error when overlapped blocks at top level")
    def test_xml_overlap_2level(self):
        """ Reject overlapped blocks through 2 levels"""
        frame = self.context.frame.import_xml(self.overlap_2_xml, "block2")

        def parse_xml_1col(row):
            import xml.etree.ElementTree as eTree
            ele = eTree.fromstring(row[0])
            return ele.find("name").text

        # NOTE(review): "foo" looks like a placeholder pattern; replace it
        # with the real error text once import_xml rejects this input.
        with self.assertRaisesRegexp(Exception, "foo"):
            frame.add_columns(parse_xml_1col, [("name", str)])

    def test_xml_dangle(self):
        """ Accept a partial block. """
        frame = self.context.frame.import_xml(self.dangle_tag_xml, "node")

        # The fixture holds one complete <node> followed by an unterminated
        # "<node"; only the complete element should become a row.
        self.assertEqual(frame.count(), 1)
114+
115+
116+
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)