Skip to content

Commit 483564f

Browse files
author
Daniel Neuhäuser
committed
Fix unicode issue on 2.7
1 parent 295c13d commit 483564f

File tree

3 files changed

+32
-7
lines changed

3 files changed

+32
-7
lines changed

CHANGELOG.rst

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@ Version 0.2.1
66

77
- Fix issue with named groups.
88
- Nicer `TypeError`\s, if pattern types are unequal.
9+
- Fix issues with byte strings instead of unicode strings being returned on Python
10+
2.7.
911

1012
Version 0.2.0
1113
=============

oore.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ def __add__(self, other):
6969
).encode('latin1')
7070
)
7171
else:
72-
return r('(?:{})(?:{})'.format(self.pattern, other.pattern))
72+
return r(u'(?:{})(?:{})'.format(self.pattern, other.pattern))
7373
return NotImplemented
7474

7575
def __or__(self, other):
@@ -89,7 +89,7 @@ def __or__(self, other):
8989
).encode('latin1')
9090
)
9191
else:
92-
return r('(?:{})|(?:{})'.format(self.pattern, other.pattern))
92+
return r(u'(?:{})|(?:{})'.format(self.pattern, other.pattern))
9393
return NotImplemented
9494

9595
def __getitem__(self, index):
@@ -102,7 +102,7 @@ def __getitem__(self, index):
102102
).encode('latin1')
103103
)
104104
else:
105-
return r('(?:{}){{{}}}'.format(self.pattern, index))
105+
return r(u'(?:{}){{{}}}'.format(self.pattern, index))
106106
elif isinstance(index, tuple) and len(index) == 2:
107107
if (
108108
isinstance(index[0], int) and isinstance(index[1], int) and
@@ -116,7 +116,7 @@ def __getitem__(self, index):
116116
).encode('latin1')
117117
)
118118
else:
119-
return r('(?:{}){{{},{}}}'.format(self.pattern, *index))
119+
return r(u'(?:{}){{{},{}}}'.format(self.pattern, *index))
120120
elif (
121121
isinstance(index[0], int) and index[1] is Ellipsis
122122
):
@@ -128,7 +128,7 @@ def __getitem__(self, index):
128128
).encode('latin1')
129129
)
130130
else:
131-
return r('(?:{})*'.format(self.pattern))
131+
return r(u'(?:{})*'.format(self.pattern))
132132
elif index[0] == 1:
133133
if isinstance(self.pattern, bytes):
134134
return r(
@@ -137,7 +137,7 @@ def __getitem__(self, index):
137137
).encode('latin1')
138138
)
139139
else:
140-
return r('(?:{})+'.format(self.pattern))
140+
return r(u'(?:{})+'.format(self.pattern))
141141
else:
142142
if isinstance(self.pattern, bytes):
143143
return r(
@@ -148,7 +148,7 @@ def __getitem__(self, index):
148148
)
149149
else:
150150
return r(
151-
'(?:{pattern}){{{n}}}(?:{pattern})*'.format(
151+
u'(?:{pattern}){{{n}}}(?:{pattern})*'.format(
152152
pattern=self.pattern, n=index[0]
153153
)
154154
)

test_oore.py

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,23 +7,32 @@
77
:license: BSD, see LICENSE.rst for details
88
"""
99
import re
10+
import sys
1011

1112
from oore import r
1213

1314
from pytest import raises
1415

1516

17+
if sys.version_info[0] == 2:
18+
text_type = unicode
19+
else:
20+
text_type = str
21+
22+
1623
def test_add_text():
1724
foo = r(u'foo')
1825
bar = r(u'bar')
1926
foobar = foo + bar
27+
assert isinstance(foobar.pattern, text_type)
2028
assert foobar.match(u'foobar')
2129

2230

2331
def test_add_bytes():
2432
foo = r(b'foo')
2533
bar = r(b'bar')
2634
foobar = foo + bar
35+
assert isinstance(foobar.pattern, bytes)
2736
assert foobar.match(b'foobar')
2837

2938

@@ -60,53 +69,61 @@ def test_or_mixed():
6069
def test_repeat_text():
6170
foo = r(u'foo')
6271
foo_3 = foo[3]
72+
assert isinstance(foo_3.pattern, text_type)
6373
assert foo_3.match(u'foo' * 3)
6474

6575

6676
def test_repeat_bytes():
6777
foo = r(b'foo')
6878
foo_3 = foo[3]
79+
assert isinstance(foo_3.pattern, bytes)
6980
assert foo_3.match(b'foo' * 3)
7081

7182

7283
def test_repeat_from_to_text():
7384
foo = r(u'foo')
7485
foo_2_to_4 = foo[2, 4]
86+
assert isinstance(foo_2_to_4.pattern, text_type)
7587
for i in range(2, 5):
7688
assert foo_2_to_4.match(u'foo' * i)
7789

7890

7991
def test_repeat_from_to_bytes():
8092
foo = r(b'foo')
8193
foo_2_to_4 = foo[2, 4]
94+
assert isinstance(foo_2_to_4.pattern, bytes)
8295
for i in range(2, 5):
8396
assert foo_2_to_4.match(b'foo' * i)
8497

8598

8699
def test_repeat_zero_or_more_text():
87100
foo = r(u'foo')
88101
foo_zero_or_more = foo[0, ...]
102+
assert isinstance(foo_zero_or_more.pattern, text_type)
89103
for i in range(10):
90104
assert foo_zero_or_more.match(u'foo' * i)
91105

92106

93107
def test_repeat_zero_or_more_bytes():
94108
foo = r(b'foo')
95109
foo_zero_or_more = foo[0, ...]
110+
assert isinstance(foo_zero_or_more.pattern, bytes)
96111
for i in range(10):
97112
assert foo_zero_or_more.match(b'foo' * i)
98113

99114

100115
def test_repeat_one_or_more_text():
101116
foo = r(u'foo')
102117
foo_one_or_more = foo[1, ...]
118+
assert isinstance(foo_one_or_more.pattern, text_type)
103119
for i in range(1, 10):
104120
assert foo_one_or_more.match(u'foo' * i)
105121

106122

107123
def test_repeat_one_or_more_bytes():
108124
foo = r(b'foo')
109125
foo_one_or_more = foo[1, ...]
126+
assert isinstance(foo_one_or_more.pattern, bytes)
110127
for i in range(1, 10):
111128
assert foo_one_or_more.match(b'foo' * i)
112129

@@ -115,6 +132,7 @@ def test_repeat_n_or_more_text():
115132
foo = r(u'foo')
116133
for n in range(2, 5):
117134
foo_n_or_more = foo[n, ...]
135+
assert isinstance(foo_n_or_more.pattern, text_type)
118136
for i in range(n, 10):
119137
assert foo_n_or_more.match(u'foo' * i)
120138

@@ -123,6 +141,7 @@ def test_repeat_n_or_more_bytes():
123141
foo = r(b'foo')
124142
for n in range(2, 5):
125143
foo_n_or_more = foo[n, ...]
144+
assert isinstance(foo_n_or_more.pattern, bytes)
126145
for i in range(n, 10):
127146
assert foo_n_or_more.match(b'foo' * i)
128147

@@ -134,6 +153,7 @@ def test_check_r_argument_is_valid_regexp():
134153

135154
def test_numbered_groups_text():
136155
foo = r(u'foo').grouped()
156+
assert isinstance(foo.pattern, text_type)
137157
bar = r(u'bar')
138158
foobar = foo + bar
139159
match = foobar.match(u'foobar')
@@ -142,6 +162,7 @@ def test_numbered_groups_text():
142162

143163
def test_numbered_groups_bytes():
144164
foo = r(b'foo').grouped()
165+
assert isinstance(foo.pattern, bytes)
145166
bar = r(b'bar')
146167
foobar = foo + bar
147168
match = foobar.match(b'foobar')
@@ -150,6 +171,7 @@ def test_numbered_groups_bytes():
150171

151172
def test_named_groups_text():
152173
foo = r(u'foo').grouped('group')
174+
assert isinstance(foo.pattern, text_type)
153175
bar = r(u'bar')
154176
foobar = foo + bar
155177
match = foobar.match(u'foobar')
@@ -158,6 +180,7 @@ def test_named_groups_text():
158180

159181
def test_named_groups_bytes():
160182
foo = r(b'foo').grouped('group')
183+
assert isinstance(foo.pattern, bytes)
161184
bar = r(b'bar')
162185
foobar = foo + bar
163186
match = foobar.match(b'foobar')

0 commit comments

Comments
 (0)