Skip to content

Commit e739ff8

Browse files
Migrate ExtractResult from namedtuple to dataclass (#306)
1 parent 4067dea commit e739ff8

File tree

5 files changed

+27
-70
lines changed

5 files changed

+27
-70
lines changed

README.md

Lines changed: 6 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -31,20 +31,6 @@ ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk', is_private=False
3131
ExtractResult(subdomain='www', domain='worldbank', suffix='org.kg', is_private=False)
3232
```
3333

34-
`ExtractResult` is a namedtuple, so it's simple to access the parts you want.
35-
36-
```python
37-
>>> ext = tldextract.extract('http://forums.bbc.co.uk')
38-
>>> (ext.subdomain, ext.domain, ext.suffix)
39-
('forums', 'bbc', 'co.uk')
40-
>>> # rejoin subdomain and domain
41-
>>> '.'.join(ext[:2])
42-
'forums.bbc'
43-
>>> # a common alias
44-
>>> ext.registered_domain
45-
'bbc.co.uk'
46-
```
47-
4834
Note subdomain and suffix are _optional_. Not all URL-like inputs have a
4935
subdomain or a valid suffix.
5036

@@ -59,17 +45,14 @@ ExtractResult(subdomain='google', domain='notavalidsuffix', suffix='', is_privat
5945
ExtractResult(subdomain='', domain='127.0.0.1', suffix='', is_private=False)
6046
```
6147

62-
If you want to rejoin the whole namedtuple, regardless of whether a subdomain
63-
or suffix were found:
48+
To rejoin the original hostname, if it was indeed a valid, registered hostname:
6449

6550
```python
66-
>>> ext = tldextract.extract('http://127.0.0.1:8080/deployed/')
67-
>>> # this has unwanted dots
68-
>>> '.'.join(ext[:3])
69-
'.127.0.0.1.'
70-
>>> # join each part only if it's truthy
71-
>>> '.'.join(part for part in ext[:3] if part)
72-
'127.0.0.1'
51+
>>> ext = tldextract.extract('http://forums.bbc.co.uk')
52+
>>> ext.registered_domain
53+
'bbc.co.uk'
54+
>>> ext.fqdn
55+
'forums.bbc.co.uk'
7356
```
7457

7558
By default, this package supports the public ICANN TLDs and their exceptions.

tests/custom_suffix_test.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import tempfile
55

66
import tldextract
7+
from tldextract.tldextract import ExtractResult
78

89
FAKE_SUFFIX_LIST_URL = "file://" + os.path.join(
910
os.path.dirname(os.path.abspath(__file__)), "fixtures/fake_suffix_list_fixture.dat"
@@ -27,8 +28,8 @@ def test_private_extraction() -> None:
2728
"""Test this library's uncached, offline, private domain extraction."""
2829
tld = tldextract.TLDExtract(cache_dir=tempfile.mkdtemp(), suffix_list_urls=[])
2930

30-
assert tld("foo.blogspot.com") == ("foo", "blogspot", "com", False)
31-
assert tld("foo.blogspot.com", include_psl_private_domains=True) == (
31+
assert tld("foo.blogspot.com") == ExtractResult("foo", "blogspot", "com", False)
32+
assert tld("foo.blogspot.com", include_psl_private_domains=True) == ExtractResult(
3233
"",
3334
"foo",
3435
"blogspot.com",

tests/main_test.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -412,20 +412,6 @@ def test_ipv4_lookalike() -> None:
412412
)
413413

414414

415-
def test_result_as_dict() -> None:
416-
"""Test that the result is a namedtuple."""
417-
result = extract(
418-
"http://admin:[email protected]:666/secret/admin/interface?param1=42"
419-
)
420-
expected_dict = {
421-
"subdomain": "www",
422-
"domain": "google",
423-
"suffix": "com",
424-
"is_private": False,
425-
}
426-
assert result._asdict() == expected_dict
427-
428-
429415
def test_cache_permission(
430416
mocker: pytest_mock.MockerFixture, monkeypatch: pytest.MonkeyPatch, tmp_path: Path
431417
) -> None:

tldextract/cli.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,5 +88,5 @@ def main() -> None:
8888
sys.exit(1)
8989

9090
for i in args.input:
91-
subdomain, domain, suffix, _ = tld_extract(i)
92-
print(f"{subdomain} {domain} {suffix}")
91+
ext = tld_extract(i)
92+
print(f"{ext.subdomain} {ext.domain} {ext.suffix}")

tldextract/tldextract.py

Lines changed: 16 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,6 @@
1313
>>> tldextract.extract('http://www.worldbank.org.kg/') # Kyrgyzstan
1414
ExtractResult(subdomain='www', domain='worldbank', suffix='org.kg', is_private=False)
1515
16-
`ExtractResult` is a namedtuple, so it's simple to access the parts you want.
17-
18-
>>> ext = tldextract.extract('http://forums.bbc.co.uk')
19-
>>> (ext.subdomain, ext.domain, ext.suffix)
20-
('forums', 'bbc', 'co.uk')
21-
>>> # rejoin subdomain and domain
22-
>>> '.'.join(ext[:2])
23-
'forums.bbc'
24-
>>> # a common alias
25-
>>> ext.registered_domain
26-
'bbc.co.uk'
27-
2816
Note subdomain and suffix are _optional_. Not all URL-like inputs have a
2917
subdomain or a valid suffix.
3018
@@ -37,16 +25,13 @@
3725
>>> tldextract.extract('http://127.0.0.1:8080/deployed/')
3826
ExtractResult(subdomain='', domain='127.0.0.1', suffix='', is_private=False)
3927
40-
If you want to rejoin the whole namedtuple, regardless of whether a subdomain
41-
or suffix were found:
28+
To rejoin the original hostname, if it was indeed a valid, registered hostname:
4229
43-
>>> ext = tldextract.extract('http://127.0.0.1:8080/deployed/')
44-
>>> # this has unwanted dots
45-
>>> '.'.join(part for part in ext[:3])
46-
'.127.0.0.1.'
47-
>>> # join part only if truthy
48-
>>> '.'.join(part for part in ext[:3] if part)
49-
'127.0.0.1'
30+
>>> ext = tldextract.extract('http://forums.bbc.co.uk')
31+
>>> ext.registered_domain
32+
'bbc.co.uk'
33+
>>> ext.fqdn
34+
'forums.bbc.co.uk'
5035
"""
5136

5237
from __future__ import annotations
@@ -55,10 +40,8 @@
5540
import os
5641
import urllib.parse
5742
from collections.abc import Collection, Sequence
43+
from dataclasses import dataclass
5844
from functools import wraps
59-
from typing import (
60-
NamedTuple,
61-
)
6245

6346
import idna
6447

@@ -77,13 +60,17 @@
7760
)
7861

7962

80-
class ExtractResult(NamedTuple):
81-
"""namedtuple of a URL's subdomain, domain, suffix, and flag that indicates if URL has private suffix."""
63+
@dataclass(order=True)
64+
class ExtractResult:
65+
"""A URL's extracted subdomain, domain, and suffix.
66+
67+
Also contains metadata, like a flag that indicates if the URL has a private suffix.
68+
"""
8269

8370
subdomain: str
8471
domain: str
8572
suffix: str
86-
is_private: bool = False
73+
is_private: bool
8774

8875
@property
8976
def registered_domain(self) -> str:
@@ -110,7 +97,7 @@ def fqdn(self) -> str:
11097
''
11198
"""
11299
if self.suffix and (self.domain or self.is_private):
113-
return ".".join(i for i in self[:3] if i)
100+
return ".".join(i for i in (self.subdomain, self.domain, self.suffix) if i)
114101
return ""
115102

116103
@property
@@ -291,7 +278,7 @@ def _extract_netloc(
291278
and netloc_with_ascii_dots[-1] == "]"
292279
):
293280
if looks_like_ipv6(netloc_with_ascii_dots[1:-1]):
294-
return ExtractResult("", netloc_with_ascii_dots, "")
281+
return ExtractResult("", netloc_with_ascii_dots, "", is_private=False)
295282

296283
labels = netloc_with_ascii_dots.split(".")
297284

0 commit comments

Comments
 (0)