13
13
>>> tldextract.extract('http://www.worldbank.org.kg/') # Kyrgyzstan
14
14
ExtractResult(subdomain='www', domain='worldbank', suffix='org.kg', is_private=False)
15
15
16
- `ExtractResult` is a namedtuple, so it's simple to access the parts you want.
17
-
18
- >>> ext = tldextract.extract('http://forums.bbc.co.uk')
19
- >>> (ext.subdomain, ext.domain, ext.suffix)
20
- ('forums', 'bbc', 'co.uk')
21
- >>> # rejoin subdomain and domain
22
- >>> '.'.join(ext[:2])
23
- 'forums.bbc'
24
- >>> # a common alias
25
- >>> ext.registered_domain
26
- 'bbc.co.uk'
27
-
28
16
Note subdomain and suffix are _optional_. Not all URL-like inputs have a
29
17
subdomain or a valid suffix.
30
18
37
25
>>> tldextract.extract('http://127.0.0.1:8080/deployed/')
38
26
ExtractResult(subdomain='', domain='127.0.0.1', suffix='', is_private=False)
39
27
40
- If you want to rejoin the whole namedtuple, regardless of whether a subdomain
41
- or suffix were found:
28
+ To rejoin the original hostname, if it was indeed a valid, registered hostname:
42
29
43
- >>> ext = tldextract.extract('http://127.0.0.1:8080/deployed/')
44
- >>> # this has unwanted dots
45
- >>> '.'.join(part for part in ext[:3])
46
- '.127.0.0.1.'
47
- >>> # join part only if truthy
48
- >>> '.'.join(part for part in ext[:3] if part)
49
- '127.0.0.1'
30
+ >>> ext = tldextract.extract('http://forums.bbc.co.uk')
31
+ >>> ext.registered_domain
32
+ 'bbc.co.uk'
33
+ >>> ext.fqdn
34
+ 'forums.bbc.co.uk'
50
35
"""
51
36
52
37
from __future__ import annotations
55
40
import os
56
41
import urllib .parse
57
42
from collections .abc import Collection , Sequence
43
+ from dataclasses import dataclass
58
44
from functools import wraps
59
- from typing import (
60
- NamedTuple ,
61
- )
62
45
63
46
import idna
64
47
77
60
)
78
61
79
62
80
- class ExtractResult (NamedTuple ):
81
- """namedtuple of a URL's subdomain, domain, suffix, and flag that indicates if URL has private suffix."""
63
+ @dataclass (order = True )
64
+ class ExtractResult :
65
+ """A URL's extracted subdomain, domain, and suffix.
66
+
67
+ Also contains metadata, like a flag that indicates if the URL has a private suffix.
68
+ """
82
69
83
70
subdomain : str
84
71
domain : str
85
72
suffix : str
86
- is_private : bool = False
73
+ is_private : bool
87
74
88
75
@property
89
76
def registered_domain (self ) -> str :
@@ -110,7 +97,7 @@ def fqdn(self) -> str:
110
97
''
111
98
"""
112
99
if self .suffix and (self .domain or self .is_private ):
113
- return "." .join (i for i in self [: 3 ] if i )
100
+ return "." .join (i for i in ( self . subdomain , self . domain , self . suffix ) if i )
114
101
return ""
115
102
116
103
@property
@@ -291,7 +278,7 @@ def _extract_netloc(
291
278
and netloc_with_ascii_dots [- 1 ] == "]"
292
279
):
293
280
if looks_like_ipv6 (netloc_with_ascii_dots [1 :- 1 ]):
294
- return ExtractResult ("" , netloc_with_ascii_dots , "" )
281
+ return ExtractResult ("" , netloc_with_ascii_dots , "" , is_private = False )
295
282
296
283
labels = netloc_with_ascii_dots .split ("." )
297
284
0 commit comments