49
49
'127.0.0.1'
50
50
"""
51
51
52
- import collections
53
52
import logging
54
53
import os
55
54
from functools import wraps
55
+ from typing import List , NamedTuple , Optional , Sequence , Union
56
56
57
57
import idna
58
58
71
71
)
72
72
73
73
74
- class ExtractResult (collections . namedtuple ( "ExtractResult" , "subdomain domain suffix" ) ):
74
+ class ExtractResult (NamedTuple ):
75
75
"""namedtuple of a URL's subdomain, domain, and suffix."""
76
76
77
- # Necessary for __dict__ member to get populated in Python 3+
78
- __slots__ = ()
77
+ subdomain : str
78
+ domain : str
79
+ suffix : str
79
80
80
81
@property
81
- def registered_domain (self ):
82
+ def registered_domain (self ) -> str :
82
83
"""
83
84
Joins the domain and suffix fields with a dot, if they're both set.
84
85
@@ -92,7 +93,7 @@ def registered_domain(self):
92
93
return ""
93
94
94
95
@property
95
- def fqdn (self ):
96
+ def fqdn (self ) -> str :
96
97
"""
97
98
Returns a Fully Qualified Domain Name, if there is a proper domain/suffix.
98
99
@@ -102,12 +103,13 @@ def fqdn(self):
102
103
''
103
104
"""
104
105
if self .domain and self .suffix :
105
- # self is the namedtuple (subdomain domain suffix)
106
+ # Disable bogus lint error (https://github.com/PyCQA/pylint/issues/2568)
107
+ # pylint: disable-next=not-an-iterable
106
108
return "." .join (i for i in self if i )
107
109
return ""
108
110
109
111
@property
110
- def ipv4 (self ):
112
+ def ipv4 (self ) -> str :
111
113
"""
112
114
Returns the ipv4 if that is what the presented domain/url is
113
115
@@ -130,13 +132,13 @@ class TLDExtract:
130
132
# TODO: Agreed with Pylint: too-many-arguments
131
133
def __init__ ( # pylint: disable=too-many-arguments
132
134
self ,
133
- cache_dir = get_cache_dir (),
134
- suffix_list_urls = PUBLIC_SUFFIX_LIST_URLS ,
135
- fallback_to_snapshot = True ,
136
- include_psl_private_domains = False ,
137
- extra_suffixes = (),
138
- cache_fetch_timeout = CACHE_TIMEOUT ,
139
- ):
135
+ cache_dir : str = get_cache_dir (),
136
+ suffix_list_urls : Sequence [ str ] = PUBLIC_SUFFIX_LIST_URLS ,
137
+ fallback_to_snapshot : bool = True ,
138
+ include_psl_private_domains : bool = False ,
139
+ extra_suffixes : Sequence [ str ] = (),
140
+ cache_fetch_timeout : Union [ str , float , None ] = CACHE_TIMEOUT ,
141
+ ) -> None :
140
142
"""
141
143
Constructs a callable for extracting subdomain, domain, and suffix
142
144
components from a URL.
@@ -193,14 +195,18 @@ def __init__( # pylint: disable=too-many-arguments
193
195
194
196
self .include_psl_private_domains = include_psl_private_domains
195
197
self .extra_suffixes = extra_suffixes
196
- self ._extractor = None
198
+ self ._extractor : Optional [ _PublicSuffixListTLDExtractor ] = None
197
199
198
- self .cache_fetch_timeout = cache_fetch_timeout
200
+ self .cache_fetch_timeout = (
201
+ float (cache_fetch_timeout )
202
+ if isinstance (cache_fetch_timeout , str )
203
+ else cache_fetch_timeout
204
+ )
199
205
self ._cache = DiskCache (cache_dir )
200
- if isinstance (self .cache_fetch_timeout , str ):
201
- self .cache_fetch_timeout = float (self .cache_fetch_timeout )
202
206
203
- def __call__ (self , url , include_psl_private_domains = None ):
207
+ def __call__ (
208
+ self , url : str , include_psl_private_domains : Optional [bool ] = None
209
+ ) -> ExtractResult :
204
210
"""
205
211
Takes a string URL and splits it into its subdomain, domain, and
206
212
suffix (effective TLD, gTLD, ccTLD, etc.) component.
@@ -238,23 +244,23 @@ def __call__(self, url, include_psl_private_domains=None):
238
244
domain = labels [suffix_index - 1 ] if suffix_index else ""
239
245
return ExtractResult (subdomain , domain , suffix )
240
246
241
- def update (self , fetch_now = False ):
247
+ def update (self , fetch_now : bool = False ) -> None :
242
248
"""Force fetch the latest suffix list definitions."""
243
249
self ._extractor = None
244
250
self ._cache .clear ()
245
251
if fetch_now :
246
252
self ._get_tld_extractor ()
247
253
248
254
@property
249
- def tlds (self ):
255
+ def tlds (self ) -> List [ str ] :
250
256
"""
251
257
Returns the list of tld's used by default
252
258
253
259
This will vary based on `include_psl_private_domains` and `extra_suffixes`
254
260
"""
255
261
return list (self ._get_tld_extractor ().tlds ())
256
262
257
- def _get_tld_extractor (self ):
263
+ def _get_tld_extractor (self ) -> "_PublicSuffixListTLDExtractor" :
258
264
"""Get or compute this object's TLDExtractor. Looks up the TLDExtractor
259
265
in roughly the following order, based on the settings passed to
260
266
__init__:
@@ -290,9 +296,9 @@ def _get_tld_extractor(self):
290
296
291
297
292
298
@wraps (TLD_EXTRACTOR .__call__ )
293
- def extract (
294
- url , include_psl_private_domains = False
295
- ): # pylint: disable=missing-function-docstring
299
+ def extract ( # pylint: disable=missing-function-docstring
300
+ url : str , include_psl_private_domains : Optional [ bool ] = False
301
+ ) -> ExtractResult :
296
302
return TLD_EXTRACTOR (url , include_psl_private_domains = include_psl_private_domains )
297
303
298
304
0 commit comments