Skip to content

Commit 57cee3d

Browse files
committed
Add download_all_urls option
This commit adds a `download_all_urls` option: a list of trailer page URLs for which the script downloads all available files, regardless of other settings. For example, the user can download only Trailer #1 for most movies (with the `video_types = single_trailer` setting) while still downloading every video for the movies they are really interested in.
1 parent f4f5f49 commit 57cee3d

File tree

4 files changed

+76
-36
lines changed

4 files changed

+76
-36
lines changed

download_trailers.py

Lines changed: 41 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -36,24 +36,26 @@
3636
import socket
3737

3838
try:
39-
# For Python 2
40-
from ConfigParser import Error
41-
from ConfigParser import MissingSectionHeaderError
42-
from urllib2 import urlopen
43-
from urllib2 import Request
44-
from urllib2 import HTTPError
45-
from urllib2 import URLError
46-
except ImportError:
4739
# For Python 3.0 and later
4840
from configparser import Error
4941
from configparser import MissingSectionHeaderError
5042
from urllib.request import urlopen
5143
from urllib.request import Request
5244
from urllib.error import HTTPError
5345
from urllib.error import URLError
46+
from urllib.parse import urlparse
47+
except ImportError:
48+
# Fall back to Python 2's naming
49+
from ConfigParser import Error
50+
from ConfigParser import MissingSectionHeaderError
51+
from urllib2 import urlopen
52+
from urllib2 import Request
53+
from urllib2 import HTTPError
54+
from urllib2 import URLError
55+
from urlparse import urlparse
5456

5557

56-
def get_trailer_file_urls(page_url, res, types):
58+
def get_trailer_file_urls(page_url, res, types, download_all_urls):
5759
"""Get all trailer file URLs from the given movie page in the given
5860
resolution and having the given trailer types.
5961
"""
@@ -70,7 +72,9 @@ def get_trailer_file_urls(page_url, res, types):
7072
file_info = clip['versions']['enus']['sizes'][apple_size]
7173
file_url = convert_src_url_to_file_url(file_info['src'], res)
7274

73-
if should_download_file(types, video_type):
75+
if (get_url_path(page_url) in download_all_urls or
76+
should_download_file(types, video_type)):
77+
7478
url_info = {
7579
'res': res,
7680
'title': title,
@@ -210,14 +214,16 @@ def download_trailer_file(url, destdir, filename):
210214
return
211215

212216

213-
def download_trailers_from_page(page_url, dl_list_path, res, destdir, types):
217+
def download_trailers_from_page(page_url, settings):
214218
"""Takes a page on the Apple Trailers website and downloads the trailer
215219
for the movie on the page. Example URL:
216220
http://trailers.apple.com/trailers/lions_gate/thehungergames/"""
217221

218222
logging.debug('Checking for files at %s', page_url)
219-
trailer_urls = get_trailer_file_urls(page_url, res, types)
220-
downloaded_files = get_downloaded_files(dl_list_path)
223+
trailer_urls = get_trailer_file_urls(page_url, settings['resolution'],
224+
settings['video_types'],
225+
settings['download_all_urls'])
226+
downloaded_files = get_downloaded_files(settings['list_file'])
221227

222228
for trailer_url in trailer_urls:
223229
trailer_file_name = get_trailer_filename(trailer_url['title'],
@@ -226,9 +232,9 @@ def download_trailers_from_page(page_url, dl_list_path, res, destdir, types):
226232
if trailer_file_name not in downloaded_files:
227233
logging.info('Downloading %s: %s', trailer_url['type'],
228234
trailer_file_name)
229-
download_trailer_file(trailer_url['url'], destdir,
235+
download_trailer_file(trailer_url['url'], settings['download_dir'],
230236
trailer_file_name)
231-
record_downloaded_file(trailer_file_name, dl_list_path)
237+
record_downloaded_file(trailer_file_name, settings['list_file'])
232238
else:
233239
logging.debug('*** File already downloaded, skipping: %s',
234240
trailer_file_name)
@@ -249,6 +255,17 @@ def get_trailer_filename(film_title, video_type, res):
249255
return trailer_file_name
250256

251257

258+
def get_url_path(url):
    """Reduce a full URL to its normalized path component.

    Whitespace surrounding the URL is ignored and every trailing slash is
    dropped from the path, so that equivalent page URLs such as
    ".../trailers/one", ".../trailers/one/" and ".../trailers/one//" all
    normalize to the same string and compare equal.

    Example:
        "https://trailers.apple.com/trailers/one/" -> "/trailers/one"

    Returns the path as a string; a URL whose path is empty or just "/"
    yields the empty string.
    """
    # rstrip (rather than dropping a single character) also handles
    # accidental doubled slashes and is a no-op on an empty path.
    return urlparse(url.strip()).path.rstrip("/")
267+
268+
252269
def validate_settings(settings):
253270
"""Validate the settings in the given dictionary. If any setting is
254271
invalid, raises an Error with a user message"""
@@ -317,6 +334,12 @@ def get_config_values(config_path, defaults):
317334
config_values = config.defaults()
318335
break
319336

337+
if config_values.get('download_all_urls', ''):
338+
config_values['download_all_urls'] = (
339+
[get_url_path(s) for s in config_values['download_all_urls'].split(',')])
340+
else:
341+
config_values['download_all_urls'] = []
342+
320343
if not config_file_found:
321344
logging.info('Config file not found. Using default values.')
322345

@@ -334,9 +357,9 @@ def get_settings():
334357
script_dir = os.path.abspath(os.path.dirname(__file__))
335358
defaults = {
336359
'download_dir': script_dir,
360+
'output_level': 'debug',
337361
'resolution': '720',
338362
'video_types': 'single_trailer',
339-
'output_level': 'debug',
340363
}
341364

342365
args = get_command_line_arguments()
@@ -507,13 +530,7 @@ def main():
507530
# Do the download
508531
if 'page' in settings:
509532
# The trailer page URL was passed in on the command line
510-
download_trailers_from_page(
511-
settings['page'],
512-
settings['list_file'],
513-
settings['resolution'],
514-
settings['download_dir'],
515-
settings['video_types']
516-
)
533+
download_trailers_from_page(settings['page'], settings)
517534

518535
else:
519536
just_added_url = ('http://trailers.apple.com/trailers/'
@@ -522,13 +539,7 @@ def main():
522539

523540
for trailer in newest_trailers:
524541
url = 'http://trailers.apple.com' + trailer['location']
525-
download_trailers_from_page(
526-
url,
527-
settings['list_file'],
528-
settings['resolution'],
529-
settings['download_dir'],
530-
settings['video_types']
531-
)
542+
download_trailers_from_page(url, settings)
532543

533544

534545
if __name__ == '__main__':

settings-example.cfg

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ list_file = /tmp/download_list.txt
2525
# Defaults to single_trailer
2626
video_types = single_trailer
2727

28+
# Regardless of the video_types setting, download all available files from
29+
# these trailer URLs. Can be a single URL or a comma-separated list of URLs.
30+
# download_all_urls = https://trailers.apple.com/trailers/one/,https://trailers.apple.com/trailers/two/
31+
2832
# The console output level of the script. Valid values are:
2933
# debug: print all information, including configuration and debug information
3034
# downloads: only print new downloads

test/fixtures/settings/normal_settings.cfg

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ list_file = ~/Videos/download_list.txt
2525
# Defaults to single_trailer
2626
video_types = all
2727

28+
# Regardless of the video_types setting, download all available files from
29+
# these trailer URLs.
30+
download_all_urls = https://trailers.apple.com/trailers/one/, https://trailers.apple.com/trailers/two/
31+
2832
# The console output level of the script. Valid values are:
2933
# debug: print all information, including configuration and debug information
3034
# downloads: only print new downloads

test/test_download_trailers.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,17 +46,17 @@
4646

4747
SOME_CONFIG_DEFAULTS = {
4848
'download_dir': '/tmp/download',
49+
'output_level': 'debug',
4950
'resolution': '720',
5051
'video_types': 'single_trailer',
51-
'output_level': 'debug',
5252
}
5353

5454
SOME_VALID_SETTINGS = {
55-
'resolution': '1080',
5655
'download_dir': '/',
57-
'video_types': 'trailers',
58-
'output_level': 'error',
5956
'list_file': '/list.txt',
57+
'output_level': 'error',
58+
'resolution': '1080',
59+
'video_types': 'trailers',
6060
}
6161

6262
REQUIRED_SETTINGS = ['resolution', 'download_dir', 'video_types', 'output_level', 'list_file']
@@ -186,13 +186,18 @@ def test_get_trailer_filename_repeating_spaces():
186186

187187
def test_get_config_values_no_config_file():
188188
missing_file_path = '/not/a/path/on/any/real/system/settings.cfg'
189-
assert trailers.get_config_values(missing_file_path, SOME_CONFIG_DEFAULTS) == SOME_CONFIG_DEFAULTS
189+
settings = copy.deepcopy(SOME_CONFIG_DEFAULTS)
190+
settings['download_all_urls'] = []
191+
192+
assert trailers.get_config_values(missing_file_path, SOME_CONFIG_DEFAULTS) == settings
190193

191194

192195
def test_get_config_values_empty_config_file():
193196
empty_config_file = os.path.join(TEST_DIR, 'fixtures', 'settings', 'empty_settings.cfg')
197+
settings = copy.deepcopy(SOME_CONFIG_DEFAULTS)
198+
settings['download_all_urls'] = []
194199

195-
assert trailers.get_config_values(empty_config_file, SOME_CONFIG_DEFAULTS) == SOME_CONFIG_DEFAULTS
200+
assert trailers.get_config_values(empty_config_file, SOME_CONFIG_DEFAULTS) == settings
196201

197202

198203
def test_get_config_values_normal_config_file():
@@ -203,6 +208,10 @@ def test_get_config_values_normal_config_file():
203208
'resolution': '1080',
204209
'video_types': 'all',
205210
'output_level': 'error',
211+
'download_all_urls': [
212+
'/trailers/one',
213+
'/trailers/two',
214+
]
206215
}
207216

208217
assert trailers.get_config_values(empty_config_file, SOME_CONFIG_DEFAULTS) == config_values
@@ -329,3 +338,15 @@ def test_validate_settings_setting_missing():
329338
settings.pop(setting)
330339
trailers.validate_settings(settings)
331340

341+
342+
def test_clean_url_with_traling_slash():
    """get_url_path drops the trailing slash from the URL's path."""
    assert trailers.get_url_path("https://trailers.apple.com/path/film/") == "/path/film"
346+
347+
348+
def test_clean_url_without_trailing_slash():
    """get_url_path leaves a path with no trailing slash unchanged."""
    assert trailers.get_url_path("https://trailers.apple.com/path/film") == "/path/film"
352+

0 commit comments

Comments
 (0)