forked from RahulShaw/LinuxAcademy-DL
-
Notifications
You must be signed in to change notification settings - Fork 0
/
driver.py
142 lines (118 loc) · 4.66 KB
/
driver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python3
from __future__ import unicode_literals
import os
import platform
import re
import sys
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import time
import youtube_dl
# Command-line contract: <script> <username> <password> <course-url>.
# sys.argv[0] is the script itself, so three user arguments means a length
# of at least 4; a smaller length would make sys.argv[3] raise IndexError.
if len(sys.argv) < 4:
    print("Missing Arguments")
    sys.exit(1)

# Positional arguments, in order: account name, account password, course URL.
username, pwd, url = sys.argv[1:4]
print("Requesting download of " + url)
# User-agent string handed to Chrome; assembled from two adjacent literals.
user_agent = (
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 '
    'Safari/537.36 '
)

# Header mapping carrying the same user agent.
headers = {'User-Agent': user_agent}

# Build the Chrome option set by applying each command-line switch in turn.
chrome_options = Options()
for flag in (
    f'user-agent={user_agent}',
    '--disable-extensions',
    '--no-sandbox',
    '--disable-gpu',
    '--disable-popup-blocking',
    '--disable-logging',
    '--allow-running-insecure-content',
):
    chrome_options.add_argument(flag)
# Pick the expected chromedriver location for the current operating system.
# The three cases must be mutually exclusive (if/elif/else): with two
# independent `if` statements the final `else` also ran on Linux and aborted
# the script while looking for the Windows path.
system_name = platform.system()
if system_name == 'Linux':
    chromedriver_path = "/usr/bin/chromedriver"
elif system_name == 'Darwin':
    chromedriver_path = "/usr/local/bin/chromedriver"
else:
    # Any other platform falls back to the Windows location, as before.
    chromedriver_path = "C:/ChromeDriver/chromedriver.exe"

# Fail early with the expected path in the message when the driver is absent.
if not os.path.exists(chromedriver_path):
    print(f"Chromedriver not found; expected path '{chromedriver_path}'")
    sys.exit(1)

# Single browser instance used for login and page scraping below.
browser = webdriver.Chrome(executable_path=chromedriver_path,
                           options=chrome_options)
# Open the site's landing page and follow the "Log In" link.
browser.set_page_load_timeout(10000)
browser.maximize_window()
browser.get("https://linuxacademy.com/")
link = browser.find_element_by_partial_link_text('Log In')
link.click()

# Fixed pause so the login form has time to load before it is filled in.
print("Sleeping for 15 seconds..")
time.sleep(15)

print("Attempting to login..")
user = browser.find_element_by_name('username')
user.send_keys(username)
password = browser.find_element_by_name('password')
password.send_keys(pwd)
# Submit the form by pressing Enter in the password field.
password.send_keys(Keys.RETURN)

# Fixed pause while the post-login page loads.
time.sleep(30)

# The presence of the 'navigationUsername' element is used as the signal
# that the login worked; the lookup raises when the element is missing.
try:
    logged_in_name = browser.find_element_by_id('navigationUsername')
except Exception:
    print("Login failed...\nExiting now!")
    # Shut the browser down before exiting so no Chrome/chromedriver
    # process is left running after a failed login.
    browser.quit()
    sys.exit(1)
else:
    print("Login successful...")
print("Getting lesson links...")
browser.get(url)
# Fixed pause so the course page can finish rendering before it is read.
time.sleep(10)
html = browser.page_source
parsed_html = BeautifulSoup(html, 'html5lib')

# The page source is all that is needed from the browser; release it now so
# it is closed on every path below, including the early exit.
browser.close()
browser.quit()

# Parallel lists: urls[i] is the lesson page whose title is lessons[i].
urls = []
lessons = []

# Course title lives in a <span> inside the <h2 class="m-b-0"> heading.
# `class_=` is used instead of the former attrs={'class', 'm-b-0'}, which was
# a set literal rather than a {'class': ...} mapping.
heading = parsed_html.find('h2', class_='m-b-0')
title_span = heading.find('span') if heading is not None else None
if title_span is None:
    print("Could not find the course title on " + url)
    sys.exit(1)

# Sanitize the title once so that the directory created here and the output
# path built from `title` later refer to the same folder.
title = re.sub('[?/:]', '', title_span.text)

# Keep only syllabus entries that point at a course lesson.
anchors = parsed_html.find_all('a', class_='syllabus-item')
for anchor in anchors:
    href = anchor.get('href', '')
    if '/course/' in href:
        urls.append('https://linuxacademy.com/' + href)
        lessons.append(anchor.find('h6').text)

# exist_ok avoids a FileExistsError when the script is re-run for a course.
os.makedirs(title, exist_ok=True)
print("Starting download...")

# youtube_dl is pointed at cookies.txt in the working directory; stop with
# instructions when that file is absent.
cookie_path = os.path.join(os.getcwd(), "cookies.txt")
if not os.path.exists(cookie_path):
    print("Cookies.txt not found!")
    print("""Login to LinuxAcademy and visit the course page e.g.
https://linuxacademy.com/cp/modules/view/id/287 and with the cookies.txt extention installed,
click on the icon of the extension and choose To download cookies for this tab click here.
Copy the downloaded txt file to root of the cloned repo. Make sure that the name of the file is cookies.txt.
Repeat when you an encounter exception in downloading the videos (assuming you have an active subscription).""")
    exit(1)

try:
    # Walk lesson URLs and titles together; numbering in file names is 1-based.
    for position, (lesson_url, lesson_name) in enumerate(zip(urls, lessons), start=1):
        serial = str(position)
        safe_name = re.sub('[?/:]', '', lesson_name)
        # One options dict per lesson because the output template embeds the
        # lesson's own number and name.
        ydl_opts = {
            'cookiefile': 'cookies.txt',
            'force_generic_extractor': True,
            'outtmpl': f"{os.getcwd()}/{title}/{serial}. {safe_name}.%(ext)s",
            'sleep_interval': 15,
            'retries': 10,
        }
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([lesson_url])
    print("Downloads completed!")
except Exception as e:
    # Any failure stops the remaining downloads and is reported here.
    print(e)
    print("Downloading failed. Perhaps, the cookies have expired.")