-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper.py
24 lines (22 loc) · 1.61 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from bs4 import BeautifulSoup
import requests
def _strip_label(text, label):
    """Return *text* without a leading *label*.

    Only a prefix is removed.  A blanket ``str.replace`` would also delete
    later occurrences of the label word inside the value itself (a text such
    as ``"inputstandard input"`` would collapse to ``"standard "``).
    """
    if text.startswith(label):
        return text[len(label):]
    return text


def _div_text(soup, css_class):
    """Return the text of the first ``<div>`` in *soup* carrying *css_class*.

    Raises:
        AttributeError: if no such ``<div>`` exists.  The exception type is
            the one an unchecked ``soup.find(...).text`` would raise, so
            existing handlers keep working; the message names the missing
            element instead of complaining about ``NoneType``.
    """
    node = soup.find('div', class_=css_class)
    if node is None:
        raise AttributeError(
            'no <div class="{}"> found in the fetched page'.format(css_class)
        )
    return node.text


def scrape(q, timeout=10):
    """Fetch a Codeforces problem page and return its parts as a dict.

    Args:
        q: Problem identifier whose first two ``-``-separated fields are
            placed into the URL path, i.e. ``"<first>-<second>"`` becomes
            ``.../problemset/problem/<first>/<second>``.  Any further
            fields are ignored.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled connection cannot block forever.

    Returns:
        A dict with the keys ``title``, ``time_limit``, ``memory_limit``,
        ``input_file``, ``output_file``, ``content``, ``input_specs``,
        ``output_specs``, ``sample_input`` and ``sample_output``; every
        value is a string extracted from the page.

    Raises:
        IndexError: if *q* contains no ``-``, or if the sample-tests text
            contains no ``"Output\\n"`` separator.
        AttributeError: if an expected ``<div>`` is missing from the page.
        requests.exceptions.RequestException: on network failure or timeout.
    """
    parts = q.split('-')
    url = 'https://codeforces.com/problemset/problem/' + parts[0] + '/' + parts[1]

    # ``.text`` is already decoded; hand it to BeautifulSoup as-is instead of
    # re-encoding it to bytes only to have the parser decode it again.
    web_page = requests.get(url, timeout=timeout).text
    soup = BeautifulSoup(web_page, 'lxml')

    # Look each section up once and reuse the text below.
    head = _div_text(soup, "header")
    input_spec = _div_text(soup, "input-specification")
    output_spec = _div_text(soup, "output-specification")
    samples = _div_text(soup, "sample-tests")

    # The statement body is what remains of the whole problem statement once
    # the header text and the input/output/sample sections are cut out.
    tail = input_spec + output_spec + samples
    content = _div_text(soup, "problem-statement").replace(head, '')
    content = content.replace(tail, '').replace("$$$", "$$")

    # Drop the section heading, then split at the "Output" marker; only the
    # text before the first marker and the piece right after it are used.
    sample_parts = (
        samples.replace('Examples', '').replace('Example', '').split('Output\n')
    )

    return {
        "title": _div_text(soup, "title"),
        "time_limit": _strip_label(
            _div_text(soup, "time-limit"), 'time limit per test'
        ),
        "memory_limit": _strip_label(
            _div_text(soup, "memory-limit"), 'memory limit per test'
        ),
        "input_file": _strip_label(_div_text(soup, "input-file"), 'input'),
        "output_file": _strip_label(_div_text(soup, "output-file"), 'output'),
        "content": content,
        "input_specs": _strip_label(input_spec, 'Input').replace("$$$", "$$"),
        "output_specs": _strip_label(output_spec, 'Output').replace("$$$", "$$"),
        "sample_input": sample_parts[0].replace('Input\n', ''),
        "sample_output": sample_parts[1],
    }