-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbootstrapGenerator.py
113 lines (79 loc) · 3.01 KB
/
bootstrapGenerator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import requests
from bs4 import BeautifulSoup
# Listing page that get_content() downloads and extract_context() scans
# for announcement links.
URL = "https://bilgisayar.btu.edu.tr/index.php?page=duyuru"
# Exclusive upper bound of the 1-based table positions that
# extract_context() scans on the listing page.
COUNT = 25
# Silence urllib3 warnings; presumably needed because the requests in this
# file are sent with verify=False (TLS certificate checks disabled).
requests.packages.urllib3.disable_warnings()
def get_content():
    """Download the announcement listing page and return its raw body.

    Returns:
        bytes: Body of the HTTP response for ``URL``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # verify=False keeps the original behaviour of skipping TLS certificate
    # validation; the timeout prevents the script from hanging forever on an
    # unresponsive server.
    response = requests.get(URL, verify=False, timeout=30)
    # Only report success when the server actually returned a usable page.
    response.raise_for_status()
    print("website request successful")
    return response.content
def _collect_announcement_links(soup):
    """Return the href of the announcement link found in each scanned table."""
    links = []
    # Table positions are 1-based and COUNT is an exclusive bound, exactly as
    # in the original while-loop. range() also terminates for COUNT <= 1.
    # NOTE(review): this scans COUNT - 1 tables; confirm that is intended.
    for position in range(1, COUNT):
        tables = soup.select(f'.scroll-pane > table:nth-child({position})')
        if not tables:
            # No table at this position (shorter page or a non-table child).
            continue
        # Re-parse the table's inner markup so the selector below is
        # evaluated against this table only.
        row = BeautifulSoup(tables[-1].decode_contents(), "html.parser")
        anchors = row.select("td:nth-child(1) .link1")
        href = anchors[-1].get("href") if anchors else None
        if href:
            links.append(href)
    return links


def _extract_time(markup):
    """Return the text between the first ``<br/><br/>`` and the next comma.

    Returns an empty string when either delimiter is missing.
    """
    _, marker, tail = markup.partition("<br/><br/>")
    stamp, comma, _ = tail.partition(",")
    return stamp if marker and comma else ""


def extract_context(html):
    """Scan the listing page and render one snippet per illustrated announcement.

    The listing page is searched for announcement links. Each linked page is
    downloaded, and for every page whose content area contains an image the
    text, image source, link and date are passed to ``generate_boostrap``.

    Args:
        html: Markup of the listing page (as returned by ``get_content``).

    Returns:
        list: The values returned by ``generate_boostrap``, in listing order.

    Raises:
        requests.RequestException: If downloading an announcement page fails
            or times out.
    """
    body = ".col-md-9 > div:nth-child(1) > div:nth-child(2)"
    soup = BeautifulSoup(html, 'html.parser')

    # Gathering links
    print("Scanning links...")
    links = _collect_announcement_links(soup)
    print("Scan successful")

    # Gathering and extracting contents
    print("Extracting...")
    info = []
    for item in links:
        page = requests.get(
            "https://bilgisayar.btu.edu.tr/" + item, verify=False, timeout=30
        ).content
        ctx = BeautifulSoup(page, "html.parser")

        # Announcements without an image are not rendered.
        if not ctx.select(f"{body} img"):
            continue

        paragraphs = ctx.select(f"{body} > p:nth-child(1)")
        if not paragraphs:
            # Without a leading paragraph there is no summary text to show.
            print(f"skipped (no text): {item}")
            continue

        # extracting text
        text = paragraphs[-1].getText(" ")
        # extracting image link
        img = ctx.select(f"{body} img:nth-of-type(1)")[-1].get("src")
        # extracting time: the date is sliced out of the serialised markup
        strcontent = str(BeautifulSoup(str(ctx.select(body)), "html.parser"))
        stamp = _extract_time(strcontent)
        # event link
        detail = item

        info.append(generate_boostrap((text, img, detail, stamp)))
    return info
# Running number of the next generated event; advanced by generate_boostrap().
cntr = 1


def generate_boostrap(info_list):
    """Render one event as a Bootstrap HTML snippet.

    Args:
        info_list: Indexable holding, in order: the event text, the image
            source, the detail link and the date label.

    Returns:
        bytes: The UTF-8 encoded HTML snippet.

    Side effects:
        Prints a progress line and increments the module-level ``cntr``.
    """
    global cntr
    # Local import keeps this block self-contained.
    from html import escape

    # The text is cut to 135 characters *before* escaping so an entity such
    # as "&amp;" is never split in half.
    summary = escape(str(info_list[0][0:135]))
    # Attribute values are escaped (quotes included) so characters like
    # '&' or '"' in a URL cannot break out of the attribute.
    image_src = escape(str(info_list[1]))
    detail_href = escape(str(info_list[2]))
    # The date label is inserted verbatim: the caller in this file slices it
    # out of already-serialised markup, so escaping it would double-encode.
    date_label = info_list[3]

    template = f"""
<!-- Etkinlik {cntr} --->
<div class="container-fluid">
<div class="row">
<div class="col-sm-6" style="background-color:lavender;"><a href="#"><img alt="" src="{image_src}" style="height:146px; width:220px" /></a></div>
<div class="col-sm-6" style="background-color:lavender;height:146px;"><br />
<strong>[{date_label}]</strong> {summary}...<br/><a href="{detail_href}"><strong><u>Detaylı bilgi</u></strong></a></div>
</div>
</div>
<p> </p>
"""
    print(f"{cntr}.event is generated")
    cntr += 1
    return template.encode(encoding='UTF-8', errors='strict')
def save(info):
    """Append every rendered snippet to ``./result.txt``.

    Args:
        info: Iterable of ``bytes`` objects, written in order.
    """
    print("writing results...")
    # "ab+" appends in binary mode, so output from earlier runs is kept.
    # The context manager closes the file even if a write fails.
    with open("./result.txt", "ab+") as result_file:
        result_file.writelines(info)
    print("result.txt is ready!")
def app():
    """Run the whole pipeline: download the listing, extract, then save."""
    html = get_content()
    info = extract_context(html)
    save(info)


# Run only when executed as a script, so importing this module does not
# trigger network requests or write result.txt.
if __name__ == "__main__":
    app()