-
Notifications
You must be signed in to change notification settings - Fork 0
/
ancver.py
94 lines (82 loc) · 3.03 KB
/
ancver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
'''
anchor verify
a website management tool
USE: python3 ancver.py URL print|try|open
print : report request status to console
try : report status to console and launch error URLs in browser
open : launch all URLs in browser
Anchor tags must be marked with 'scan'. Example:
<a scan href='https://somedomain.com'>Some Domain</a>
'''
from bs4 import BeautifulSoup
import requests
import webbrowser
import sys
from termcolor import cprint
# HTTP status codes worth flagging, mapped to their reason phrases.
# Grouped into the two classes this tool cares about, then merged so
# the lookup table keeps the original 4xx-then-3xx ordering.
_CLIENT_ERRORS = {
    400: "Bad Request",
    401: "Unauthorized",
    402: "Payment Required",
    403: "Forbidden",
    404: "Not Found",
    405: "Method Not Allowed",
    406: "Not Acceptable",
    407: "Proxy Authentication Required",
    408: "Request Timeout",
    409: "Conflict",
    410: "Gone",
    411: "Length Required",
}
_REDIRECTS = {
    300: "Multiple Choices",
    301: "Moved Permanently",
    302: "Found",
    303: "See Other",
    304: "Not Modified",
    305: "Use Proxy",
    307: "Temporary Redirect",
    308: "Permanent Redirect",
}
err = {**_CLIENT_ERRORS, **_REDIRECTS}  # list is incomplete!
errs = 0  # running count of failed / timed-out requests
# --- Startup: validate arguments and fetch the page whose links we scan ---
if len(sys.argv) != 3:
    cprint("Missing args: {URL} {print|try|open}", "yellow")
    sys.exit(1)

# Load the page containing the links to be tested.  A dead or unreachable
# starting URL is reported cleanly instead of crashing with a raw traceback.
try:
    r = requests.get(sys.argv[1], timeout=(4, 5))  # (connect, read) timeouts
except requests.exceptions.RequestException as exc:
    cprint(f"Could not load starting page: {exc}", "red", attrs=["bold"])
    sys.exit(1)
print("starting page returned: ", r.status_code)

# Collect every anchor tag explicitly marked with a bare 'scan' attribute,
# e.g. <a scan href='https://somedomain.com'>Some Domain</a>, keeping only
# those that actually carry an href.
data = r.text
soup = BeautifulSoup(data, 'html.parser')
anchor_tags = soup.find_all('a', attrs={'scan': True})
urls = [tag['href'] for tag in anchor_tags if 'href' in tag.attrs]

action = sys.argv[2].lower()  # "print", "try", or "open"
# --- Process the list of URLs ---
for url in urls:
    # Bail out if the target site looks systematically broken.
    if errs > 30:
        print("Too many errors!")
        sys.exit(1)
    # Only fully qualified URLs are checked; relative links are skipped.
    if not url.startswith("http"):
        continue
    if action == "open":
        webbrowser.open(url)
        continue
    print("----------")
    print(url)
    try:
        # 'head' is faster than 'get'; (connect, read) timeouts
        r = requests.head(url, timeout=(4, 11))
    except requests.exceptions.Timeout:
        cprint("REQUEST TIMED OUT, try URL in browser", 'red', attrs=['bold',])
        errs += 1
        if action == "try":
            webbrowser.open(url)
    except requests.exceptions.RequestException:
        # Narrowed from bare Exception so genuine bugs (NameError,
        # KeyError, ...) surface instead of being mislabeled as a
        # failed request.  RequestException covers invalid URLs
        # (MissingSchema), connection errors, SSL errors, etc.
        cprint("REQUEST FAILED: invalid URL?, or try URL in browser.",
               'red',
               attrs=['bold',])
        errs += 1
        if action == "try":
            webbrowser.open(url)
    else:
        # Many status codes other than 200 will occur because of
        # website security and the fact that our requests are not made
        # from a browser!  So these are assumed to be OK and printed
        # with the code and message.
        error_msg = err.get(r.status_code, " OK")
        print(r.status_code, error_msg)

print(f"\nFound {errs} errors.")