forked from sunner/buzz2weibo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
formaturl.py
44 lines (39 loc) · 1.17 KB
/
formaturl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from urllib2 import urlopen, URLError
import re
import os, errno, sys
try:  # Python 2 module layout
    from urllib import quote
    from urlparse import urlsplit
except ImportError:  # Python 3 module layout
    from urllib.parse import quote, urlsplit

# Hosts of URL-shortening services. Links on these hosts are treated as
# already short: changeurl() strips their scheme and shortenurlfromshorl()
# returns them untouched.
blackurls = ["t.co", "bit.ly", "goo.gl", "tinyurl.com", "is.gd"]

# Captures the short link from the shorl.com result page, which contains e.g.
#   Shorl: <a href="http://shorl.com/hupryprygafadra" rel="nofollow">...</a>
# The capture stops at the first closing quote so it cannot run on into
# later attributes on the same line.
_SHORL_LINK_RE = re.compile(r'Shorl: <a href="([^"]*)"')


def _is_short_url(url):
    """Return True if the host of *url* is listed in ``blackurls``.

    The host must equal a listed domain or be a subdomain of it. A plain
    substring test is not enough: "t.co" occurs inside every host that ends
    in "t.com" (microsoft.com, blogspot.com, ...).
    """
    text = url.strip()
    try:
        parts = urlsplit(text)
        if not parts.netloc:
            # urlsplit only recognises a host after "//"; retry so that
            # scheme-less input such as "bit.ly/abc" is still understood.
            parts = urlsplit("//" + text)
        host = parts.hostname
    except ValueError:  # malformed authority, e.g. an unclosed IPv6 bracket
        return False
    if not host:
        return False
    return any(host == domain or host.endswith("." + domain)
               for domain in blackurls)


def changeurl(url):
    """Strip the scheme from a link hosted on a known URL shortener.

    Args:
        url: The link to inspect.

    Returns:
        ``url`` with everything up to and including ``"://"`` removed when
        its host is listed in ``blackurls``; otherwise ``url`` unchanged.
        A shortener link that carries no ``"://"`` is returned unchanged.
    """
    if not _is_short_url(url):
        return url
    scheme_end = url.find("://")
    if scheme_end < 0:
        return url
    return url[scheme_end + 3:]


def shortenurlfromshorl(url):
    """Shorten *url* through the shorl.com web form.

    Args:
        url: The link to shorten.

    Returns:
        The short link found in the shorl.com response; ``url`` itself when
        its host is already listed in ``blackurls``; or ``""`` when the
        response page contains no short link.

    Raises:
        Whatever ``urlopen`` raises on a failed request (e.g. ``URLError``)
        propagates to the caller.
    """
    if _is_short_url(url):
        return url
    # Percent-encode the target so that "&", "#" and similar characters in it
    # are not parsed as part of shorl.com's own query string.
    request_url = ("http://shorl.com/create.php?url=" + quote(url, safe="")
                   + "&go=Shorlify!")
    response = urlopen(request_url)
    try:
        page = response.read()
    finally:
        response.close()  # release the connection even if read() fails
    if not isinstance(page, str):  # Python 3 hands back bytes
        page = page.decode("utf-8", "replace")
    match = _SHORL_LINK_RE.search(page)
    return match.group(1) if match else ""
if __name__ == "__main__":
    # Manual smoke test: run shortenurlfromshorl on a sample link and print
    # whatever it returns.
    print(shortenurlfromshorl("http://www.nba.com"))