# scraper.py
from bs4 import BeautifulSoup
from requests import get
import pandas as pd
import time
import json
from datetime import datetime
import os
def saveToJson(data):
    """Write *data* to ./results.json as indented JSON.

    The payload is serialised to a string before the file is opened, so a
    value that cannot be JSON-encoded raises ``TypeError`` without truncating
    the results that are already on disk.

    Args:
        data: Any JSON-serialisable object.

    Raises:
        TypeError: If *data* contains a value ``json`` cannot encode.
        OSError: If the file cannot be opened or written.
    """
    payload = json.dumps(data, indent=3)
    # Same encoding that readJson() uses when loading the file back.
    with open("./results.json", "w", encoding="utf-8") as f:
        f.write(payload)
def readJson():
    """Return the parsed JSON content of ./results.json."""
    with open('./results.json', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
# Module-level results mapping, initialised empty at import time.
results = {}

# Seed ./results.json with an empty object when it does not exist yet, so
# readJson() has a file to load.
if not os.path.exists("./results.json"):
    saveToJson({})
def _parse_transactions(html, max_rows):
    """Build [hash, time, btc, usd] rows from the parsed transactions page.

    Each hash anchor is paired with three consecutive detail spans; the third
    span is converted to ``float``. At most *max_rows* rows are produced, and
    never more than the page actually contains.
    """
    hash_links = html.find_all(
        'a', class_='sc-1r996ns-0 fLwyDF sc-1tbyx6t-1 kCGMTY iklhnl-0 eEewhk d53qjk-0 ctEFcK')
    detail_spans = html.find_all(
        'span', class_='sc-1ryi78w-0 cILyoi sc-16b9dsl-1 ZwupP u3ufsr-0 eQTRKC')

    # str() detaches the text from the parse tree: a NavigableString keeps a
    # reference to the whole BeautifulSoup document alive.
    hashes = [str(link.contents[0]) for link in hash_links]
    extra = [
        str(span.contents[0]).replace('$', '').replace(',', '')
        for span in detail_spans
    ]

    cells_per_row = 3
    row_count = min(len(hashes), len(extra) // cells_per_row, max_rows)
    rows = []
    for n in range(row_count):
        base = n * cells_per_row
        rows.append(
            [hashes[n], extra[base], extra[base + 1], float(extra[base + 2])])
    return rows


def _record_daily_highest(df):
    """Append the row of *df* with the largest USD amount to today's list in results.json."""
    if df.empty:
        # Nothing was scraped, so there is no highest transaction to record.
        return
    top_row = df.sort_values(by=['Amount (USD)'], ascending=False).head(1)
    # to_json(orient='records') yields a one-element JSON array; take the
    # element instead of stripping brackets out of the text.
    record = json.loads(top_row.to_json(orient='records'))[0]
    today = datetime.now().strftime('%Y-%m-%d')
    results = readJson()
    # Add to today's list while keeping the entries of every other day.
    results.setdefault(today, []).append(record)
    saveToJson(results)


def scrape(max_rows=50, timeout=10):
    """Scrape blockchain.com's unconfirmed-transactions page into a DataFrame.

    As a side effect, the transaction with the highest USD amount is appended
    to today's entry in ./results.json. That step is best-effort: a failure is
    printed and does not prevent the DataFrame from being returned.

    Args:
        max_rows: Upper bound on the number of transactions to return
            (default 50, the previously hard-coded value).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A DataFrame with the columns 'Hash', 'Time', 'Amount (BTC)' and
        'Amount (USD)'. It is empty when no matching elements are found.

    Raises:
        requests.RequestException: On timeout, connection failure or a
            non-2xx HTTP status.
        ValueError: If a USD amount cannot be converted to ``float``.
    """
    url = 'https://www.blockchain.com/btc/unconfirmed-transactions'
    response = get(url, timeout=timeout)
    # Fail loudly on an error page rather than parsing it as transactions.
    response.raise_for_status()
    html = BeautifulSoup(response.text, 'html.parser')

    df = pd.DataFrame(
        _parse_transactions(html, max_rows),
        columns=['Hash', 'Time', 'Amount (BTC)', 'Amount (USD)'])

    # SAVING HIGHEST RESULT TO JSON FILE (not part of the scraping task itself)
    try:
        _record_daily_highest(df)
    except Exception as exc:
        print("ERROR -> Saving To Json:", exc)

    # RETURNING DF TO SEND TO REDIS
    return df