-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfetch_more.py
97 lines (76 loc) · 3.93 KB
/
fetch_more.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/python
import concurrent.futures
import os
import threading
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
# Root of the TradingView script listing; page-specific paths are appended.
base_url = 'https://www.tradingview.com/scripts/'

# The geckodriver binary is looked up in the current working directory,
# i.e. the directory the script is started from.
geckodriver_path = os.path.join(os.getcwd(), 'geckodriver')

# Run Firefox without opening a visible window.
firefox_options = Options()
firefox_options.add_argument("--headless")

# One browser session, created at module load and shared by the whole script.
# NOTE(review): newer Selenium releases may expect the driver path to be
# passed through a Service object rather than `executable_path` -- confirm
# against the installed Selenium version.
driver = webdriver.Firefox(
    options=firefox_options,
    executable_path=geckodriver_path,
)
# Guards the single shared `driver`. download_page is executed from several
# worker threads, and one browser session must not be driven by more than one
# thread at a time.
_DRIVER_LOCK = threading.Lock()

# Maps every character that is replaced in a file name to '-'.
_FILENAME_TRANSLATION = str.maketrans({char: '-' for char in '/\\?%*:|"<>'})


def _text_or_empty(node):
    """Return the stripped text of a parsed HTML node, or '' when it is None."""
    return node.text.strip() if node is not None else ''


def _fetch_script_code(script_url):
    """Open *script_url* in the shared browser and return the script text.

    Raises:
        NoSuchElementException: if the page has no
            "tv-chart-view__script-wrap" element.
    """
    with _DRIVER_LOCK:
        driver.get(script_url)
        time.sleep(1)  # Give the page time to finish loading
        # Expand every collapsed section so the full source is rendered.
        for button in driver.find_elements(By.CLASS_NAME, "collapseBtn"):
            driver.execute_script("arguments[0].click();", button)
            time.sleep(0.5)  # Let the expansion settle before the next click
        code_element = driver.find_element(By.CLASS_NAME, "tv-chart-view__script-wrap")
        return code_element.get_property("innerText").strip()


def _save_script(name, author, description, code, directory='PineScripts'):
    """Write one script and its metadata to ``<directory>/<name>.pine``."""
    os.makedirs(directory, exist_ok=True)
    # The sanitized title is used both for the file name and in the header.
    safe_name = name.translate(_FILENAME_TRANSLATION)
    filename = os.path.join(directory, f"{safe_name}.pine")
    # Explicit UTF-8 so non-ASCII titles/descriptions do not depend on the
    # platform's default encoding.
    with open(filename, 'w', encoding='utf-8') as file:
        file.writelines([
            f"Script Name: {safe_name}\n",
            f"Author: {author}\n",
            f"Description: {description}\n",
            "PineScript code:\n",
            code,
        ])
    print(f"Saved PineScript code to {filename}\n")


def download_page(page):
    """Scrape one listing page and save every script found on it.

    The listing page is fetched with ``requests`` and parsed with
    BeautifulSoup. For each feed item whose ``data-widget-type`` is ``idea``
    the script page is opened in the shared Selenium ``driver``, the script
    text is read, and the result is written to the ``PineScripts`` directory.

    Args:
        page: Page number inserted into the listing URL.

    Returns:
        None. Returns early when the page contains no feed items.

    Raises:
        requests.exceptions.RequestException: if the listing page cannot be
            fetched (including when the request times out).
    """
    url = (
        f'{base_url}page-{page}/'
        '?script_type=strategies&script_access=open&sort=month_popular&route_range=1'
    )
    # The timeout keeps a stalled connection from blocking this worker forever.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')
    script_items = soup.find_all('div', class_='tv-feed__item')
    if not script_items:
        return  # No script items on this page

    for item in script_items:
        if item.get('data-widget-type') != 'idea':
            continue

        title_link = item.find('a', class_='tv-widget-idea__title')
        if title_link is None or not title_link.get('href'):
            # Without a title link there is neither a name nor a page to open;
            # skip this item instead of failing the whole page.
            print('Skipping feed item without a title link')
            continue

        script_name = title_link.text.strip()
        script_author = _text_or_empty(
            item.find('span', class_='tv-card-user-info__name'))
        script_description = _text_or_empty(
            item.find('p', class_='tv-widget-idea__description-row'))
        print('Script Name:', script_name)
        print('Author:', script_author)
        print('Description:', script_description)
        print('---')

        script_full_url = 'https://www.tradingview.com' + title_link.get('href')
        try:
            script_code = _fetch_script_code(script_full_url)
        except NoSuchElementException:
            print(f"Element not found. Skipping script: {script_name}")
            continue

        _save_script(script_name, script_author, script_description, script_code)
# Number of listing pages to download; each page is submitted as its own task.
num_pages = 3
try:
    with concurrent.futures.ThreadPoolExecutor() as executor:
        page_range = range(1, num_pages + 1)
        # Keep the futures and inspect each result: an exception raised inside
        # a worker only surfaces when its result is requested, so without this
        # a failed page would go unnoticed.
        futures = {
            executor.submit(download_page, page): page for page in page_range
        }
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as exc:
                # Report the failure and keep going with the remaining pages.
                print(f"Page {futures[future]} failed: {exc!r}")
finally:
    # Close the WebDriver even if the downloads were interrupted.
    driver.quit()