-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathAthleta.py
52 lines (49 loc) · 2.66 KB
/
Athleta.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
'''-------------------------------------------------------------------------
Name: Athleta's Locations Scraper
Description: The Script scrapes all Athleta locations from
the website- All City links are listed under each state
The links to each city/store is scraped and processed
Does not have auto-updating capability yet
Date: 12/11/2017
Author: pverma
-------------------------------------------------------------------------'''
from selenium import webdriver
import time
import csv
with open('AthletaStores.csv', 'ab') as file:
scrapedLocations = [] # Avoid duplicate addresses
storeLinkList = []
writer = csv.writer(file, delimiter=',', quoting=csv.QUOTE_MINIMAL)
writer.writerow(['Address', 'City', 'State', 'Zipcode'])
driver = webdriver.PhantomJS(executable_path='C:/Users/u760979/Desktop/Python/Executables/phantomjs.exe') #headless browser
# driver = webdriver.Firefox(executable_path='C:/Users/u760979/Desktop/Python/Executables/geckodriver.exe')
url = 'http://stores.athleta.net/' #url for map
driver.get(url) #retrieve url
time.sleep(5)
'''---Get links from the state page---'''
linksFile = open('linksFile.txt', 'w')
storeLinks = driver.find_elements_by_xpath('/html/body/div[1]/div[4]/div[2]/div/div/div[*]/div[*]/a')
for link in storeLinks:
link = link.get_attribute("href")
linksFile.write("%s\n" % link)
driver.quit()
linksFile.close()
storeLinkList = open("linksFile.txt").readlines()
'''---Get Address from links---'''
for link in storeLinkList:
storeDriver = webdriver.PhantomJS(executable_path='C:/Users/u760979/Desktop/Python/Executables/phantomjs.exe')
storeDriver.get(link) # retrieve url
time.sleep(15)
Address = storeDriver.find_element_by_xpath('//*[@id="widget_Athleta_Stores_Details_div"]/div[1]/address/span[1]').text #get addresses
City = storeDriver.find_element_by_xpath('//*[@id="widget_Athleta_Stores_Details_div"]/div[1]/address/span[2]').text
State = storeDriver.find_element_by_xpath('//*[@id="widget_Athleta_Stores_Details_div"]/div[1]/address/span[3]').text
Zipcode = storeDriver.find_element_by_xpath('//*[@id="widget_Athleta_Stores_Details_div"]/div[1]/address/span[4]').text
if Address != '':
Address = Address.split('\n')[0]
if Address not in scrapedLocations: #Check if Location was pulled
scrapedLocations.append(Address)
writer.writerow([Address,City,State,Zipcode]) #write to file
print Address,City,State,Zipcode
file.flush()
storeDriver.quit()
time.sleep(15)