-
Notifications
You must be signed in to change notification settings - Fork 2
/
main.py
121 lines (96 loc) · 3.82 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Instagram Scraper in Python using Selenium
# Scraper logs into Instagram and Inputs the username in the search box
# Data collected is Name ,Followers ,Following ,Posts_URL
import selenium
import time
import urllib.request
import os
import wget
import requests
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import *
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
# Launch Chrome and navigate to the Instagram Page
driver = webdriver.Chrome("C:/chromedriver_win32/chromedriver.exe")
driver.get("https://www.instagram.com")
username = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='username']")))
password = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.CSS_SELECTOR, "input[name='password']")))
# clear the input boxes
username.clear()
password.clear()
# input username and password
username.send_keys("") # Your username i.e ("Peter")
password.send_keys("") # Your password goes here
# login button auto click
log_in = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable(
(By.CSS_SELECTOR, "button[type='submit']"))).click()
# not now auto click (Save login info )
not_now = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable(
(By.XPATH, "//button[contains(text(),'Not Now')]"))).click()
# not now auto click (Turn on notifications)
not_now2 = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable(
(By.XPATH, "//button[contains(text(),'Not Now')]"))).click()
# navigate to the search box input
search_box = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, "//input[@placeholder='Search']")))
search_box.clear()
# The user name we are searching
keyword = "nike"
search_box.send_keys(keyword)
search_box.send_keys(Keys.ENTER)
# select first result from search box
first_result = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.CLASS_NAME, '-qQT3')))
first_result.click()
# get username
name = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, '//*[@id="react-root"]/section/main/div/header/section/div[1]/h2')))
print("Username: " + name.text)
# get number of posts
posts = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.CLASS_NAME, 'g47SY')))
print("Posts: " + posts.text)
# number of followers
followers = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, "//*[@id='react-root']/section/main/div/header/section/ul/li[2]/a/span")))
print("Number of Followers: " + followers.text)
# number of following
following = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.XPATH, "//*[@id='react-root']/section/main/div/header/section/ul/li[3]/a/span")))
print("Number of Following: " + following.text)
time.sleep(2)
# scroll for more images
driver.execute_script("window.scrollTo(0,4000)")
time.sleep(1)
driver.execute_script("window.scrollTo(0,4000)")
# posts url
posts = []
links = driver.find_elements_by_tag_name('a')
for link in links:
post = link.get_attribute('href')
if '/p/' in post:
posts.append(post)
download_url = []
for post in posts:
headers = {'User-Agent': 'Mozilla'}
r = requests.get('{}?__a=1'.format(post), headers=headers)
data = r.json()['graphql']['shortcode_media']
shortcode = data['shortcode']
is_video = data['is_video']
if is_video:
download_url.append('This is a video file')
else:
download_url.append('This is an image file')
# joins the two lists together side by side
links = [''.join(x) for x in zip(posts, download_url)]
# prints links and whether the post is a video file or an image file
print(links)