Skip to content

Commit d50784b

Browse files
authored
Add files via upload
1 parent 4f1dce4 commit d50784b

File tree

3 files changed

+182
-0
lines changed

3 files changed

+182
-0
lines changed
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"from time import sleep\n",
10+
"import scrapy\n",
11+
"import pandas as pd\n",
12+
"from scrapy import Spider\n",
13+
"from selenium import webdriver\n",
14+
"from scrapy.selector import Selector\n",
15+
"from io import BytesIO\n",
16+
"from PIL import Image\n",
17+
"import os\n",
18+
"import requests"
19+
]
20+
},
21+
{
22+
"cell_type": "code",
23+
"execution_count": 2,
24+
"metadata": {},
25+
"outputs": [
26+
{
27+
"name": "stdout",
28+
"output_type": "stream",
29+
"text": [
30+
"1951\n"
31+
]
32+
}
33+
],
34+
"source": [
35+
# --- Scrape post URLs from a public Instagram profile ---
# Scrolls the profile page repeatedly so Instagram's lazy loader renders
# more posts, harvesting each post's relative URL exactly once.

imageID = []      # image file basenames, filled in by the download cell
sl_no = []        # running serial numbers, filled in by the download cell
imageLikes = []   # like-count text per image, filled in by the download cell

instaccountlink = "https://instagram.com/audi"   # profile to scrape
instaaccountname = "Audi"                        # prefix for saved image files

driver = webdriver.Chrome("driver/driver")
driver.get(instaccountlink)

# A dict preserves insertion order (Python 3.7+) and gives O(1) membership
# tests, replacing the original's O(n^2) `u not in list` scans.
_seen = {}

def _collect_hrefs():
    """Record every post href currently visible in the rendered page."""
    sel = Selector(text=driver.page_source)
    # NOTE(review): these class names are obfuscated and change whenever
    # Instagram ships a new frontend build — update this XPath as needed.
    for href in sel.xpath('//div[@class="v1Nh3 kIKUG _bz0w"]/a/@href').extract():
        _seen.setdefault(href, None)

for _ in range(300):  # number of scroll passes; raise for larger profiles
    _collect_hrefs()
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    sleep(1)  # give the lazy loader time to render the next batch
    _collect_hrefs()

driver.quit()
unique_urls = list(_seen)
print(len(unique_urls))
63+
]
64+
},
65+
{
66+
"cell_type": "code",
67+
"execution_count": 3,
68+
"metadata": {},
69+
"outputs": [
70+
{
71+
"name": "stdout",
72+
"output_type": "stream",
73+
"text": [
74+
"file saved successfully\n"
75+
]
76+
}
77+
],
78+
"source": [
79+
# Persist the harvested post URLs so the scrape can be audited or resumed.
# Opened in append mode intentionally: repeated runs accumulate URLs rather
# than overwriting earlier scrapes. The context manager guarantees the
# handle is closed even if a write fails (the original leaked it on error).
with open("output/audi_instagram_11_07_2019.csv", "a") as url_file:
    url_file.writelines(u + "\n" for u in unique_urls)
print("file saved successfully")
85+
]
86+
},
87+
{
88+
"cell_type": "code",
89+
"execution_count": 4,
90+
"metadata": {},
91+
"outputs": [],
92+
"source": [
93+
# --- Download each post's lead image and record its like count ---
# Visits every harvested post URL, saves the first image on the page to
# the output directory, and keeps three parallel lists (imageID, sl_no,
# imageLikes) that are written out as a CSV at the end.
driver = webdriver.Chrome("driver/driver")

count = 0
max_no_of_iteration = 250   # hard cap on images saved per run

for u in unique_urls:
    try:
        driver.get("http://instagram.com" + u)
        sel = Selector(text=driver.page_source)

        # The first <img src> on a post page is the post's lead image.
        src = sel.xpath('//div/img/@src').extract()[0]
        response = requests.get(src, timeout=30)  # bounded wait per image

        image = Image.open(BytesIO(response.content))
        path = "output/" + instaaccountname + str(count) + "." + image.format

        q1 = ''
        q2 = ''
        try:
            image.save(path, image.format)
            q1 = instaaccountname + str(count)
            # Like count is the first span/span text node on the post page.
            # NOTE(review): selector is fragile — verify against the current
            # Instagram markup before relying on these values.
            q2 = sel.xpath('//span/span/text()').extract_first()
        except IOError:
            # Could not write the file; keep empty placeholders so the
            # three bookkeeping lists stay the same length.
            q1 = ''
            q2 = ''

        imageID.append(q1)
        imageLikes.append(q2)
        sl_no.append(str(count))
        count = count + 1
        if count > max_no_of_iteration:
            break
    except Exception as exc:
        # Best-effort scraping: skip posts whose page/image failed to load,
        # but log them instead of the original's silent bare `except: pass`.
        print("skipping", u, "-", exc)

try:
    driver.quit()
except Exception:
    pass

# Write the bookkeeping CSV unconditionally. The original wrote it only
# when the iteration cap was reached, so profiles with fewer than 250
# posts lost all of their metadata.
df = pd.DataFrame({'ImageID': imageID, 'Sl_no': sl_no, 'ImageLikes': imageLikes})
df.to_csv(instaaccountname + str('.csv'), index=False)
143+
]
144+
},
145+
{
146+
"cell_type": "code",
147+
"execution_count": null,
148+
"metadata": {},
149+
"outputs": [],
150+
"source": []
151+
}
152+
],
153+
"metadata": {
154+
"kernelspec": {
155+
"display_name": "Python 3",
156+
"language": "python",
157+
"name": "python3"
158+
},
159+
"language_info": {
160+
"codemirror_mode": {
161+
"name": "ipython",
162+
"version": 3
163+
},
164+
"file_extension": ".py",
165+
"mimetype": "text/x-python",
166+
"name": "python",
167+
"nbconvert_exporter": "python",
168+
"pygments_lexer": "ipython3",
169+
"version": "3.6.4"
170+
}
171+
},
172+
"nbformat": 4,
173+
"nbformat_minor": 2
174+
}

insta_image_saving/instructions.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
INSTRUCTIONS FOR SUCCESSFUL OPERATION :
2+
3+
--> hardcode variable "instaccountlink" with desired instagram URL
4+
--> hardcode variable "instaaccountname" with desired name
5+
--> hardcode variable "file" with desired output name
6+
--> set the scrolling limit, i.e. the condition on variable "i", which is set to 300 by default
7+
--> you may need to redefine the HTML tags used, as these are changed regularly due to Instagram security measures; the HTML tags used here are the latest to the best of my knowledge.

insta_image_saving/readme.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
THIS IS A JUPYTER NOTEBOOK THAT CAN SAVE ALL OR SOME IMAGES FROM A PUBLIC INSTAGRAM URL AS PER USER'S DIRECTIVES.

0 commit comments

Comments
 (0)