forked from PaulC91/facebook-group-scrape-python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsource code
62 lines (49 loc) · 2.05 KB
/
source code
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# Current code that I am running. It contains a lot of try/except error handling because the JSON data is often inconsistent.
# I'm sure there are far more efficient ways to do this, so any suggestions are welcome!
# Things I'm working on:
# - saving the output to CSV
# - the best way to plot the number of posts, likes and comments against time
from facepy import GraphAPI
import json
# Script configuration and data fetch. Everything below runs at import time.
#
# Paste in an access token from the Facebook developers Graph API
# (you need to sign up with your Facebook account logged in).
graph = GraphAPI("")
# Paste the group id taken from the URL of a group you are an admin of.
group_id = ""
# Request the group's feed. Change limit, since and until to edit the number of
# posts to retrieve or the time period they come from; the "yyyy-mm-dd" values
# are placeholders and must be replaced with real dates.
data = graph.get(group_id+'/feed',page=False,limit=100,since="yyyy-mm-dd",until="yyyy-mm-dd")
# Serialize the response to a JSON string and parse it straight back.
# NOTE(review): this round trip looks redundant if graph.get already returns
# plain dicts/lists -- confirm against facepy before removing it.
jsonposts = json.dumps(data)
output = json.loads(jsonposts)
# The posts themselves live under the 'data' key of the response.
info = output[u'data']
# Accumulator filled by scrape_data(): one list of fields per post.
masterlist = []
# scrape_data currently grabs, for each post: the date of the post, who posted it, the title of the posted link,
# the link URL, and the number of likes and comments. It appends a list of that data for each post to a master list.
# If some of that data is not present, it appends a placeholder integer or string instead.
def scrape_data(a, rows=None):
    """Flatten Graph API feed posts into one list of fields per post.

    For every post in ``a`` a row of six fields is built, in this order:
    created time, author name, link title, link URL, number of likes,
    number of comments.

    Args:
        a: Iterable of post dicts. Each post must contain ``created_time``
            and ``from -> name``; every other field is optional.
        rows: Optional list to append the rows to. When omitted, rows are
            appended to the module-level ``masterlist``, as before.

    Returns:
        The list the rows were appended to.

    Raises:
        KeyError: If a post lacks ``created_time`` or ``from -> name``.
    """
    if rows is None:
        rows = masterlist

    for post in a:
        row = [post[u'created_time'], post[u'from'][u'name']]

        # Optional fields: only a missing key is expected here, so look the
        # key up with a default instead of swallowing every exception.
        row.append(post.get(u'name', "No Link"))
        row.append(post.get(u'link', "No URL"))

        # NOTE(review): these count the entries present in the returned
        # 'data' lists; if the API pages likes/comments the totals may be
        # undercounted -- confirm.
        try:
            row.append(len(post[u'likes'][u'data']))
        except KeyError:
            row.append(0)

        try:
            row.append(len(post[u'comments'][u'data']))
        except KeyError:
            # Kept as-is for backward compatibility: the placeholder for
            # missing comments is a string, unlike the 0 used for likes.
            row.append("No Comments :(")

        rows.append(row)

    return rows
scrape_data(info)

# Print the collected data for each post on one line, with ", " as the
# separator, then the data for the next post on a new line, and so on.
#
# Each field is converted to text and the line is encoded explicitly, so posts
# containing non-ASCII characters are printed instead of being silently
# skipped (str() on such values raises UnicodeEncodeError under Python 2).
text_type = type(u"")  # unicode on Python 2, str on Python 3
for item in masterlist:
    line = u", ".join(
        field if isinstance(field, text_type) else text_type(field)
        for field in item
    )
    if str is bytes:
        # Python 2 only: give print an already-encoded UTF-8 byte string so
        # the output does not depend on the terminal or pipe encoding.
        line = line.encode("utf-8")
    print(line)