This repository has been archived by the owner on May 10, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
twitterSearch.R
51 lines (39 loc) · 1.58 KB
/
twitterSearch.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
library(twitteR)
library(ggplot2)
library(lubridate)
library(dplyr)
library(tidytext)
library(stringr)
# Twitter API credentials. The "..." values are placeholders that must be
# replaced with real keys before the script can authenticate.
# NOTE(review): avoid committing real keys to version control; consider
# loading them from environment variables instead.
api_key <- "..."
api_secret <- "..."
access_token <- "..."
access_token_secret <- "..."

# Authenticate this R session against the Twitter API.
setup_twitter_oauth(
  consumer_key = api_key,
  consumer_secret = api_secret,
  access_token = access_token,
  access_secret = access_token_secret
)
# ---- Search parameters ----
# Term to search for.
search_word <- "water"
# Maximum number of tweets to request.
number_of_tweets <- 1000
# Centre of the search area as "latitude,longitude" (34.05 N, 118.24 W).
# This is the order the geocode argument expects: latitude first.
loc_of_tweets <- "34.052234,-118.243685"
# Search radius around the centre point, including its unit.
search_radius <- "3000km"

# ---- Run the search ----
# geocode is passed as a single "latitude,longitude,radius" string.
twitter_result <- searchTwitter(
  search_word,
  n = number_of_tweets,
  geocode = paste(loc_of_tweets, search_radius, sep = ",")
)

# twListToDF() fails with an unhelpful message when given an empty list, so
# stop early with a clear explanation if the search returned nothing.
if (length(twitter_result) == 0) {
  stop(
    "No tweets found for '", search_word, "' within ", search_radius,
    " of ", loc_of_tweets,
    call. = FALSE
  )
}

# Flatten the list of status objects into one data frame (one row per tweet).
twitter_df <- twListToDF(twitter_result)
# Drop retweets so that only original tweets are analysed.
twitter_df2 <- twitter_df[!twitter_df$isRetweet, ]

# Make sure `created` is a date-time. The timestamps are in year-month-day
# order, so they must be parsed with ymd_hms(); ydm_hms() would swap day and
# month and turn every timestamp with a day above 12 into NA.
# Parsing is skipped when the column is already POSIXct, which is presumably
# the usual case for twListToDF() output (verify against the twitteR version
# in use).
if (!inherits(twitter_df2$created, "POSIXct")) {
  twitter_df2$created <- ymd_hms(twitter_df2$created)
}

# Histogram (20 bins) of when the tweets were posted.
ggplot(twitter_df2, aes(x = created)) +
  geom_histogram(position = "identity", bins = 20, show.legend = FALSE) +
  theme_bw()
# Text to strip before tokenising: t.co short links, bare http:// links,
# HTML-escaped entities, the "RT" marker and leftover "https" fragments.
# The entities are matched whole (&amp; &lt; &gt;): removing only the "&"
# would leave "amp;", "lt;" and "gt;" behind, which then show up as bogus
# words. "RT" is matched as a standalone word so that it is not cut out of
# longer uppercase words.
replace_reg <- paste0(
  "https://t.co/[A-Za-z\\d]+|http://[A-Za-z\\d]+|",
  "&amp;|&lt;|&gt;|\\bRT\\b|https"
)

# Token separator: any character that is not a letter, digit, underscore,
# "#", "@" or apostrophe, plus an apostrophe that is not followed by one of
# those characters. This keeps #hashtags, @mentions and contractions intact.
unnest_reg <- "([^A-Za-z_\\d#@']|'(?![A-Za-z_\\d#@]))"

# One row per word: skip manual retweets, clean the text, split it into
# tokens, then drop stop words and tokens without any lowercase letter.
tidy_tweets <- twitter_df2 %>%
  filter(!str_detect(text, "^RT")) %>%
  mutate(text = str_replace_all(text, replace_reg, "")) %>%
  unnest_tokens(word, text, token = "regex", pattern = unnest_reg) %>%
  filter(
    !word %in% stop_words$word,
    str_detect(word, "[a-z]")
  )
# Count how often each word occurs (most frequent first), then drop the
# search keyword itself and every word that contains it.
# The keyword is matched literally (fixed = TRUE) so that regex
# metacharacters in it cannot misfire or raise an error, and it is
# lower-cased because unnest_tokens() lower-cases tokens by default.
frequency <- tidy_tweets %>%
  count(word, sort = TRUE) %>%
  filter(!grepl(tolower(search_word), word, fixed = TRUE))