Skip to content

Commit ead6328

Browse files
committed
fix wordcloud
1 parent ec1b378 commit ead6328

File tree

5 files changed

+46
-24
lines changed

5 files changed

+46
-24
lines changed

.binder/environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ dependencies:
1919
- r-ggpubr
2020
- r-ggthemes
2121
- r-here
22-
- libstdcxx-ng
22+
- r-bibtex
2323
- conda-build
2424
- autopep8
2525
- entrez-direct

.binder/start

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#!/bin/bash
2+
3+
# source: https://discourse.jupyter.org/t/glibcxx-3-4-26-not-found-from-rstudio/7778/8
4+
set -e
5+
export LD_LIBRARY_PATH=${NB_PYTHON_PREFIX}/lib:${LD_LIBRARY_PATH}
6+
exec "$@"

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,5 @@ src/timeline.html
2929
.bashrc
3030

3131
.jupyter-server-log.txt
32+
33+
src/timeline/timeline.html

.here

Whitespace-only changes.

src/wordcloud/wordcloud.R

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,45 @@
1-
library(bib2df)
2-
library(dplyr)
3-
library(tidytext)
4-
library(stringr)
5-
library(wordcloud)
6-
library(knitr)
7-
library(readr)
1+
library("bibtex")
2+
library("dplyr")
3+
library("tidytext")
4+
library("stringr")
5+
library("wordcloud")
6+
library("knitr")
7+
library("readr")
8+
library("here")
89

9-
pal <- brewer.pal(8,"Dark2")
10+
pal <- brewer.pal(8, "Dark2")
1011

11-
useFullText<-TRUE
12+
useFullText <- FALSE
1213

13-
if(useFullText==TRUE){
14-
#full text from pdfs
15-
readr::read_file("../data/citations/tokens.txt.gz") %>%
16-
stringr::str_replace_all("'","") %>%
17-
stringr::str_replace_all("\\[","") %>%
18-
stringr::str_replace_all("\\]","") %>%
19-
stringr::str_replace_all(" ","") %>%
20-
stringr::str_split(pattern=',',simplify = TRUE) %>%
14+
if (useFullText == TRUE) {
15+
#full text from pdfs, cannot be shared publicly
16+
readr::read_file("../data/citations/tokens.txt.gz") %>%
17+
stringr::str_replace_all("'", "") %>%
18+
stringr::str_replace_all("\\[", "") %>%
19+
stringr::str_replace_all("\\]", "") %>%
20+
stringr::str_replace_all(" ", "") %>%
21+
stringr::str_split(pattern = ",", simplify = TRUE) %>%
2122
stringr::str_to_lower() -> tokenvec
22-
data.frame(word=tokenvec) %>% anti_join(stop_words) %>% count(word, sort = TRUE) %>% ungroup() -> tokens_clean
23-
}else{
23+
data.frame(word = tokenvec) %>%
24+
anti_join(stop_words) %>%
25+
count(word, sort = TRUE) %>%
26+
ungroup() -> tokens_clean
27+
} else {
2428
#just the abstracts
25-
path<-"../data/citations/metadata-in-rcr-refs.bib"
26-
df <- bib2df(path)
27-
df %>% dplyr::filter(!is.na(ABSTRACT)) %>% unnest_tokens(word,ABSTRACT) %>% anti_join(stop_words) %>% count(word, sort = TRUE) %>% ungroup() -> tokens_clean
29+
path <- here::here("data/citations/metadata-in-rcr-refs.bib")
30+
bib <- bibtex::read.bib(path)
31+
df <- data.frame(`ABSTRACT` = unlist(
32+
sapply(bib, function(b) { b$abstract })))
33+
df %>% dplyr::filter(!is.na(ABSTRACT)) %>%
34+
unnest_tokens(word, ABSTRACT) %>%
35+
anti_join(stop_words) %>%
36+
count(word, sort = TRUE) %>%
37+
ungroup() -> tokens_clean
2838
}
2939

3040
tokens_clean %>%
31-
with(wordcloud(word, n, random.order = FALSE, max.words = 100, colors=pal)) -> word_cloud
41+
with(wordcloud(word,
42+
n,
43+
random.order = FALSE,
44+
max.words = 100,
45+
colors = pal)) -> word_cloud

0 commit comments

Comments
 (0)