-
-
Notifications
You must be signed in to change notification settings - Fork 10
/
w19_nyt.R
executable file
·130 lines (113 loc) · 4.51 KB
/
w19_nyt.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# TidyTuesday 2022 week 19 - NYT
# source: https://github.com/rfordatascience/tidytuesday/blob/master/data/2022/2022-05-10/readme.md
# Housekeeping: make the script's own folder the working directory so the
# output images are written next to it.
# The active-document lookup is only attempted when {rstudioapi} is installed
# and reports that RStudio is available; otherwise (e.g. Rscript, source())
# the current working directory is kept as is.
# The global environment is deliberately left untouched: start from a fresh
# R session if a clean workspace is needed.
if (requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()) {
  script_path <- rstudioapi::getActiveDocumentContext()$path
  # The path can be empty (presumably for a document that was never saved);
  # setwd("") would fail, so the directory is only changed for a real path.
  if (nzchar(script_path)) {
    setwd(dirname(script_path))
  }
}
# load the libraries
library(tidyverse)
library(forcats)
library(ggridges)
library(showtext)
library(cowplot)
# Set the font
# Turn on automatic use of {showtext} for text rendering.
showtext_auto(enable = TRUE)
# List the family names available from Google Fonts (informational only:
# the result is not stored).
sysfonts::font_families_google()
# Fetch "Abril Fatface" from Google Fonts and register it under that same
# family name, which the plot theme and labels below refer to.
sysfonts::font_add_google(name = "Abril Fatface", family = "Abril Fatface")
# Read the two tab-separated tables straight from the TidyTuesday repository
nyt_titles <- readr::read_tsv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-05-10/nyt_titles.tsv"
)
nyt_full <- readr::read_tsv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-05-10/nyt_full.tsv"
)
# Peek at the first rows of each table
head(nyt_titles)
head(nyt_full)
# A bit of wrangling
# Combine the two tables on their shared keys, keeping unmatched rows from
# both sides.
df <- nyt_titles %>%
  full_join(nyt_full, by = c("year", "title", "author"))

df2 <- df %>%
  # Year as a factor with its level order reversed (used as the ridge axis)
  mutate(year_fct = fct_rev(as.factor(year))) %>%
  # Drop rows whose author string signals an editor, compiler, illustrator,
  # translator, a shared credit (" and ", " with ") or no author at all
  filter(!str_detect(
    author,
    "Edited|edited|created|compiled|Completed|NO AUTHOR| and |Illustrated| with |translated"
  )) %>%
  # Strip punctuation/suffix noise from the remaining author names.
  # Every `?` is escaped so that it matches a literal question mark: a bare
  # `?` placed right after `|` has nothing to quantify and cannot stand for
  # the literal character that is meant here.
  mutate(author = gsub(
    "! by |\\? by |\\?by |\\?|\"|, Jr| Jr|\\.$| writing as.*",
    "",
    author
  )) %>%
  group_by(author) %>%
  # Per author, one output row per input row (year_fct is carried through).
  # NOTE(review): recent dplyr releases warn about multi-row summarise() and
  # point to reframe(); kept as is for compatibility with older versions.
  summarise(
    year_fct,
    # mean rank of the author
    avg_rank = mean(rank),
    # ranks rescaled by scale() without centering
    scale = scale(rank, center = FALSE),
    # each scaled rank as a share of the author's total scaled rank
    scale_pct = scale / sum(scale),
    avg_rank_pct = avg_rank / sum(avg_rank),
    # number of rows for the author
    id = n()
  ) %>%
  ungroup()
# Data for the geom_label() layer: for every year, the author(s) holding the
# largest scaled rank, with the text "<author> in <year>".
side_labels <- df2 %>%
  group_by(year_fct, author) %>%
  # One row per year/author pair; the result stays grouped by year_fct
  # (stated explicitly so the next step's grouping is not left implicit).
  summarize(
    top = max(scale),
    scale_pct = mean(scale_pct),
    .groups = "drop_last"
  ) %>%
  # Within each year keep the row(s) reaching that year's maximum;
  # ties keep more than one author for the year.
  filter(top == max(top)) %>%
  mutate(lab = paste(author, "in", year_fct)) %>%
  ungroup() %>%
  select(year_fct, lab, scale_pct) %>%
  arrange(desc(year_fct))
# Make the plot: one row of density ridges per year, filled by author,
# with a label per year taken from side_labels.
df2 %>%
  # Reorder the author levels by their scale_pct values;
  # this sets the order of the density curves within each year
  mutate(author = fct_reorder(author, scale_pct)) %>%
  ggplot(aes(x = scale_pct, y = year_fct)) +
  # Density curves, evaluated on the fixed range 0 to 1
  geom_density_ridges(aes(fill = author),
                      show.legend = FALSE,
                      size = 0.3,
                      scale = 1,
                      alpha = .8,
                      color = "grey25",
                      from = 0, to = 1) +
  # Per-year author label, anchored at x = 0.5 with its own data and mapping
  geom_label(data = side_labels,
             aes(x = 0.5, y = year_fct, label = lab),
             label.padding = unit(0.05, "lines"),
             label.r = unit(0.5, "lines"),
             label.size = 0,
             family = "Abril Fatface", size = 10,
             inherit.aes = FALSE, hjust = 0, vjust = 0) +
  scale_y_discrete(expand = c(0, 0)) +
  # Negative expansion on the upper side trims the right end of the x axis
  scale_x_continuous(expand = expansion(mult = c(0, -0.35),
                                        add = c(0, -0.02))) +
  # Grey-scale fills for the authors
  scale_fill_grey(
    start = 0.2,
    end = 0.9,
    na.value = "red",
    aesthetics = "fill") +
  labs(title = "The New York Times",
       subtitle = "Solo author ranks from 1931 to 2020",
       caption = "DataSource: Post45 Data Collective NYT HARDCOVER FICTION BESTSELLERS\nDataViz: Federica Gazzelloni (@fgazzelloni)",
       x = "Rank density", y = "Year") +
  # Do not clip drawing at the panel border
  coord_cartesian(clip = "off") +
  theme_ridges(grid = FALSE) +
  theme(text = element_text(family = "Abril Fatface", size = 45),
        plot.title = element_text(size = 90),
        plot.caption = element_text(hjust = 1),
        axis.text.y = element_text(size = 30, hjust = 0),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        plot.background = element_rect(color = "grey90", fill = "grey90"),
        panel.background = element_rect(color = "grey90", fill = "grey90"))
# Write the ridge plot (the most recent plot) to disk as an intermediate image
ggsave(
  filename = "partial.png",
  plot = last_plot(),
  width = 10,
  height = 14,
  dpi = 320
)
# Compose the final graphic with {cowplot}: put the saved image on a blank
# canvas and overlay the reading note. Positioning the note on the finished
# image is quicker than tuning text placement inside the plot itself.
ggdraw() +
  draw_image(image = "partial.png") +
  draw_label(
    label = "How to read it:
On average, authors rank 7.6 based on weekly frequencies on NYT,
which corresponds to 3.4% of the total scaled avg-ranks.
Each year shown in the graph represents the density curve of the
ranks for the NYT's solo authors in that year.
The density curves are ordered by total percentage of scaled ranks.
On the right is the author with the avg-weekly highest rank for the year.",
    x = 0.04,
    y = 0.05,
    hjust = 0,
    fontfamily = "Abril Fatface",
    size = 25,
    lineheight = 0.25
  )
# Write the annotated composition (now the most recent plot) as the final image
ggsave(
  filename = "w19_nyt.png",
  plot = last_plot(),
  width = 9,
  height = 12,
  dpi = 320
)
#####--------#####