-
Notifications
You must be signed in to change notification settings - Fork 0
/
cadaveres.adoptados.R
88 lines (66 loc) · 3.04 KB
/
cadaveres.adoptados.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# This script cleans and manipulates data from this google drive spreadsheet:
# https://docs.google.com/spreadsheets/d/1nX0ZQK8I3lg2BO1i0bNsHi6-gGD0OorBNMzoz0o1VsU
# Packages and dependencies -----------------------------------------------
# Load required packages. They must already be installed: require() does not
# install anything, it only warns and returns FALSE when a package is missing.
require(googlesheets)
suppressMessages(require(dplyr))
require(tidyr)
# Data import -------------------------------------------------------------
# Open the 'Clon de Adopta un cadáver' spreadsheet using its key.
ss <- gs_key("1PvvaBcSyQP_0SYo2epQcPAU47rA--E_6KSCdMKSQKcg")
ss$n_ws # Number of worksheets.
# seq_len() gives an empty index when there are no worksheets, whereas
# 1:0 would count down through c(1, 0).
ss_ws <- seq_len(ss$n_ws)
# Read columns 1 to 76 of every worksheet into a list of data frames. Keeping
# them in a list (instead of data1, data2, ... created with assign()) means the
# script no longer assumes the spreadsheet has exactly 19 worksheets.
worksheets <- lapply(ss_ws, function(ws_index) {
  gs_read(ss, ws = ws_index, range = cell_cols(1:76))
})
# Combine all worksheets into a single data frame.
data_raw <- do.call(rbind, worksheets)
# Store the file for offline edits. write.csv() expects `na` to be a single
# string (the text written for missing values); "" was the first element of
# the vector passed previously, so the output is unchanged.
write.csv(data_raw, file = "data/input/cadaveres.adoptados_raw.csv", na = "")
# Cleanup: delete the temporary list of per-worksheet data frames.
remove(worksheets)
# Data manipulation -------------------------------------------------------
# Keep only rows that have both a promotional name and coordinates, then split
# the combined latitude/longitude column into two separate variables.
# NOTE(review): "-" placeholders are normalised after this filtering step, so
# a row whose nombre_promocional or latitud_longitud is "-" is still kept;
# confirm that this is intended.
data <- data_raw %>%
  filter(!is.na(nombre_promocional)) %>%
  filter(!is.na(latitud_longitud)) %>%
  separate(latitud_longitud, c("latitud", "longitud"), ",")
# Turn "-" placeholder cells into NA in every column.
# replace(data, "-", NA) did not do this: it evaluates data["-"] <- NA, which
# adds an all-NA column named "-" and leaves the cell values untouched.
# %in% is used instead of == so that existing NA cells compare as FALSE.
data[] <- lapply(data, function(column) {
  column[column %in% "-"] <- NA
  column
})
# Extra columns used for classification on the website.
# Every record produced by this script is labelled "Adoptado".
data[["corpse_category"]] <- "Adoptado"
# Coerce the land area to numeric and divide by 10,000; the result is labelled
# as hectares below (presumably the sheet stores square metres; confirm).
data[["superficie_terreno"]] <- as.numeric(data[["superficie_terreno"]]) / 1e4
data[["superficie_terreno_units"]] <- "area_hectares"
# The built area is left as-is and labelled as square metres.
data[["superficie_construida_units"]] <- "area_square_meters"
# Export file again -------------------------------------------------------
# Write the cleaned data set. `na` is the single string written for missing
# values (it is not a list of strings to treat as NA, as in read.csv's
# na.strings); "" was the first element of the vector passed previously, so
# missing values are still written as empty fields.
write.csv(data, file = "data/cadaveres.adoptados.csv", na = "")
# Some statistics ---------------------------------------------------------
# Tally how many times each adoptive parent appears in padre_adoptivo.
padres_adoptivos <- data %>%
  # Ignore records without an adoptive parent.
  filter(!is.na(padre_adoptivo)) %>%
  # A cell may list several parents separated by ", ": split them into a
  # list column named `value`, dropping every other column.
  transmute(value = strsplit(as.character(padre_adoptivo), ", ")) %>%
  # One row per individual parent.
  unnest(value) %>%
  group_by(value) %>%
  summarise(Total = n()) %>%
  # Most frequent parents first.
  arrange(desc(Total))
# Number of records with a non-missing superficie_terreno, returned as a
# one-row table with a single `Total` column.
superficie_terreno_n <- data %>%
  summarise(Total = sum(!is.na(superficie_terreno)))
# Number of records with a non-missing superficie_construida, returned as a
# one-row table with a single `Total` column.
superficie_construida_n <- data %>%
  summarise(Total = sum(!is.na(superficie_construida)))