-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathlanguages.Rmd
161 lines (148 loc) · 6.16 KB
/
languages.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
---
title: "Languages"
link-citations: true
csl: apa.csl
output:
  html_document:
    include:
      in_header: ./html/header_languages.html
editor_options:
  chunk_output_type: console
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE)
library(tidyverse)
library(DT)
```
This page includes a list of the idioms represented in the TALD database. The East Caucasian language family is very rich in terms of dialectal differentiation. There is no unified genealogical classification of all idioms spoken in Daghestan, so we compiled our own genealogical classification, which tries to merge different traditional and modern approaches to the language/dialect classification of Daghestanian languages. In the classification tree (the second tabset below) you can also find the genealogical affiliation of each village represented in TALD. This classification is different from the one in [the Glottolog database](https://glottolog.org/resource/languoid/id/nakh1245). In the first tabset you can find a table that contains all the idioms represented in TALD and the chapter(s) in which they appear. In the second tabset you can find a tree that represents the genealogical classification that we use in the current version of TALD.
## {.tabset .tabset-fade .tabset-pills}
### TALD Chapters by language
```{r}
# Lookup table: chapter data-file name -> Markdown list item linking to the
# rendered chapter page. It does not depend on the individual data file, so it
# is built once here instead of re-reading the spreadsheet for every file.
chapter_links <- readxl::read_xlsx("data/contributors.xlsx",
                                   guess_max = 4000) |>
  filter(render == 1) |>
  # Under rowwise(), max(id) is the row's own id, so the zero-padding width is
  # "number of digits of this id + 1".
  # NOTE(review): confirm this matches the names of the rendered .html files.
  rowwise() |>
  mutate(
    id_0 = sprintf(str_c("%0", nchar(max(id)) + 1, "d_"), id),
    Chapter = str_c("- [", title, "](", str_c(id_0, filename), ".html)")
  ) |>
  select(filename, Chapter)

# For every chapter data table, list the (Language, Idiom) pairs that have at
# least one non-missing value, paired with the link to that chapter.
map(list.files("data/orig_table", full.names = TRUE), function(file) {
  feature_dataset <- read_tsv(file, show_col_types = FALSE)

  # Map each `valueN` column to the label stored in its `valueN_name` column.
  columns_rename <- feature_dataset |>
    select(str_which(colnames(feature_dataset), 'value\\d{1,}_name$')) |>
    pivot_longer(cols = everything()) |>
    distinct() |>
    mutate(name = str_remove(name, '_name'))

  feature_dataset |>
    select(lang, idiom, matches("value\\d")) |>
    select(-matches("numeric")) |>
    rename_with(
      function(x) {
        columns_rename$value[match(x, columns_rename$name)]
      },
      matches('value\\d{1,}$')
    ) |>
    select(-matches("value\\d{1,}\\_name")) |>
    mutate(across(everything(), as.character)) |>
    # Long format is used only to drop idioms without any recorded value;
    # Feature and Value themselves are discarded by the final select().
    pivot_longer(names_to = "Feature", values_to = "Value", -c(lang:idiom)) |>
    filter(!is.na(Value)) |>
    rename(Language = lang,
           Idiom = idiom) |>
    # Reduce the path to the bare file name, which is the join key.
    # The dot is escaped so that only a literal ".tsv" suffix is removed.
    mutate(filename = str_remove(file, "data/orig_table/"),
           filename = str_remove(filename, "\\.tsv$")) |>
    left_join(chapter_links, by = join_by(filename)) |>
    select(Language, Idiom, Chapter)
}) |>
  list_rbind() |>
  mutate(across(everything(), as.factor)) |>
  arrange(Idiom, Language) |>
  distinct() |>
  group_by(Idiom, Language) |>
  # One row per idiom: all chapter links joined into a single Markdown list.
  summarise(Chapter = str_c(Chapter, collapse = "\n")) |>
  # NOTE(review): summarise() drops only the last grouping level, so this
  # mutate() still runs per Idiom and renderMarkdown() receives that group's
  # Chapter value(s) rather than the whole column. Confirm before adding
  # ungroup() or `.groups = "drop"` above.
  mutate(Chapter = markdown::renderMarkdown(Chapter)) |>
  arrange(Language) |>
  # escape = FALSE so the rendered HTML links are shown as links.
  DT::datatable(filter = 'top',
                rownames = FALSE,
                escape = FALSE,
                options = list(
                  pageLength = 20,
                  autoWidth = TRUE,
                  dom = 'fltpi'))
```
### Genealogical classification of TALD idioms
```{r}
# Build the nested list consumed by jsTreeR::jstree() from the flat village
# table: every distinct classification path becomes a branch of the tree.
# NOTE(review): the file has a .csv extension but is read as tab-separated;
# confirm that this is intended.
read_tsv("data/tald_villages.csv") |>
filter(!is.na(village_dialect)) |>
# Leaf label is "<village_dialect> (<rus_village>)". str_c() propagates NA, so
# a missing rus_village makes the leaf NA and it is removed by na.omit() below.
mutate(village_dialect = str_c(village_dialect, " (", rus_village, ")")) |>
distinct(family, aff, default_level, dialect_nt1, dialect_nt2, dialect_nt3,
village_dialect) |>
arrange(family, aff, default_level, dialect_nt1, dialect_nt2, dialect_nt3,
village_dialect) |>
# One id per distinct classification path.
mutate(id = 1:n()) |>
# Long format, drop the missing levels, then relabel the remaining levels of
# each path as a, b, c, ... so that paths have no gaps between levels.
pivot_longer(names_to = "cols", values_to = "value", -id) |>
na.omit() |>
group_by(id) |>
mutate(new_col = letters[1:n()]) |>
select(-cols) |>
ungroup() |>
pivot_wider(names_from = new_col, values_from = value) |>
# L1, L3, ..., L11 are 1-based positions of a node among the sorted unique
# values of its level, computed within the current grouping. Each group_by()
# call replaces the previous grouping, so a level is numbered within its
# immediate parent value only.
mutate(L1 = a |> factor() |> as.double()) |>
group_by(a) |>
mutate(L3 = b |> factor() |> as.double()) |>
group_by(b) |>
mutate(L5 = c |> factor() |> as.double()) |>
group_by(c) |>
mutate(L7 = d |> factor() |> as.double()) |>
group_by(d) |>
mutate(L9 = e |> factor() |> as.double()) |>
group_by(e) |>
mutate(L11 = f |> factor() |> as.double()) |>
ungroup() |>
# One row per (path, level).
# NOTE(review): a:f assumes that columns a through f all exist, i.e. the
# deepest path has exactly six non-missing levels; confirm against the data.
pivot_longer(names_to = "col", values_to = "value", a:f) |>
filter(!is.na(value)) |>
# L2, L4, ..., L12 are the list keys between the positional indices: "text" at
# the row's own level, "children" for every level above it, NA below it.
# After each key is set, the positional index of a level the row does not
# reach is blanked as well.
mutate(L2 = ifelse(col == "a", "text", "children"),
L4 = case_when(col == "b" ~ "text",
col %in% c("a") ~ NA,
TRUE ~ "children"),
L3 = ifelse(is.na(L4), NA, L3),
L6 = case_when(col == "c" ~ "text",
col %in% c("a", "b") ~ NA,
TRUE ~ "children"),
L5 = ifelse(is.na(L6), NA, L5),
L8 = case_when(col == "d" ~ "text",
col %in% c("a", "b", "c") ~ NA,
TRUE ~ "children"),
L7 = ifelse(is.na(L8), NA, L7),
L10 = case_when(col == "e" ~ "text",
col %in% c("a", "b", "c", "d") ~ NA,
TRUE ~ "children"),
L9 = ifelse(is.na(L10), NA, L9),
L12 = case_when(col == "f" ~ "text",
col %in% c("a", "b", "c", "d", "e") ~ NA,
TRUE ~ "children"),
L11 = ifelse(is.na(L12), NA, L11),
# Blank a key whenever its positional index is missing.
L4 = ifelse(is.na(L3), NA, L4),
L6 = ifelse(is.na(L5), NA, L6),
L8 = ifelse(is.na(L7), NA, L8),
L10 = ifelse(is.na(L9), NA, L10)) |>
filter(!is.na(value)) |>
# Each node is reached from several paths; keep a single row per node.
distinct(L1, L2, L3, L4, L5, L6, L7, L8, L9, L10, L11, L12, value) |>
mutate_all(as.character) |>
mutate(id = 1:n()) ->
result
# Add, for every node, a sibling entry keyed "icon" with the value "FALSE";
# arranging by id places it right before the node's "text" entry.
result |>
mutate(across(L1:L12, function(i){ifelse(i == "text", "icon", i)}),
value = "FALSE") |>
bind_rows(result) |>
arrange(id) |>
select(-id) |>
# Turn the key columns L1..L12 plus the value column into a nested list.
rrapply::rrapply(how = "unmelt") |>
# The icon placeholders become logical FALSE.
# NOTE(review): presumably this hides the node icon in jsTree; confirm.
rrapply::rrapply(condition = function(x) x == "FALSE",
f = as.logical,
how = "replace") |>
# Lists keyed by positional indices (names include "1") are unnamed, so they
# are no longer named lists.
modify_tree(pre = function(x) {
nm <- names(x)
if (1 %in% nm) unname(x)
else x
}) |>
jsTreeR::jstree(search = TRUE)
```