diff --git a/DESCRIPTION b/DESCRIPTION index 03f4b57..3ed13ee 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: phonfieldwork Type: Package Title: Linguistic Phonetic Fieldwork Tools -Version: 0.0.12 +Version: 0.0.13 Depends: R (>= 3.5.0) Imports: tuneR, @@ -11,7 +11,7 @@ Imports: graphics, rmarkdown, xml2, - uchardet, + readr, tools, mime Authors@R: c( @@ -47,7 +47,7 @@ URL: https://CRAN.R-project.org/package=phonfieldwork, https://docs.ropensci.org BugReports: https://github.com/ropensci/phonfieldwork/issues Encoding: UTF-8 LazyData: true -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 VignetteBuilder: knitr Suggests: knitr, diff --git a/NAMESPACE b/NAMESPACE index a4b3617..b0998ab 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -44,6 +44,7 @@ importFrom(graphics,rect) importFrom(graphics,segments) importFrom(graphics,text) importFrom(mime,guess_type) +importFrom(readr,guess_encoding) importFrom(rmarkdown,render) importFrom(stats,fft) importFrom(stats,na.omit) @@ -55,7 +56,6 @@ importFrom(tuneR,extractWave) importFrom(tuneR,readMP3) importFrom(tuneR,readWave) importFrom(tuneR,writeWave) -importFrom(uchardet,detect_file_enc) importFrom(utils,installed.packages) importFrom(utils,read.delim) importFrom(utils,write.csv) diff --git a/R/formant_to_df.R b/R/formant_to_df.R index 00b3f25..2b04b2c 100644 --- a/R/formant_to_df.R +++ b/R/formant_to_df.R @@ -13,7 +13,7 @@ #' formant_to_df(system.file("extdata", "e.Formant", package = "phonfieldwork")) #' @export #' -#' @importFrom uchardet detect_file_enc +#' @importFrom readr guess_encoding #' formant_to_df <- function(file_name) { @@ -22,7 +22,8 @@ formant_to_df <- function(file_name) { formant <- file_name } else { # thanks to Artem Klevtsov for this code - con <- file(file_name, encoding = uchardet::detect_file_enc(file_name)) + con <- file(file_name, + encoding = readr::guess_encoding(file_name)$encoding) formant <- readLines(con) close(con) } diff --git a/R/intensity_to_df.R b/R/intensity_to_df.R index 5e8350d..f30ad00 100644 --- a/R/intensity_to_df.R +++ b/R/intensity_to_df.R @@ -13,7 +13,7 @@ #' intensity_to_df(system.file("extdata", "test.Intensity", package = "phonfieldwork")) #' @export #' -#' @importFrom uchardet detect_file_enc +#' @importFrom readr guess_encoding #' intensity_to_df <- function(file_name) { @@ -23,7 +23,8 @@ intensity_to_df <- function(file_name) { intensity <- file_name } else { # thanks to Artem Klevtsov for this code - con <- file(file_name, encoding = uchardet::detect_file_enc(file_name)) + con <- file(file_name, + encoding = readr::guess_encoding(file_name)$encoding) intensity <- readLines(con) close(con) } diff --git a/R/pitch_to_df.R b/R/pitch_to_df.R index 4277761..cb161da 100644 --- a/R/pitch_to_df.R +++ b/R/pitch_to_df.R @@ -16,7 +16,7 @@ #' pitch_to_df(system.file("extdata", "test.Pitch", package = "phonfieldwork")) #' @export #' -#' @importFrom uchardet detect_file_enc +#' @importFrom readr guess_encoding #' pitch_to_df <- function(file_name, @@ -26,7 +26,8 @@ pitch_to_df <- function(file_name, pitch <- file_name } else { # thanks to Artem Klevtsov for this code - con <- file(file_name, encoding = uchardet::detect_file_enc(file_name)) + con <- file(file_name, + encoding = readr::guess_encoding(file_name)$encoding) pitch <- readLines(con) close(con) } diff --git a/R/set_textgrid_names.R b/R/set_textgrid_names.R index 23f1063..808b176 100644 --- a/R/set_textgrid_names.R +++ b/R/set_textgrid_names.R @@ -19,7 +19,6 @@ #' ) #' @export #' -#' @importFrom uchardet detect_file_enc #' set_textgrid_names <- 
function(textgrid, diff --git a/R/srt_to_df.R b/R/srt_to_df.R index 4e7928a..2df1247 100644 --- a/R/srt_to_df.R +++ b/R/srt_to_df.R @@ -13,13 +13,14 @@ #' srt_to_df(system.file("extdata", "test.srt", package = "phonfieldwork")) #' @export #' -#' @importFrom uchardet detect_file_enc +#' @importFrom readr guess_encoding #' srt_to_df <- function(file_name) { # thanks to Artem Klevtsov for this code - con <- file(file_name, encoding = uchardet::detect_file_enc(file_name)) + con <- file(file_name, + encoding = readr::guess_encoding(file_name)$encoding) srt <- readLines(con) close(con) diff --git a/R/textgrid_to_df.R b/R/textgrid_to_df.R index 8db9cd7..cb2fa0d 100644 --- a/R/textgrid_to_df.R +++ b/R/textgrid_to_df.R @@ -21,7 +21,6 @@ #' )) #' @export #' -#' @importFrom uchardet detect_file_enc #' textgrid_to_df <- function(file_name) { diff --git a/R/utils.r b/R/utils.r index 1fb570f..d555c01 100644 --- a/R/utils.r +++ b/R/utils.r @@ -3,7 +3,7 @@ #' @author George Moroz #' @param file_name string with a filename or path to the TextGrid #' @noRd -#' @importFrom uchardet detect_file_enc +#' @importFrom readr guess_encoding #' read_textgrid <- function(file_name) { @@ -11,7 +11,8 @@ read_textgrid <- function(file_name) { tg <- file_name } else { # thanks to Artem Klevtsov for this code - con <- file(file_name, encoding = uchardet::detect_file_enc(file_name)) + con <- file(file_name, + encoding = readr::guess_encoding(file_name)$encoding) tg <- readLines(con) close(con) } diff --git a/codemeta.json b/codemeta.json index f5e039e..ee75941 100644 --- a/codemeta.json +++ b/codemeta.json @@ -8,19 +8,13 @@ "codeRepository": "https://github.com/ropensci/phonfieldwork", "issueTracker": "https://github.com/ropensci/phonfieldwork/issues", "license": "https://spdx.org/licenses/GPL-2.0", - "version": "0.0.12", + "version": "0.0.13", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", "url": "https://r-project.org" }, - "runtimePlatform": "R version 4.2.2 Patched (2022-11-10 r83330)", - "provider": { - "@id": "https://cran.r-project.org", - "@type": "Organization", - "name": "Comprehensive R Archive Network (CRAN)", - "url": "https://cran.r-project.org" - }, + "runtimePlatform": "R version 4.3.3 (2024-02-29)", "author": [ { "@type": "Person", @@ -206,15 +200,15 @@ }, "9": { "@type": "SoftwareApplication", - "identifier": "uchardet", - "name": "uchardet", + "identifier": "readr", + "name": "readr", "provider": { "@id": "https://cran.r-project.org", "@type": "Organization", "name": "Comprehensive R Archive Network (CRAN)", "url": "https://cran.r-project.org" }, - "sameAs": "https://CRAN.R-project.org/package=uchardet" + "sameAs": "https://CRAN.R-project.org/package=readr" }, "10": { "@type": "SoftwareApplication", @@ -235,7 +229,7 @@ }, "SystemRequirements": "pandoc (>= 1.14) - http://pandoc.org" }, - "fileSize": "4371.17KB", + "fileSize": "4059.603KB", "citation": [ { "datePublished": "2023", diff --git a/docs/404.html b/docs/404.html index bbcc47b..823e1e8 100644 --- a/docs/404.html +++ b/docs/404.html @@ -32,7 +32,7 @@ phonfieldwork - 0.0.12 + 0.0.13 diff --git a/docs/CONTRIBUTING.html b/docs/CONTRIBUTING.html index 30d91bd..8f723eb 100644 --- a/docs/CONTRIBUTING.html +++ b/docs/CONTRIBUTING.html @@ -17,7 +17,7 @@ phonfieldwork - 0.0.12 + 0.0.13 diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 79e6b49..a9c41c0 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -17,7 +17,7 @@ phonfieldwork - 0.0.12 + 0.0.13 diff --git a/docs/additional/first_example.html 
b/docs/additional/first_example.html index a7a064a..11a027e 100644 --- a/docs/additional/first_example.html +++ b/docs/additional/first_example.html @@ -3060,20 +3060,23 @@ }); }; -if (document.readyState !== "loading" && - document.querySelector('slides') === null) { - // if the document is done loading but our element hasn't yet appeared, defer - // loading of the deck - window.setTimeout(function() { - loadDeck(null); - }, 0); -} else { - // still loading the DOM, so wait until it's finished - document.addEventListener("DOMContentLoaded", loadDeck); +if (!window.Shiny) { + // If Shiny is loaded, the slide deck is initialized in ioslides template + + if (document.readyState !== "loading" && + document.querySelector('slides') === null) { + // if the document is done loading but our element hasn't yet appeared, defer + // loading of the deck + window.setTimeout(function() { + loadDeck(null); + }, 0); + } else { + // still loading the DOM, so wait until it's finished + document.addEventListener("DOMContentLoaded", loadDeck); + } } - - + @@ -3422,8 +3257,8 @@

diff --git a/docs/articles/data_manipulation_with_tidyverse.html b/docs/articles/data_manipulation_with_tidyverse.html index 0a4d39f..5b8a39d 100644 --- a/docs/articles/data_manipulation_with_tidyverse.html +++ b/docs/articles/data_manipulation_with_tidyverse.html @@ -33,7 +33,7 @@ phonfieldwork - 0.0.12 + 0.0.13
@@ -92,7 +92,7 @@

George Moroz, Laboratory

-

2023-02-16

+

2024-04-08

Source: vignettes/data_manipulation_with_tidyverse.Rmd @@ -129,22 +129,7 @@

.TextGrid, .eaf, .exb formats
textgrid_to_df("s1/s1_all.TextGrid")
-#>    id time_start  time_end      content tier     tier_name          source
-#> 1   1  0.0000000 0.4821542          tip    1        labels s1_all.TextGrid
-#> 4   1  0.0000000 0.4821542 1_s1_tip.wav    2 backup labels s1_all.TextGrid
-#> 7   1  0.0000000 0.1072426                 3               s1_all.TextGrid
-#> 8   2  0.1072426 0.1887230            ı    3               s1_all.TextGrid
-#> 9   3  0.1887230 0.4821542                 3               s1_all.TextGrid
-#> 2   2  0.4821542 0.9120635          tap    1        labels s1_all.TextGrid
-#> 5   2  0.4821542 0.9120635 2_s1_tap.wav    2 backup labels s1_all.TextGrid
-#> 10  4  0.4821542 0.5770552                 3               s1_all.TextGrid
-#> 11  5  0.5770552 0.6793392            æ    3               s1_all.TextGrid
-#> 12  6  0.6793392 0.9120635                 3               s1_all.TextGrid
-#> 3   3  0.9120635 1.3942177          top    1        labels s1_all.TextGrid
-#> 6   3  0.9120635 1.3942177 3_s1_top.wav    2 backup labels s1_all.TextGrid
-#> 13  7  0.9120635 1.0364661                 3               s1_all.TextGrid
-#> 14  8  1.0364661 1.1066780            ɒ    3               s1_all.TextGrid
-#> 15  9  1.1066780 1.3942177                 3               s1_all.TextGrid

+#> Error in file(file_name, encoding = readr::guess_encoding(file_name)$encoding): invalid 'encoding' argument
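The `invalid 'encoding' argument` failure shown above (and repeated throughout the rebuilt documentation below) follows directly from the switch in the R sources: `readr::guess_encoding()` returns a tibble of candidate encodings, so `readr::guess_encoding(file_name)$encoding` can be a character vector of length zero or greater than one, while `file()` only accepts a single encoding string. A minimal defensive sketch (the helper name `guess_file_encoding()` is hypothetical, not part of the package or of this diff) reduces the guesses to one value before opening the connection:

```r
# Hypothetical helper (not in this changeset): collapse readr's guesses into a
# single encoding string that file() will accept, falling back to UTF-8.
guess_file_encoding <- function(file_name, fallback = "UTF-8") {
  guesses <- readr::guess_encoding(file_name)
  if (nrow(guesses) == 0) {
    return(fallback)                 # no confident guess at all
  }
  guesses <- guesses[order(-guesses$confidence), ]
  guesses$encoding[1]                # keep only the most confident candidate
}

con <- file("s1/s1_all.TextGrid",
            encoding = guess_file_encoding("s1/s1_all.TextGrid"))
tg <- readLines(con)
close(con)
```

Whether a UTF-8 fallback is the right default is a design decision for the package authors; the sketch only illustrates the single-string requirement of `file()` that the new call can miss.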

As we see this table has a long format structure: each observation has its own row. We can select the first two rows with the filter() function, remove all unnecessary columns with the @@ -155,12 +140,7 @@

.TextGrid, .eaf, .exb formatsfilter(tier %in% 1:2) %>% select(-time_start, -time_end, -tier_name) %>% pivot_wider(names_from = tier, values_from = content) -#> # A tibble: 3 × 4 -#> id source `1` `2` -#> <dbl> <chr> <chr> <chr> -#> 1 1 s1_all.TextGrid tip 1_s1_tip.wav -#> 2 2 s1_all.TextGrid tap 2_s1_tap.wav -#> 3 3 s1_all.TextGrid top 3_s1_top.wav +#> Error in file(file_name, encoding = readr::guess_encoding(file_name)$encoding): invalid 'encoding' argument +#> p_id s_id free_trans w_id txt gls +#> <dbl> <dbl> <chr> <dbl> <chr> <chr> +#> 1 1 1 Жил-был (у Гъули?) петух. 1 б-ик1-о-й=гъоди "an-быть-pst-cvb(p… +#> 2 1 1 Жил-был (у Гъули?) петух. 2 б--о-ч1игу=гъоди "an--pst-neg.cvb-=… +#> 3 1 1 Жил-был (у Гъули?) петух. 3 Гъули-б "Гъули-an(gen)" +#> 4 1 1 Жил-был (у Гъули?) петух. 4 х1елеко "петух" +#> 5 1 1 Жил-был (у Гъули?) петух. 5 . "" +#> 6 2 2 Он грелся на улице(?). 6 къват1и-ла=гъоди "улица-in-=rep" +#> 7 2 2 Он грелся на улице(?). 7 б-ик1-о-j "an-быть-pst-cvb(p… +#> 8 2 2 Он грелся на улице(?). 8 букьир-ъа "Букир-sup" +#> 9 2 2 Он грелся на улице(?). 9 . "" +#> 10 2 3 [Ему в ногу] воткнулась колючка. 10 къинни-й=гъоди "втыкаться-cvb(pf)… +#> # ℹ 126 more rows

The first filter() removes some garbage rows that are present in our example flextext. The select() function selects only six important columns from 15 presented in the dataset. The @@ -251,27 +231,27 @@

.flextext format#> argument. #> # A tibble: 19 × 5 #> # Groups: p_id, s_id [19] -#> p_id s_id free_trans txt gls -#> <dbl> <dbl> <chr> <chr> <chr> -#> 1 1 1 "Жил-был (у Гъули?) петух." б-ик… "an-… -#> 2 2 2 "Он грелся на улице(?)." къва… "ули… -#> 3 2 3 "[Ему в ногу] воткнулась колючка." къин… "вты… -#> 4 3 4 "Когда колючка воткнулась, [он] ушел к Бихтаю." ццан… "кол… -#> 5 4 5 "Гъули не обнаружил дома Бихтай, дома ее нет, из себя не вынул и… бихь… "Бих… -#> 6 5 6 "Оттуда пошел к Умалаю, " б-uʔ… "an-… -#> 7 6 7 "Оттуда петух пошел к Патимат." х1ел… "пет… -#> 8 8 10 "Оттуда [петух] пошел к Ханичай." гье-… "dem… -#> 9 9 11 "Иди к Хурмат, ..." хъан… "Хан… -#> 10 10 12 "Когда дошёл до двора Хурмат, из окна появился мальчик, сказал:" рул1… "гов… -#> 11 11 13 "Три дня не ели, мы с ней не зная, сказал он." рул1… "гов… -#> 12 12 14 "Оттуда он ушёл и дошёл до Айшат Исмаиловой и её не обнаружив, о… гье-… "dem… -#> 13 13 15 "Захраил …?" й--и… "f--… -#> 14 14 16 "И он пошел в село. Захраил сказала, что колючка воткнулась в пе… б-ук… "an-… -#> 15 15 17 "Оттуда снизу вверх к Исрапилу ..." гьег… "там… -#> 16 16 18 "Шли-шли и пришли к Гаджи." гье-… "dem… -#> 17 17 19 "Они поссорились (?) и прогнали (?) петуха." й-ей… "f-р… -#> 18 18 20 "Когда закончили ссориться, [пошли?] к Забиту." джид… "дел… -#> 19 19 21 "На воротах Забита петух обнаружил замок и пошел к Зумайрат." х1ел… "пет… +#> p_id s_id free_trans txt gls +#> <dbl> <dbl> <chr> <chr> <chr> +#> 1 1 1 "Жил-был (у Гъули?) петух." б-ик… "an-… +#> 2 2 2 "Он грелся на улице(?)." къва… "ули… +#> 3 2 3 "[Ему в ногу] воткнулась колючка." къин… "вты… +#> 4 3 4 "Когда колючка воткнулась, [он] ушел к Бихтаю." ццан… "кол… +#> 5 4 5 "Гъули не обнаружил дома Бихтай, дома ее нет, из себя не вынул… бихь… "Бих… +#> 6 5 6 "Оттуда пошел к Умалаю, " б-uʔ… "an-… +#> 7 6 7 "Оттуда петух пошел к Патимат." х1ел… "пет… +#> 8 8 10 "Оттуда [петух] пошел к Ханичай." гье-… "dem… +#> 9 9 11 "Иди к Хурмат, ..." хъан… "Хан… +#> 10 10 12 "Когда дошёл до двора Хурмат, из окна появился мальчик, сказал… рул1… "гов… +#> 11 11 13 "Три дня не ели, мы с ней не зная, сказал он." рул1… "гов… +#> 12 12 14 "Оттуда он ушёл и дошёл до Айшат Исмаиловой и её не обнаружив,… гье-… "dem… +#> 13 13 15 "Захраил …?" й--и… "f--… +#> 14 14 16 "И он пошел в село. Захраил сказала, что колючка воткнулась в … б-ук… "an-… +#> 15 15 17 "Оттуда снизу вверх к Исрапилу ..." гьег… "там… +#> 16 16 18 "Шли-шли и пришли к Гаджи." гье-… "dem… +#> 17 17 19 "Они поссорились (?) и прогнали (?) петуха." й-ей… "f-р… +#> 18 18 20 "Когда закончили ссориться, [пошли?] к Забиту." джид… "дел… +#> 19 19 21 "На воротах Забита петух обнаружил замок и пошел к Зумайрат." х1ел… "пет…

It is also very easy to get some simple statistics from the data:

 df %>% 
diff --git a/docs/articles/index.html b/docs/articles/index.html
index 67e0f26..c26b8d0 100644
--- a/docs/articles/index.html
+++ b/docs/articles/index.html
@@ -17,7 +17,7 @@
       
       
         phonfieldwork
-        0.0.12
+        0.0.13
       
     
diff --git a/docs/articles/phonfieldwork.html b/docs/articles/phonfieldwork.html index 89e2575..d1496d2 100644 --- a/docs/articles/phonfieldwork.html +++ b/docs/articles/phonfieldwork.html @@ -33,7 +33,7 @@ phonfieldwork - 0.0.12 + 0.0.13 @@ -90,7 +90,7 @@

G. Moroz,

-

2023-02-16

+

2024-04-08

Source:
vignettes/phonfieldwork.Rmd @@ -166,7 +166,19 @@

Install the package
 citation("phonfieldwork")
-
## Error in person$family: $ operator is invalid for atomic vectors
+
## To cite package 'phonfieldwork' in publications use:
+## 
+##   Moroz G (2023). "Phonetic fieldwork research and experiments with the R
+##   package phonfieldwork." In Kobozeva I, Semyonova K, Kostyuk A, Zakharov L,
+##   Svetozarova N (eds.), _«…Vperyod i vverkh po lestnitse zvuchashey». Sbornik
+##   statye k 80-letiyu Olgi Fyodorovny Krivnovoy [Festschrift in memoriam to Olga
+##   Fyodorovna Krivnova]_. Buki Vedi, Moscow.
+## 
+##   Moroz G (2020). _Phonetic fieldwork and experiments with phonfieldwork
+##   package_. <https://CRAN.R-project.org/package=phonfieldwork>.
+## 
+## To see these entries in BibTeX format, use 'print(<citation>, bibtex=TRUE)',
+## 'toBibtex(.)', or set 'options(citation.bibtex.max=999)'.

If you have any trouble using the package, do not hesitate to create an issue on Github.

@@ -320,7 +332,7 @@

Create a presentation
 my_image <- system.file("extdata", "r-logo.png", package = "phonfieldwork")
 my_image
-
## [1] "/home/agricolamz/R/x86_64-pc-linux-gnu-library/4.2/phonfieldwork/extdata/r-logo.png"
+
## [1] "/home/agricolamz/R/x86_64-pc-linux-gnu-library/4.3/phonfieldwork/extdata/r-logo.png"
 create_presentation(stimuli = c("rzeka", "drzewo", my_image),
                     external = 3,
@@ -426,7 +438,7 @@ 

Rename collected data
 get_sound_duration("s1/2_s1_tap.wav")

##           file  duration
-## 1 2_s1_tap.wav 0.4299093
+## 1 2_s1_tap.wav 0.4821542

It is also possible to analyze the whole folder using the read_from_folder() function. The first argument is the path to the folder. The second argument is the type of information or file @@ -516,10 +528,12 @@

Annotate your data= "s1/s1_all.TextGrid", tier = 3, backup = FALSE) +
## Error in file(file_name, encoding = readr::guess_encoding(file_name)$encoding): invalid 'encoding' argument

You can see that we created a third tier with annotation. The only thing left is to move annotation boundaries in Praat (this can not be automated):

+
## Error in file(file_name, encoding = readr::guess_encoding(file_name)$encoding): invalid 'encoding' argument

You can see from the last figure that no backup tier was created (backup = FALSE), that the third tier was annotated @@ -527,7 +541,7 @@

Annotate your datacreate_empty_textgrid() function that takes a duration as an argument:

-
+
 create_empty_textgrid(get_sound_duration("s2/s2_tip_1.wav")$duration,
                       tier_name = c("a", "b"),
                       path = "s2",
@@ -551,7 +565,7 @@ 

Annotate your data## │   └── s2_top_3.wav

It is also possible to remove a tier from a TextGrid. For instance, we can remove one tier from the previously created file:

-
+
 remove_textgrid_tier(textgrid = "s2/s2_tip_1.TextGrid", tier = 2)

@@ -560,17 +574,21 @@

Extracting your data
+
 dir.create("s1/s1_sounds")

It is possible to extract all annotated files based on an annotation tier:

-
+
 extract_intervals(file_name = "s1/s1_all.wav",
                   textgrid = "s1/s1_all.TextGrid",
                   tier = 3,
                   path = "s1/s1_sounds/",
                   prefix = "s1_")
-
## ├── s1
+
## Error in file(file_name, encoding = readr::guess_encoding(file_name)$encoding): invalid 'encoding' argument
+
## ├── my_stimuli_df.csv
+## ├── my_stimuli_df.xlsx
+## ├── phonfieldwork.Rmd
+## ├── s1
 ## │   ├── 1_s1_tip.wav
 ## │   ├── 2_s1_tap.wav
 ## │   ├── 3_s1_top.wav
@@ -582,9 +600,6 @@ 

Extracting your data## │   ├── s1_all.TextGrid ## │   ├── s1_all.wav ## │   └── s1_sounds -## │   ├── 1_s1_ı.wav -## │   ├── 2_s1_æ.wav -## │   └── 3_s1_ɒ.wav ## ├── s2 ## │   ├── s2_tap_2.wav ## │   ├── s2_tip_1.TextGrid @@ -596,9 +611,9 @@

Visualizing your data

It is possible to view an oscillogram and spectrogram of any soundfile:

-
+
 draw_sound(file_name = "s1/s1_sounds/1_s1_ı.wav")
-

+
## Error in tuneR::readWave(file_name): File 's1/s1_sounds/1_s1_ı.wav' does not exist.

There are additional parameters:

-
+
 textgrid_to_df(system.file("extdata", "test.TextGrid", package = "phonfieldwork"))
##    id time_start   time_end content tier       tier_name        source
 ## 1   1 0.00000000 0.01246583            1       intervals test.TextGrid
@@ -810,7 +826,7 @@ 

Read linguistic files into RFRelan package by Niko Partanen -
+
 eaf_to_df(system.file("extdata", "test.eaf", package = "phonfieldwork"))
##    tier id                                       content       tier_name tier_type id_
 ## 12    1  1                                                     intervals     praat   1
@@ -846,7 +862,7 @@ 

Read linguistic files into Rsystem.file() function to path to the file) -
+
 exb_to_df(system.file("extdata", "test.exb", package = "phonfieldwork"))
##   tier id content tier_name tier_type tier_category tier_speaker time_start  time_end
 ## 3    1  1       t     X [v]         t             v         SPK0 0.06908955 0.2498984
@@ -870,7 +886,7 @@ 

Read linguistic files into Rsystem.file() function to path to the file) -
+
 srt_to_df(system.file("extdata", "test.srt", package = "phonfieldwork"))
##   id content time_start time_end   source
 ## 0  1       t      0.013    0.248 test.srt
@@ -880,7 +896,7 @@ 

Read linguistic files into R
+
 audacity_to_df(system.file("extdata", "test_audacity.txt", package = "phonfieldwork"))
##   time_start  time_end content            source
 ## 1  0.2319977 0.3953891    sssw test_audacity.txt
@@ -889,7 +905,7 @@

Read linguistic files into R
head(flextext_to_df("files/zilo_test.flextext"))
## It can take some time for big files...
##   p_id s_id w_id    txt     cf hn     gls                   msa                free_trans
@@ -924,7 +940,7 @@ 

Read linguistic files into R.docx, .html):

-
+
 create_glossed_document(flextext = "files/zilo_test.flextext",
                         output_dir = ".") # you need to specify the path to the output folder
## It can take some time for big files...
@@ -939,7 +955,7 @@

Read linguistic files into Rexb_to_df(), audacity_to_df(), srt_to_df()) except flextext_to_df() can be used in order to visualise sound annotation:

-
+
 draw_sound(file_name = system.file("extdata", "test.wav", package = "phonfieldwork"),
            annotation = eaf_to_df(system.file("extdata", "test.eaf", package = "phonfieldwork")))

@@ -963,7 +979,14 @@

Create a viewer

We will start with the previous folder structure:

-
## ├── s1
+
## │   └── zilo_test.flextext
+## ├── first_example.html
+## ├── glossed_document.html
+## ├── introduction_to_phonfieldwork.Rmd.orig
+## ├── my_stimuli_df.csv
+## ├── my_stimuli_df.xlsx
+## ├── phonfieldwork.Rmd
+## ├── s1
 ## │   ├── 1_s1_tip.wav
 ## │   ├── 2_s1_tap.wav
 ## │   ├── 3_s1_top.wav
@@ -975,29 +998,22 @@ 

Create a viewer## │   ├── s1_all.TextGrid ## │   ├── s1_all.wav ## │   ├── s1_pics -## │   │   ├── 1_s1_ı.png -## │   │   ├── 2_s1_æ.png -## │   │   └── 3_s1_ɒ.png -## │   ├── s1_sounds -## │   │   ├── 1_s1_ı.wav -## │   │   ├── 2_s1_æ.wav -## │   │   └── 3_s1_ɒ.wav -## │   └── s1_tip.png +## │   └── s1_sounds ## ├── s2 ## │   ├── s2_tap_2.wav ## │   ├── s2_tip_1.TextGrid ## │   ├── s2_tip_1.wav ## │   └── s2_top_3.wav

We have all folders:

-
+
 list.files("s1/s1_sounds/") # sounds
-
## [1] "1_s1_ı.wav" "2_s1_æ.wav" "3_s1_ɒ.wav"
-
+
## character(0)
+
 list.files("s1/s1_pics/") # pictures
-
## [1] "1_s1_ı.png" "2_s1_æ.png" "3_s1_ɒ.png"
+
## character(0)

So what is left is the table. It is possible to create it manually (or upload it from .csv or .xlsx files, see section 4.1):

-
+
 df <- data.frame(word  = c("tap", "tip", "top"),
                  sounds = c("æ", "ı", "ɒ"))
 df
@@ -1006,7 +1022,7 @@

Create a viewer## 2 tip ı ## 3 top ɒ

This table could be used in order to create an annotation viewer:

-
+
 create_viewer(audio_dir = "s1/s1_sounds/",
               picture_dir = "s1/s1_pics/",
               table = df,
@@ -1016,7 +1032,14 @@ 

Create a viewer## Output created: s1/stimuli_viewer.html

As a result, a stimuli_viewer.html was created in the s1 folder.

-
## ├── s1
+
## │   └── zilo_test.flextext
+## ├── first_example.html
+## ├── glossed_document.html
+## ├── introduction_to_phonfieldwork.Rmd.orig
+## ├── my_stimuli_df.csv
+## ├── my_stimuli_df.xlsx
+## ├── phonfieldwork.Rmd
+## ├── s1
 ## │   ├── 1_s1_tip.wav
 ## │   ├── 2_s1_tap.wav
 ## │   ├── 3_s1_top.wav
@@ -1028,14 +1051,7 @@ 

Create a viewer## │   ├── s1_all.TextGrid ## │   ├── s1_all.wav ## │   ├── s1_pics -## │   │   ├── 1_s1_ı.png -## │   │   ├── 2_s1_æ.png -## │   │   └── 3_s1_ɒ.png ## │   ├── s1_sounds -## │   │   ├── 1_s1_ı.wav -## │   │   ├── 2_s1_æ.wav -## │   │   └── 3_s1_ɒ.wav -## │   ├── s1_tip.png ## │   └── stimuli_viewer.html ## ├── s2 ## │   ├── s2_tap_2.wav @@ -1047,75 +1063,50 @@

Create a viewer -
+
 textgrid_to_df("s1/s1_all.TextGrid")
-
##    id time_start  time_end      content tier     tier_name          source
-## 1   1  0.0000000 0.4821542          tip    1        labels s1_all.TextGrid
-## 4   1  0.0000000 0.4821542 1_s1_tip.wav    2 backup labels s1_all.TextGrid
-## 7   1  0.0000000 0.1072426                 3               s1_all.TextGrid
-## 8   2  0.1072426 0.1887230            ı    3               s1_all.TextGrid
-## 9   3  0.1887230 0.4821542                 3               s1_all.TextGrid
-## 2   2  0.4821542 0.9120635          tap    1        labels s1_all.TextGrid
-## 5   2  0.4821542 0.9120635 2_s1_tap.wav    2 backup labels s1_all.TextGrid
-## 10  4  0.4821542 0.5770552                 3               s1_all.TextGrid
-## 11  5  0.5770552 0.6793392            æ    3               s1_all.TextGrid
-## 12  6  0.6793392 0.9120635                 3               s1_all.TextGrid
-## 3   3  0.9120635 1.3942177          top    1        labels s1_all.TextGrid
-## 6   3  0.9120635 1.3942177 3_s1_top.wav    2 backup labels s1_all.TextGrid
-## 13  7  0.9120635 1.0364661                 3               s1_all.TextGrid
-## 14  8  1.0364661 1.1066780            ɒ    3               s1_all.TextGrid
-## 15  9  1.1066780 1.3942177                 3               s1_all.TextGrid
+
## Error in file(file_name, encoding = readr::guess_encoding(file_name)$encoding): invalid 'encoding' argument

So in order to create the desired table we can use the tier_to_df() function:

-
-t1 <- tier_to_df("s1/s1_all.TextGrid", tier = 1)
-t1
-
##   id time_start  time_end content tier tier_name          source
-## 1  1  0.0000000 0.4821542     tip    1    labels s1_all.TextGrid
-## 2  2  0.4821542 0.9120635     tap    1    labels s1_all.TextGrid
-## 3  3  0.9120635 1.3942177     top    1    labels s1_all.TextGrid
-
-t3 <- tier_to_df("s1/s1_all.TextGrid", tier = 3)
-t3
-
##    id time_start  time_end content tier tier_name          source
-## 7   1  0.0000000 0.1072426            3           s1_all.TextGrid
-## 8   2  0.1072426 0.1887230       ı    3           s1_all.TextGrid
-## 9   3  0.1887230 0.4821542            3           s1_all.TextGrid
-## 10  4  0.4821542 0.5770552            3           s1_all.TextGrid
-## 11  5  0.5770552 0.6793392       æ    3           s1_all.TextGrid
-## 12  6  0.6793392 0.9120635            3           s1_all.TextGrid
-## 13  7  0.9120635 1.0364661            3           s1_all.TextGrid
-## 14  8  1.0364661 1.1066780       ɒ    3           s1_all.TextGrid
-## 15  9  1.1066780 1.3942177            3           s1_all.TextGrid
+
+t1 <- tier_to_df("s1/s1_all.TextGrid", tier = 1)
+
## Error in file(file_name, encoding = readr::guess_encoding(file_name)$encoding): invalid 'encoding' argument
+
+t1
+
## Error in eval(expr, envir, enclos): object 't1' not found
+
+t3 <- tier_to_df("s1/s1_all.TextGrid", tier = 3)
+
## Error in file(file_name, encoding = readr::guess_encoding(file_name)$encoding): invalid 'encoding' argument
+
+t3
+
## Error in eval(expr, envir, enclos): object 't3' not found

As we see the first tier is ready, but the third tier contains empty annotations. Let’s remove them:

-
-t3 <- t3[t3$content != "",]
-t3
-
##    id time_start  time_end content tier tier_name          source
-## 8   2  0.1072426 0.1887230       ı    3           s1_all.TextGrid
-## 11  5  0.5770552 0.6793392       æ    3           s1_all.TextGrid
-## 14  8  1.0364661 1.1066780       ɒ    3           s1_all.TextGrid
+
+t3 <- t3[t3$content != "",]
+
## Error in eval(expr, envir, enclos): object 't3' not found
+
+t3
+
## Error in eval(expr, envir, enclos): object 't3' not found

So from this point it is possible to create the table that we wanted:

-
+
 new_df <- data.frame(words = t1$content,
-                     sounds = t3$content)
-new_df
-
##   words sounds
-## 1   tip      ı
-## 2   tap      æ
-## 3   top      ɒ
+ sounds = t3$content)
+
## Error in eval(expr, envir, enclos): object 't1' not found
+
+new_df
+
## Error in eval(expr, envir, enclos): object 'new_df' not found

So now we are ready to run our code for creating an annotation viewer:

-
+
 create_viewer(audio_dir = "s1/s1_sounds/",
               picture_dir = "s1/s1_pics/",
               table = new_df,
               output_dir = "s1/",
               output_file = "stimuli_viewer")
## Since the result .html file possibly containes some vulnerable data, researcher(s) bear the whole responsibility for the publishing of the result. Run vignette("ethical_research_with_phonfieldwork") for more details.
-
## Output created: s1/stimuli_viewer.html
+
## Error in eval(expr, envir, enclos): object 'new_df' not found

By default sorting in the result annotation viewer will be according file names in the system, so if you want to have another default sorting you can specify column names that the result table should be sorted by @@ -1139,16 +1130,17 @@

Create a viewer
I will add some glottocodes for Russian, Polish and Czech to the dataframe that we have already worked with (for those data it doesn’t make any sense, I am just giving an example of usage):

-
-new_df$glottocode <- c("russ1263", "poli1260", "czec1258")
-create_viewer(audio_dir = "s1/s1_sounds/",
+
+new_df$glottocode <- c("russ1263", "poli1260", "czec1258")
+
## Error: object 'new_df' not found
+
+create_viewer(audio_dir = "s1/s1_sounds/",
               picture_dir = "s1/s1_pics/",
               table = new_df,
               output_dir = "s1/",
               output_file = "stimuli_viewer2",
               map = TRUE)
-
## Since the result .html file possibly containes some vulnerable data, researcher(s) bear the whole responsibility for the publishing of the result. Run vignette("ethical_research_with_phonfieldwork") for more details.
-
## Output created: s1/stimuli_viewer2.html
+
## Error in eval(expr, envir, enclos): object 'new_df' not found

Here is the result file.

It is also possible to provide your own coordinates with diff --git a/docs/articles/unnamed-chunk-23-1.png b/docs/articles/unnamed-chunk-23-1.png index b59210a..6f9faac 100644 Binary files a/docs/articles/unnamed-chunk-23-1.png and b/docs/articles/unnamed-chunk-23-1.png differ diff --git a/docs/articles/unnamed-chunk-25-1.png b/docs/articles/unnamed-chunk-25-1.png index dd2519e..4fccc73 100644 Binary files a/docs/articles/unnamed-chunk-25-1.png and b/docs/articles/unnamed-chunk-25-1.png differ diff --git a/docs/articles/unnamed-chunk-27-1.png b/docs/articles/unnamed-chunk-27-1.png index 6e72968..dc7bcb8 100644 Binary files a/docs/articles/unnamed-chunk-27-1.png and b/docs/articles/unnamed-chunk-27-1.png differ diff --git a/docs/articles/unnamed-chunk-29-1.png b/docs/articles/unnamed-chunk-29-1.png index fdf685f..766d8ed 100644 Binary files a/docs/articles/unnamed-chunk-29-1.png and b/docs/articles/unnamed-chunk-29-1.png differ diff --git a/docs/articles/unnamed-chunk-31-1.png b/docs/articles/unnamed-chunk-31-1.png index f5859f2..766d8ed 100644 Binary files a/docs/articles/unnamed-chunk-31-1.png and b/docs/articles/unnamed-chunk-31-1.png differ diff --git a/docs/articles/unnamed-chunk-41-1.png b/docs/articles/unnamed-chunk-41-1.png index e60191f..9dfa83f 100644 Binary files a/docs/articles/unnamed-chunk-41-1.png and b/docs/articles/unnamed-chunk-41-1.png differ diff --git a/docs/articles/unnamed-chunk-42-1.png b/docs/articles/unnamed-chunk-42-1.png index f59cd46..a675bb7 100644 Binary files a/docs/articles/unnamed-chunk-42-1.png and b/docs/articles/unnamed-chunk-42-1.png differ diff --git a/docs/authors.html b/docs/authors.html index 1b433ed..b211865 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,7 +17,7 @@ phonfieldwork - 0.0.12 + 0.0.13

diff --git a/docs/index.html b/docs/index.html index 4151139..9c68d79 100644 --- a/docs/index.html +++ b/docs/index.html @@ -33,7 +33,7 @@ phonfieldwork - 0.0.12 + 0.0.13

@@ -95,7 +95,7 @@
  • extract sounds according to annotation
  • extract annotation from multiple linguistic formats (Praat .TextGrid, ELAN .eaf, EXMARaLDA .exb, Audacity .txt and subtitles .srt)
  • visualise oscillograms, spectrograms and annotations
  • -
  • create an html viewer like this, ethical problems of this kind of viewer in linguistic research are covered in the vignette vignette("ethical_research_with_phonfieldwork").
  • +
  • create an html viewer like this, ethical problems of this kind of viewer in linguistic research are covered in the vignette vignette("ethical_research_with_phonfieldwork").
  • For more details see tutorial.

    The main goal of the phonfieldwork package is to make the full research workflow, from data collection to data extraction and data representation, easier for people that are not familiar with programming. However, most of the phonfieldwork functionality can be found in other software and packages:

    diff --git a/docs/news/index.html b/docs/news/index.html index 893196e..7743f12 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -17,7 +17,7 @@ phonfieldwork - 0.0.12 + 0.0.13
    diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index b68397e..e55c68d 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,11 +1,11 @@ -pandoc: 2.19.2 +pandoc: 3.1.2 pkgdown: 2.0.7 pkgdown_sha: ~ articles: data_manipulation_with_tidyverse: data_manipulation_with_tidyverse.html ethical_research_with_phonfieldwork: ethical_research_with_phonfieldwork.html phonfieldwork: phonfieldwork.html -last_built: 2023-02-16T15:30Z +last_built: 2024-04-08T18:14Z urls: reference: https://ropensci.github.io/phonfieldwork/reference article: https://ropensci.github.io/phonfieldwork/articles diff --git a/docs/reference/index.html b/docs/reference/index.html index 6250bdd..31cc675 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -17,7 +17,7 @@ phonfieldwork - 0.0.12 + 0.0.13

    diff --git a/vignettes/data_manipulation_with_tidyverse.Rmd b/vignettes/data_manipulation_with_tidyverse.Rmd index e994077..f0c3769 100644 --- a/vignettes/data_manipulation_with_tidyverse.Rmd +++ b/vignettes/data_manipulation_with_tidyverse.Rmd @@ -1,7 +1,7 @@ --- title: "Manipulating `phonfieldwork` data with `tidyverse`" author: "George Moroz, [NRU HSE Linguistic Convergence Laboratory](https://ilcl.hse.ru/en/)" -date: "2023-02-16" +date: "2024-04-08" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Manipulating `phonfieldwork` data with `tidyverse`} @@ -42,22 +42,7 @@ If we convert this file to R we will achieve something like this: ```r textgrid_to_df("s1/s1_all.TextGrid") -#> id time_start time_end content tier tier_name source -#> 1 1 0.0000000 0.4821542 tip 1 labels s1_all.TextGrid -#> 4 1 0.0000000 0.4821542 1_s1_tip.wav 2 backup labels s1_all.TextGrid -#> 7 1 0.0000000 0.1072426 3 s1_all.TextGrid -#> 8 2 0.1072426 0.1887230 ı 3 s1_all.TextGrid -#> 9 3 0.1887230 0.4821542 3 s1_all.TextGrid -#> 2 2 0.4821542 0.9120635 tap 1 labels s1_all.TextGrid -#> 5 2 0.4821542 0.9120635 2_s1_tap.wav 2 backup labels s1_all.TextGrid -#> 10 4 0.4821542 0.5770552 3 s1_all.TextGrid -#> 11 5 0.5770552 0.6793392 æ 3 s1_all.TextGrid -#> 12 6 0.6793392 0.9120635 3 s1_all.TextGrid -#> 3 3 0.9120635 1.3942177 top 1 labels s1_all.TextGrid -#> 6 3 0.9120635 1.3942177 3_s1_top.wav 2 backup labels s1_all.TextGrid -#> 13 7 0.9120635 1.0364661 3 s1_all.TextGrid -#> 14 8 1.0364661 1.1066780 ɒ 3 s1_all.TextGrid -#> 15 9 1.1066780 1.3942177 3 s1_all.TextGrid +#> Error in file(file_name, encoding = readr::guess_encoding(file_name)$encoding): invalid 'encoding' argument ``` As we see this table has a long format structure: each observation has its own row. We can select the first two rows with the `filter()` function, remove all unnecessary columns with the `select()` function and spread everything in a table with the `pivot_wider()` function: @@ -68,12 +53,7 @@ textgrid_to_df("s1/s1_all.TextGrid") %>% filter(tier %in% 1:2) %>% select(-time_start, -time_end, -tier_name) %>% pivot_wider(names_from = tier, values_from = content) -#> # A tibble: 3 × 4 -#> id source `1` `2` -#> -#> 1 1 s1_all.TextGrid tip 1_s1_tip.wav -#> 2 2 s1_all.TextGrid tap 2_s1_tap.wav -#> 3 3 s1_all.TextGrid top 3_s1_top.wav +#> Error in file(file_name, encoding = readr::guess_encoding(file_name)$encoding): invalid 'encoding' argument ``` ## .flextext format @@ -129,19 +109,19 @@ df %>% #> the `.groups` argument. #> # A tibble: 136 × 6 #> # Groups: p_id, s_id, free_trans [19] -#> p_id s_id free_trans w_id txt gls -#> -#> 1 1 1 Жил-был (у Гъули?) петух. 1 б-ик1-о-й=гъоди "an-быть-pst-cvb(pf)… -#> 2 1 1 Жил-был (у Гъули?) петух. 2 б--о-ч1игу=гъоди "an--pst-neg.cvb-=re… -#> 3 1 1 Жил-был (у Гъули?) петух. 3 Гъули-б "Гъули-an(gen)" -#> 4 1 1 Жил-был (у Гъули?) петух. 4 х1елеко "петух" -#> 5 1 1 Жил-был (у Гъули?) петух. 5 . "" -#> 6 2 2 Он грелся на улице(?). 6 къват1и-ла=гъоди "улица-in-=rep" -#> 7 2 2 Он грелся на улице(?). 7 б-ик1-о-j "an-быть-pst-cvb(pf)" -#> 8 2 2 Он грелся на улице(?). 8 букьир-ъа "Букир-sup" -#> 9 2 2 Он грелся на улице(?). 9 . "" -#> 10 2 3 [Ему в ногу] воткнулась колючка. 10 къинни-й=гъоди "втыкаться-cvb(pf)-=… -#> # … with 126 more rows +#> p_id s_id free_trans w_id txt gls +#> +#> 1 1 1 Жил-был (у Гъули?) петух. 1 б-ик1-о-й=гъоди "an-быть-pst-cvb(p… +#> 2 1 1 Жил-был (у Гъули?) петух. 2 б--о-ч1игу=гъоди "an--pst-neg.cvb-=… +#> 3 1 1 Жил-был (у Гъули?) петух. 
3 Гъули-б "Гъули-an(gen)" +#> 4 1 1 Жил-был (у Гъули?) петух. 4 х1елеко "петух" +#> 5 1 1 Жил-был (у Гъули?) петух. 5 . "" +#> 6 2 2 Он грелся на улице(?). 6 къват1и-ла=гъоди "улица-in-=rep" +#> 7 2 2 Он грелся на улице(?). 7 б-ик1-о-j "an-быть-pst-cvb(p… +#> 8 2 2 Он грелся на улице(?). 8 букьир-ъа "Букир-sup" +#> 9 2 2 Он грелся на улице(?). 9 . "" +#> 10 2 3 [Ему в ногу] воткнулась колючка. 10 къинни-й=гъоди "втыкаться-cvb(pf)… +#> # ℹ 126 more rows ``` The first `filter()` removes some garbage rows that are present in our example flextext. The `select()` function selects only six important columns from 15 presented in the dataset. The `group_by()` and `summarise()` merge all text from `txt` variable and all glosses from `gls` variable together. Pipe operater `%>% ` make it possible to pass the result from the previous funstion as an input to the following one. @@ -165,27 +145,27 @@ df %>% #> argument. #> # A tibble: 19 × 5 #> # Groups: p_id, s_id [19] -#> p_id s_id free_trans txt gls -#> -#> 1 1 1 "Жил-был (у Гъули?) петух." б-ик… "an-… -#> 2 2 2 "Он грелся на улице(?)." къва… "ули… -#> 3 2 3 "[Ему в ногу] воткнулась колючка." къин… "вты… -#> 4 3 4 "Когда колючка воткнулась, [он] ушел к Бихтаю." ццан… "кол… -#> 5 4 5 "Гъули не обнаружил дома Бихтай, дома ее нет, из себя не вынул и… бихь… "Бих… -#> 6 5 6 "Оттуда пошел к Умалаю, " б-uʔ… "an-… -#> 7 6 7 "Оттуда петух пошел к Патимат." х1ел… "пет… -#> 8 8 10 "Оттуда [петух] пошел к Ханичай." гье-… "dem… -#> 9 9 11 "Иди к Хурмат, ..." хъан… "Хан… -#> 10 10 12 "Когда дошёл до двора Хурмат, из окна появился мальчик, сказал:" рул1… "гов… -#> 11 11 13 "Три дня не ели, мы с ней не зная, сказал он." рул1… "гов… -#> 12 12 14 "Оттуда он ушёл и дошёл до Айшат Исмаиловой и её не обнаружив, о… гье-… "dem… -#> 13 13 15 "Захраил …?" й--и… "f--… -#> 14 14 16 "И он пошел в село. Захраил сказала, что колючка воткнулась в пе… б-ук… "an-… -#> 15 15 17 "Оттуда снизу вверх к Исрапилу ..." гьег… "там… -#> 16 16 18 "Шли-шли и пришли к Гаджи." гье-… "dem… -#> 17 17 19 "Они поссорились (?) и прогнали (?) петуха." й-ей… "f-р… -#> 18 18 20 "Когда закончили ссориться, [пошли?] к Забиту." джид… "дел… -#> 19 19 21 "На воротах Забита петух обнаружил замок и пошел к Зумайрат." х1ел… "пет… +#> p_id s_id free_trans txt gls +#> +#> 1 1 1 "Жил-был (у Гъули?) петух." б-ик… "an-… +#> 2 2 2 "Он грелся на улице(?)." къва… "ули… +#> 3 2 3 "[Ему в ногу] воткнулась колючка." къин… "вты… +#> 4 3 4 "Когда колючка воткнулась, [он] ушел к Бихтаю." ццан… "кол… +#> 5 4 5 "Гъули не обнаружил дома Бихтай, дома ее нет, из себя не вынул… бихь… "Бих… +#> 6 5 6 "Оттуда пошел к Умалаю, " б-uʔ… "an-… +#> 7 6 7 "Оттуда петух пошел к Патимат." х1ел… "пет… +#> 8 8 10 "Оттуда [петух] пошел к Ханичай." гье-… "dem… +#> 9 9 11 "Иди к Хурмат, ..." хъан… "Хан… +#> 10 10 12 "Когда дошёл до двора Хурмат, из окна появился мальчик, сказал… рул1… "гов… +#> 11 11 13 "Три дня не ели, мы с ней не зная, сказал он." рул1… "гов… +#> 12 12 14 "Оттуда он ушёл и дошёл до Айшат Исмаиловой и её не обнаружив,… гье-… "dem… +#> 13 13 15 "Захраил …?" й--и… "f--… +#> 14 14 16 "И он пошел в село. Захраил сказала, что колючка воткнулась в … б-ук… "an-… +#> 15 15 17 "Оттуда снизу вверх к Исрапилу ..." гьег… "там… +#> 16 16 18 "Шли-шли и пришли к Гаджи." гье-… "dem… +#> 17 17 19 "Они поссорились (?) и прогнали (?) петуха." й-ей… "f-р… +#> 18 18 20 "Когда закончили ссориться, [пошли?] к Забиту." джид… "дел… +#> 19 19 21 "На воротах Забита петух обнаружил замок и пошел к Зумайрат." 
х1ел… "пет… ``` It is also very easy to get some simple statistics from the data: diff --git a/vignettes/glossed_document.docx b/vignettes/glossed_document.docx index 3126d50..97cccf4 100644 Binary files a/vignettes/glossed_document.docx and b/vignettes/glossed_document.docx differ diff --git a/vignettes/phonfieldwork.Rmd b/vignettes/phonfieldwork.Rmd index a2bf343..e60b80a 100644 --- a/vignettes/phonfieldwork.Rmd +++ b/vignettes/phonfieldwork.Rmd @@ -2,7 +2,7 @@ title: "Phonetic fieldwork and experiments with `phonfieldwork` package" author: "G. Moroz, [NRU HSE Linguistic Convergence Laboratory](https://ilcl.hse.ru/en/)" bibliography: bibliography.bib -date: "2023-02-16" +date: "2024-04-08" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Phonetic fieldwork and experiments with `phonfieldwork` package} @@ -61,7 +61,19 @@ citation("phonfieldwork") ``` ``` -## Error in person$family: $ operator is invalid for atomic vectors +## To cite package 'phonfieldwork' in publications use: +## +## Moroz G (2023). "Phonetic fieldwork research and experiments with the R +## package phonfieldwork." In Kobozeva I, Semyonova K, Kostyuk A, Zakharov L, +## Svetozarova N (eds.), _«…Vperyod i vverkh po lestnitse zvuchashey». Sbornik +## statye k 80-letiyu Olgi Fyodorovny Krivnovoy [Festschrift in memoriam to Olga +## Fyodorovna Krivnova]_. Buki Vedi, Moscow. +## +## Moroz G (2020). _Phonetic fieldwork and experiments with phonfieldwork +## package_. . +## +## To see these entries in BibTeX format, use 'print(, bibtex=TRUE)', +## 'toBibtex(.)', or set 'options(citation.bibtex.max=999)'. ``` If you have any trouble using the package, do not hesitate to create [an issue on Github](https://github.com/ropensci/phonfieldwork/issues/new). @@ -167,7 +179,7 @@ my_image ``` ``` -## [1] "/home/agricolamz/R/x86_64-pc-linux-gnu-library/4.2/phonfieldwork/extdata/r-logo.png" +## [1] "/home/agricolamz/R/x86_64-pc-linux-gnu-library/4.3/phonfieldwork/extdata/r-logo.png" ``` ```r @@ -289,7 +301,7 @@ get_sound_duration("s1/2_s1_tap.wav") ``` ## file duration -## 1 2_s1_tap.wav 0.4299093 +## 1 2_s1_tap.wav 0.4821542 ``` It is also possible to analyze the whole folder using the `read_from_folder()` function. The first argument is the path to the folder. The second argument is the type of information or file type (possible values: "audacity", "duration", "eaf", "exb", "flextext", "formant", "intensity", "picth", "srt", "textgrid"): @@ -392,12 +404,22 @@ annotate_textgrid(annotation = c("", "ı", "", "", "æ", "", "", "ɒ", ""), backup = FALSE) ``` + +``` +## Error in file(file_name, encoding = readr::guess_encoding(file_name)$encoding): invalid 'encoding' argument +``` + ![](unnamed-chunk-29-1.png) You can see that we created a third tier with annotation. The only thing left is to move annotation boundaries in Praat (this can not be automated): + +``` +## Error in file(file_name, encoding = readr::guess_encoding(file_name)$encoding): invalid 'encoding' argument +``` + ![](unnamed-chunk-31-1.png) You can see from the last figure that no backup tier was created (`backup = FALSE`), that the third tier was annotated (`tier = 3`). 
@@ -459,8 +481,15 @@ extract_intervals(file_name = "s1/s1_all.wav", prefix = "s1_") ``` +``` +## Error in file(file_name, encoding = readr::guess_encoding(file_name)$encoding): invalid 'encoding' argument +``` + ``` +## ├── my_stimuli_df.csv +## ├── my_stimuli_df.xlsx +## ├── phonfieldwork.Rmd ## ├── s1 ## │   ├── 1_s1_tip.wav ## │   ├── 2_s1_tap.wav @@ -473,9 +502,6 @@ extract_intervals(file_name = "s1/s1_all.wav", ## │   ├── s1_all.TextGrid ## │   ├── s1_all.wav ## │   └── s1_sounds -## │   ├── 1_s1_ı.wav -## │   ├── 2_s1_æ.wav -## │   └── 3_s1_ɒ.wav ## ├── s2 ## │   ├── s2_tap_2.wav ## │   ├── s2_tip_1.TextGrid @@ -491,7 +517,9 @@ It is possible to view an oscilogram and spetrogram of any soundfile: draw_sound(file_name = "s1/s1_sounds/1_s1_ı.wav") ``` -![](unnamed-chunk-40-1.png) +``` +## Error in tuneR::readWave(file_name): File 's1/s1_sounds/1_s1_ı.wav' does not exist. +``` There are additional parameters: @@ -524,8 +552,7 @@ draw_sound("s1/s1_all.wav", ``` ``` -## Warning in df$tier == unique(df$tier): longer object length is not a multiple of shorter -## object length +## Error in file(file_name, encoding = readr::guess_encoding(file_name)$encoding): invalid 'encoding' argument ``` ![](unnamed-chunk-41-1.png) @@ -540,8 +567,7 @@ draw_sound("s1/s1_all.wav", ``` ``` -## Warning in df$tier == unique(df$tier): longer object length is not a multiple of shorter -## object length +## Error in file(file_name, encoding = readr::guess_encoding(file_name)$encoding): invalid 'encoding' argument ``` ![](unnamed-chunk-42-1.png) @@ -555,8 +581,16 @@ draw_sound(file_name = "s1/s1_sounds/1_s1_ı.wav", title = "s1 tip") ``` +``` +## Error in tuneR::readWave(file_name): File 's1/s1_sounds/1_s1_ı.wav' does not exist. +``` + ``` +## ├── introduction_to_phonfieldwork.Rmd.orig +## ├── my_stimuli_df.csv +## ├── my_stimuli_df.xlsx +## ├── phonfieldwork.Rmd ## ├── s1 ## │   ├── 1_s1_tip.wav ## │   ├── 2_s1_tap.wav @@ -568,11 +602,7 @@ draw_sound(file_name = "s1/s1_sounds/1_s1_ı.wav", ## │   │   └── logging.csv ## │   ├── s1_all.TextGrid ## │   ├── s1_all.wav -## │   ├── s1_sounds -## │   │   ├── 1_s1_ı.wav -## │   │   ├── 2_s1_æ.wav -## │   │   └── 3_s1_ɒ.wav -## │   └── s1_tip.png +## │   └── s1_sounds ## ├── s2 ## │   ├── s2_tap_2.wav ## │   ├── s2_tip_1.TextGrid @@ -588,8 +618,20 @@ draw_sound(sounds_from_folder = "s1/s1_sounds/", pic_folder_name = "s1_pics") ``` +``` +## Error in draw_sound(file_name = file_name, annotation = annotation, from = from, : The draw_sound() functions works only with .wav(e) or +## .mp3 formats +``` + ``` +## │   ├── s1_all.TextGrid +## │   └── zilo_test.flextext +## ├── first_example.html +## ├── introduction_to_phonfieldwork.Rmd.orig +## ├── my_stimuli_df.csv +## ├── my_stimuli_df.xlsx +## ├── phonfieldwork.Rmd ## ├── s1 ## │   ├── 1_s1_tip.wav ## │   ├── 2_s1_tap.wav @@ -602,14 +644,7 @@ draw_sound(sounds_from_folder = "s1/s1_sounds/", ## │   ├── s1_all.TextGrid ## │   ├── s1_all.wav ## │   ├── s1_pics -## │   │   ├── 1_s1_ı.png -## │   │   ├── 2_s1_æ.png -## │   │   └── 3_s1_ɒ.png -## │   ├── s1_sounds -## │   │   ├── 1_s1_ı.wav -## │   │   ├── 2_s1_æ.wav -## │   │   └── 3_s1_ɒ.wav -## │   └── s1_tip.png +## │   └── s1_sounds ## ├── s2 ## │   ├── s2_tap_2.wav ## │   ├── s2_tip_1.TextGrid @@ -863,6 +898,13 @@ We will start with the previous folder structure: ``` +## │   └── zilo_test.flextext +## ├── first_example.html +## ├── glossed_document.html +## ├── introduction_to_phonfieldwork.Rmd.orig +## ├── my_stimuli_df.csv +## ├── my_stimuli_df.xlsx +## ├── phonfieldwork.Rmd 
## ├── s1 ## │   ├── 1_s1_tip.wav ## │   ├── 2_s1_tap.wav @@ -875,14 +917,7 @@ We will start with the previous folder structure: ## │   ├── s1_all.TextGrid ## │   ├── s1_all.wav ## │   ├── s1_pics -## │   │   ├── 1_s1_ı.png -## │   │   ├── 2_s1_æ.png -## │   │   └── 3_s1_ɒ.png -## │   ├── s1_sounds -## │   │   ├── 1_s1_ı.wav -## │   │   ├── 2_s1_æ.wav -## │   │   └── 3_s1_ɒ.wav -## │   └── s1_tip.png +## │   └── s1_sounds ## ├── s2 ## │   ├── s2_tap_2.wav ## │   ├── s2_tip_1.TextGrid @@ -898,7 +933,7 @@ list.files("s1/s1_sounds/") # sounds ``` ``` -## [1] "1_s1_ı.wav" "2_s1_æ.wav" "3_s1_ɒ.wav" +## character(0) ``` ```r @@ -906,7 +941,7 @@ list.files("s1/s1_pics/") # pictures ``` ``` -## [1] "1_s1_ı.png" "2_s1_æ.png" "3_s1_ɒ.png" +## character(0) ``` So what is left is the table. It is possible to create manually (or upload it form .csv or .xlsx files, see section 4.1): @@ -948,6 +983,13 @@ As a result, a `stimuli_viewer.html` was created in the `s1` folder. ``` +## │   └── zilo_test.flextext +## ├── first_example.html +## ├── glossed_document.html +## ├── introduction_to_phonfieldwork.Rmd.orig +## ├── my_stimuli_df.csv +## ├── my_stimuli_df.xlsx +## ├── phonfieldwork.Rmd ## ├── s1 ## │   ├── 1_s1_tip.wav ## │   ├── 2_s1_tap.wav @@ -960,14 +1002,7 @@ As a result, a `stimuli_viewer.html` was created in the `s1` folder. ## │   ├── s1_all.TextGrid ## │   ├── s1_all.wav ## │   ├── s1_pics -## │   │   ├── 1_s1_ı.png -## │   │   ├── 2_s1_æ.png -## │   │   └── 3_s1_ɒ.png ## │   ├── s1_sounds -## │   │   ├── 1_s1_ı.wav -## │   │   ├── 2_s1_æ.wav -## │   │   └── 3_s1_ɒ.wav -## │   ├── s1_tip.png ## │   └── stimuli_viewer.html ## ├── s2 ## │   ├── s2_tap_2.wav @@ -986,22 +1021,7 @@ textgrid_to_df("s1/s1_all.TextGrid") ``` ``` -## id time_start time_end content tier tier_name source -## 1 1 0.0000000 0.4821542 tip 1 labels s1_all.TextGrid -## 4 1 0.0000000 0.4821542 1_s1_tip.wav 2 backup labels s1_all.TextGrid -## 7 1 0.0000000 0.1072426 3 s1_all.TextGrid -## 8 2 0.1072426 0.1887230 ı 3 s1_all.TextGrid -## 9 3 0.1887230 0.4821542 3 s1_all.TextGrid -## 2 2 0.4821542 0.9120635 tap 1 labels s1_all.TextGrid -## 5 2 0.4821542 0.9120635 2_s1_tap.wav 2 backup labels s1_all.TextGrid -## 10 4 0.4821542 0.5770552 3 s1_all.TextGrid -## 11 5 0.5770552 0.6793392 æ 3 s1_all.TextGrid -## 12 6 0.6793392 0.9120635 3 s1_all.TextGrid -## 3 3 0.9120635 1.3942177 top 1 labels s1_all.TextGrid -## 6 3 0.9120635 1.3942177 3_s1_top.wav 2 backup labels s1_all.TextGrid -## 13 7 0.9120635 1.0364661 3 s1_all.TextGrid -## 14 8 1.0364661 1.1066780 ɒ 3 s1_all.TextGrid -## 15 9 1.1066780 1.3942177 3 s1_all.TextGrid +## Error in file(file_name, encoding = readr::guess_encoding(file_name)$encoding): invalid 'encoding' argument ``` So in order to create desired table we can use `tier_to_df()` function: @@ -1009,32 +1029,34 @@ So in order to create desired table we can use `tier_to_df()` function: ```r t1 <- tier_to_df("s1/s1_all.TextGrid", tier = 1) +``` + +``` +## Error in file(file_name, encoding = readr::guess_encoding(file_name)$encoding): invalid 'encoding' argument +``` + +```r t1 ``` ``` -## id time_start time_end content tier tier_name source -## 1 1 0.0000000 0.4821542 tip 1 labels s1_all.TextGrid -## 2 2 0.4821542 0.9120635 tap 1 labels s1_all.TextGrid -## 3 3 0.9120635 1.3942177 top 1 labels s1_all.TextGrid +## Error in eval(expr, envir, enclos): object 't1' not found ``` ```r t3 <- tier_to_df("s1/s1_all.TextGrid", tier = 3) +``` + +``` +## Error in file(file_name, encoding = readr::guess_encoding(file_name)$encoding): 
invalid 'encoding' argument +``` + +```r t3 ``` ``` -## id time_start time_end content tier tier_name source -## 7 1 0.0000000 0.1072426 3 s1_all.TextGrid -## 8 2 0.1072426 0.1887230 ı 3 s1_all.TextGrid -## 9 3 0.1887230 0.4821542 3 s1_all.TextGrid -## 10 4 0.4821542 0.5770552 3 s1_all.TextGrid -## 11 5 0.5770552 0.6793392 æ 3 s1_all.TextGrid -## 12 6 0.6793392 0.9120635 3 s1_all.TextGrid -## 13 7 0.9120635 1.0364661 3 s1_all.TextGrid -## 14 8 1.0364661 1.1066780 ɒ 3 s1_all.TextGrid -## 15 9 1.1066780 1.3942177 3 s1_all.TextGrid +## Error in eval(expr, envir, enclos): object 't3' not found ``` As we see the first tier is ready, but the third tier contains empty annotations. Let's remove them: @@ -1042,14 +1064,18 @@ As we see the first tier is ready, but the third tier contains empty annotations ```r t3 <- t3[t3$content != "",] +``` + +``` +## Error in eval(expr, envir, enclos): object 't3' not found +``` + +```r t3 ``` ``` -## id time_start time_end content tier tier_name source -## 8 2 0.1072426 0.1887230 ı 3 s1_all.TextGrid -## 11 5 0.5770552 0.6793392 æ 3 s1_all.TextGrid -## 14 8 1.0364661 1.1066780 ɒ 3 s1_all.TextGrid +## Error in eval(expr, envir, enclos): object 't3' not found ``` So from this point it is possible to create the table that we wanted: @@ -1058,14 +1084,18 @@ So from this point it is possible to create the table that we wanted: ```r new_df <- data.frame(words = t1$content, sounds = t3$content) +``` + +``` +## Error in eval(expr, envir, enclos): object 't1' not found +``` + +```r new_df ``` ``` -## words sounds -## 1 tip ı -## 2 tap æ -## 3 top ɒ +## Error in eval(expr, envir, enclos): object 'new_df' not found ``` So now we are ready to run our code for creating an annotation viewer: @@ -1084,7 +1114,7 @@ create_viewer(audio_dir = "s1/s1_sounds/", ``` ``` -## Output created: s1/stimuli_viewer.html +## Error in eval(expr, envir, enclos): object 'new_df' not found ``` By default sorting in the result annotation viewer will be according file names in the system, so if you want to have another default sorting you can specify column names that the result table should be sorted by using the `sorting_columns` argument. @@ -1100,6 +1130,13 @@ I will add some glottocodes for Russian, Polish and Czech to the dataframe that ```r new_df$glottocode <- c("russ1263", "poli1260", "czec1258") +``` + +``` +## Error: object 'new_df' not found +``` + +```r create_viewer(audio_dir = "s1/s1_sounds/", picture_dir = "s1/s1_pics/", table = new_df, @@ -1109,11 +1146,7 @@ create_viewer(audio_dir = "s1/s1_sounds/", ``` ``` -## Since the result .html file possibly containes some vulnerable data, researcher(s) bear the whole responsibility for the publishing of the result. Run vignette("ethical_research_with_phonfieldwork") for more details. -``` - -``` -## Output created: s1/stimuli_viewer2.html +## Error in eval(expr, envir, enclos): object 'new_df' not found ``` [Here](https://ropensci.github.io/phonfieldwork/additional/stimuli_viewer2.html) is the result file. 
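Before regenerating these vignettes, a quick check of what `readr::guess_encoding()` actually returns for the TextGrid files used in the examples makes the failure mode easy to spot. The snippet below is illustrative only (it is not part of this changeset and assumes phonfieldwork and readr are installed); it uses the `test.TextGrid` file shipped in `extdata`:

```r
# Illustrative check, not part of this diff: inspect readr's encoding guesses
# for a TextGrid example bundled with the package.
tg_path <- system.file("extdata", "test.TextGrid", package = "phonfieldwork")

guesses <- readr::guess_encoding(tg_path)
guesses               # candidate encodings with their confidence scores
nrow(guesses) == 1    # FALSE (0 rows or several rows) is exactly the case in which
                      # file(..., encoding = guesses$encoding) raises
                      # "invalid 'encoding' argument"
```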
diff --git a/vignettes/s1/1_s1_tip.wav b/vignettes/s1/1_s1_tip.wav index 07ae049..d585810 100644 Binary files a/vignettes/s1/1_s1_tip.wav and b/vignettes/s1/1_s1_tip.wav differ diff --git a/vignettes/s1/2_s1_tap.wav b/vignettes/s1/2_s1_tap.wav index d585810..07ae049 100644 Binary files a/vignettes/s1/2_s1_tap.wav and b/vignettes/s1/2_s1_tap.wav differ diff --git a/vignettes/s1/backup/01.wav b/vignettes/s1/backup/01.wav index 07ae049..d585810 100644 Binary files a/vignettes/s1/backup/01.wav and b/vignettes/s1/backup/01.wav differ diff --git a/vignettes/s1/backup/02.wav b/vignettes/s1/backup/02.wav index d585810..07ae049 100644 Binary files a/vignettes/s1/backup/02.wav and b/vignettes/s1/backup/02.wav differ diff --git a/vignettes/s1/s1_all.wav b/vignettes/s1/s1_all.wav index 5340285..f22a87b 100644 Binary files a/vignettes/s1/s1_all.wav and b/vignettes/s1/s1_all.wav differ diff --git "a/vignettes/s1/s1_pics/1_s1_\304\261.png" "b/vignettes/s1/s1_pics/1_s1_\304\261.png" deleted file mode 100644 index 209b380..0000000 Binary files "a/vignettes/s1/s1_pics/1_s1_\304\261.png" and /dev/null differ diff --git "a/vignettes/s1/s1_pics/2_s1_\303\246.png" "b/vignettes/s1/s1_pics/2_s1_\303\246.png" deleted file mode 100644 index e6df5f7..0000000 Binary files "a/vignettes/s1/s1_pics/2_s1_\303\246.png" and /dev/null differ diff --git "a/vignettes/s1/s1_pics/3_s1_\311\222.png" "b/vignettes/s1/s1_pics/3_s1_\311\222.png" deleted file mode 100644 index eefa218..0000000 Binary files "a/vignettes/s1/s1_pics/3_s1_\311\222.png" and /dev/null differ diff --git "a/vignettes/s1/s1_sounds/1_s1_\304\261.wav" "b/vignettes/s1/s1_sounds/1_s1_\304\261.wav" deleted file mode 100644 index f73c1cf..0000000 Binary files "a/vignettes/s1/s1_sounds/1_s1_\304\261.wav" and /dev/null differ diff --git "a/vignettes/s1/s1_sounds/2_s1_\303\246.wav" "b/vignettes/s1/s1_sounds/2_s1_\303\246.wav" deleted file mode 100644 index 38a2bbd..0000000 Binary files "a/vignettes/s1/s1_sounds/2_s1_\303\246.wav" and /dev/null differ diff --git "a/vignettes/s1/s1_sounds/3_s1_\311\222.wav" "b/vignettes/s1/s1_sounds/3_s1_\311\222.wav" deleted file mode 100644 index 61d6057..0000000 Binary files "a/vignettes/s1/s1_sounds/3_s1_\311\222.wav" and /dev/null differ diff --git a/vignettes/s1/s1_tip.png b/vignettes/s1/s1_tip.png deleted file mode 100644 index 96fd72b..0000000 Binary files a/vignettes/s1/s1_tip.png and /dev/null differ diff --git a/vignettes/unnamed-chunk-23-1.png b/vignettes/unnamed-chunk-23-1.png index b59210a..6f9faac 100644 Binary files a/vignettes/unnamed-chunk-23-1.png and b/vignettes/unnamed-chunk-23-1.png differ diff --git a/vignettes/unnamed-chunk-25-1.png b/vignettes/unnamed-chunk-25-1.png index dd2519e..4fccc73 100644 Binary files a/vignettes/unnamed-chunk-25-1.png and b/vignettes/unnamed-chunk-25-1.png differ diff --git a/vignettes/unnamed-chunk-27-1.png b/vignettes/unnamed-chunk-27-1.png index 6e72968..dc7bcb8 100644 Binary files a/vignettes/unnamed-chunk-27-1.png and b/vignettes/unnamed-chunk-27-1.png differ diff --git a/vignettes/unnamed-chunk-29-1.png b/vignettes/unnamed-chunk-29-1.png index fdf685f..766d8ed 100644 Binary files a/vignettes/unnamed-chunk-29-1.png and b/vignettes/unnamed-chunk-29-1.png differ diff --git a/vignettes/unnamed-chunk-31-1.png b/vignettes/unnamed-chunk-31-1.png index f5859f2..766d8ed 100644 Binary files a/vignettes/unnamed-chunk-31-1.png and b/vignettes/unnamed-chunk-31-1.png differ diff --git a/vignettes/unnamed-chunk-40-1.png b/vignettes/unnamed-chunk-40-1.png deleted file mode 100644 index 
19baf4a..0000000 Binary files a/vignettes/unnamed-chunk-40-1.png and /dev/null differ diff --git a/vignettes/unnamed-chunk-41-1.png b/vignettes/unnamed-chunk-41-1.png index e60191f..9dfa83f 100644 Binary files a/vignettes/unnamed-chunk-41-1.png and b/vignettes/unnamed-chunk-41-1.png differ diff --git a/vignettes/unnamed-chunk-42-1.png b/vignettes/unnamed-chunk-42-1.png index f59cd46..a675bb7 100644 Binary files a/vignettes/unnamed-chunk-42-1.png and b/vignettes/unnamed-chunk-42-1.png differ