add create_subannotation() function

ropensci · May 10, 2020 · 1db0a9b · 1db0a9b
1 parent b646387
commit 1db0a9b
Show file tree

Hide file tree

Showing 26 changed files with 438 additions and 252 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -18,4 +18,4 @@ URL: https://CRAN.R-project.org/package=phonfieldwork, https://agricolamz.github
 BugReports: https://github.com/agricolamz/phonfieldwork/issues
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 7.0.2
+RoxygenNote: 7.1.0
diff --git a/NAMESPACE b/NAMESPACE
@@ -6,6 +6,7 @@ export(concatenate_soundfiles)
 export(create_image_look_up)
 export(create_presentation)
 export(create_sound_play)
+export(create_subannotation)
 export(create_viewer)
 export(df_to_tier)
 export(draw_sound)

diff --git a/NEWS b/NEWS
@@ -1,4 +1,5 @@
-phonfieldwork 0.0.4 (...2020)
+phonfieldwork 0.0.4 (10.05.2020)
+  - add "create_subannotation()" function
 
 phonfieldwork 0.0.3 (07.01.2020)
   - vertically and horisontally center text in presentations created by "create_presentation()"; thx @Pandaklez #1

diff --git a/R/annotate_textgrid.R b/R/annotate_textgrid.R
@@ -19,6 +19,8 @@
 #'
 #' @export
 #'
+#' @importFrom tuneR readWave
+#'
 
 annotate_textgrid <- function(annotation,
                               textgrid,
@@ -34,7 +36,6 @@ annotate_textgrid <- function(annotation,
     tg <- readLines(normalizePath(textgrid))
   }
 
-
 # get start and end info about tiers --------------------------------------
   starts <- grep("item \\[\\d{1,}\\]:", tg)
   ends <- c(starts[-1]-1, length(tg))
@@ -92,7 +93,7 @@ annotate_textgrid <- function(annotation,
   tg[starts[tier_number]:ends[tier_number]] <- w_tier
 
 # write the result TextGrid -----------------------------------------------
-  if (isTRUE(write)) {
+  if (write) {
     writeLines(tg, normalizePath(textgrid))
   } else {
     return(tg)

diff --git a/R/create_subannotation.R b/R/create_subannotation.R
@@ -0,0 +1,57 @@
+#' Create boundaries in a TextGrid tier
+#'
+#'
+#' @author George Moroz <[email protected]>
+#'
+#' @param textgrid character with a filename or path to the TextGrid
+#' @param tier value that could be either the ordinal number of the tier or the name of the tier
+#' @param new_tier_name a name of a new created tier
+#' @param n_of_annotations number of new annotations per annotation to create
+#' @param each non-negative integer. Each new blank annotation is repeated every first, second or ... times
+#' @param omit_blank logical. If TRUE (by default) it doesn't create subannotations for empty annotations.
+#' @param overwrite logical. If TRUE (by default) it overwrites an existing tier.
+#'
+#' @return a string that contains the TextGrid. If argument overwrite is \code{TRUE}, then no output.
+#'
+#' @examples
+#' create_subannotation(textgrid = example_textgrid, tier = 1, overwrite = FALSE)
+#'
+#' @export
+#'
+
+create_subannotation <- function(textgrid,
+                                 tier = 1,
+                                 new_tier_name = "",
+                                 n_of_annotations = 4,
+                                 each = 1,
+                                 omit_blank = TRUE,
+                                 overwrite = TRUE){
+
+# read TextGrid (lines already in memory or a path to a file) --------------
+  if(grepl("TextGrid", textgrid[2])){
+    tg <- textgrid
+  } else{
+    tg <- readLines(normalizePath(textgrid))
+  }
+
+# get annotations of the baseline tier ------------------------------------
+  df <- phonfieldwork::tier_to_df(tg, tier = tier)
+  if(omit_blank){
+    df <- df[df$annotation != "",]
+  }
+  if(nrow(df) == 0){
+    stop("There are no annotations to subdivide in the selected tier", call. = FALSE)
+  }
+
+# cut each annotation into equal subintervals -----------------------------
+  l <- lapply(seq_len(nrow(df)), function(i){
+    t <- seq(df$start[i], df$end[i], length.out = each*(n_of_annotations+1))
+    data.frame(start = t[-length(t)], end = t[-1])
+  })
+
+# number the new intervals and add them as a blank tier -------------------
+  final <- do.call(rbind, l)
+  final <- cbind(id = seq_len(nrow(final)), final, annotation = "")
+  phonfieldwork::df_to_tier(final, textgrid = textgrid,
+                            tier_name = new_tier_name, overwrite = overwrite)
+}
diff --git a/R/create_viewer.R b/R/create_viewer.R
@@ -29,7 +29,7 @@ create_viewer <- function(audio_dir,
                           tiers = 1,
                           merge_column,
                           caption_column = NULL,
-                          about = "This page were created with the phonfieldworks package for R",
+                          about = "This page were created with the `phonfieldworks` package (Moroz 2019) for R (R Core Team 2020).",
                           output_dir,
                           output_file = "stimuli_viewer",
                           render = TRUE){

diff --git a/R/df_to_tier.R b/R/df_to_tier.R
@@ -70,7 +70,7 @@ df_to_tier <- function(df, textgrid, tier_name = "", overwrite = TRUE){
     paste0(tier_type, ": size = ", nrow(df), " "),
     unlist(all_annotations)
   )
-  if(isTRUE(overwrite)){
+  if(overwrite){
     writeLines(append(tg, add_tier), textgrid)
   } else {
     append(tg, add_tier)

diff --git a/docs/annotation_of_s1.csv b/docs/annotation_of_s1.csv
@@ -1,8 +1,10 @@
-,id,start,end,annotation
-1,1,0,0.108702066839713,
-2,2,0.108702066839713,0.184762881187424,
-3,3,0.184762881187424,0.577955857574024,
-4,4,0.577955857574024,0.689276652082925,
-5,5,0.689276652082925,1.03734416788977,
-6,6,1.03734416788977,1.13213917296352,
-7,7,1.13213917296352,1.39421768707483,
+"","id","start","end","annotation"
+"1",1,0,0.0981938873393881,""
+"2",2,0.0981938873393881,0.180033198124212,"ı"
+"3",3,0.180033198124212,0.482154195011338,""
+"4",4,0.482154195011338,0.578069846032217,""
+"5",5,0.578069846032217,0.691528890529359,"æ"
+"6",6,0.691528890529359,0.912063492063492,""
+"7",7,0.912063492063492,1.03562599269282,""
+"8",8,1.03562599269282,1.13048519382978,"ɒ"
+"9",9,1.13048519382978,1.39421768707483,""
diff --git a/docs/first_example.html b/docs/first_example.html
@@ -3110,7 +3110,7 @@
         <!-- The content of this hgroup is replaced programmatically through the slide_config.json. -->
     <hgroup class="auto-fadein">
       <h1 data-config-title><!-- populated from slide_config.json --></h1>
-      <h2 data-config-subtitle><!-- populated from slide_config.json --></h2>
+
       <p data-config-presenter><!-- populated from slide_config.json --></p>
           </hgroup>
   </slide>

diff --git a/docs/images/03_annotate_textgrid_result.png b/docs/images/03_annotate_textgrid_result.png
diff --git a/docs/images/04_annotate_textgrid_result.png b/docs/images/04_annotate_textgrid_result.png
diff --git a/docs/images/05_annotate_textgrid_result.png b/docs/images/05_annotate_textgrid_result.png
diff --git a/docs/index.Rmd b/docs/index.Rmd
@@ -219,28 +219,38 @@ annotate_textgrid(annotation = my_stimuli_df$stimuli,
 
 As you can see in the example, the `annotate_textgrid()` function creates a backup of the tier and adds a new tier on top of the previous one. It is possible to prevent the function from doing so by setting the `backup` argument to `FALSE`.
 
-It is possible to annotate every second (third, fourth, etc.) interval. Imagine that someone annotated each vowel in the recording, so the TextGrid will look as follows:
+Imagine that we are interested in the annotation of vowels. The most common solution is to open Praat and create new annotations by hand. But it is also possible to create them in advance using subannotations. The idea is that you choose some baseline tier, which is then automatically cut into smaller pieces on another tier.
 
-```{r, include=FALSE}
-annotation <- read.csv("annotation_of_s1.csv")
-annotation$annotation <- ""
-df_to_tier(df = annotation, textgrid = "s1/s1_all.TextGrid") 
+```{r}
+create_subannotation(textgrid = "s1/s1_all.TextGrid", 
+                     tier = 1, # this is a baseline tier
+                     n_of_annotations = 3) # how many empty annotations per unit?
 ```
 
 ![](images/03_annotate_textgrid_result.png)
 
-Now you can use the second column in `my_stimuli_df`, which contains vowels.
+Now we can annotate the created tier:
 
 ```{r}
-my_stimuli_df$vowel
-annotate_textgrid(annotation = my_stimuli_df$vowel,
+annotate_textgrid(annotation = c("", "ı", "", "", "æ", "", "", "ɒ", ""),
                   textgrid = "s1/s1_all.TextGrid",
-                  each = 2,
                   tier = 3, 
                   backup = FALSE)
 ```
 
 ![](images/04_annotate_textgrid_result.png)
+
+You can see that we created a third tier with annotations. The only thing left is to move the annotation boundaries in Praat (this cannot be automated):
+
+```{r, include=FALSE}
+writeLines(readLines("s1/s1_all.TextGrid")[1:44], "s1/s1_all.TextGrid")
+df_to_tier(df = read.csv("annotation_of_s1.csv"), 
+           textgrid = "s1/s1_all.TextGrid", 
+           overwrite = TRUE) 
+```
+
+![](images/05_annotate_textgrid_result.png)
+
 You can see from the last figure that no backup tier was created (`backup = FALSE`), that the third tier was annotated (`tier = 3`), and that an annotation was performed in every second interval (`each = 2`).
 
 ## Extracting your data

diff --git a/docs/index.html b/docs/index.html
diff --git a/docs/s1/s1_all.TextGrid b/docs/s1/s1_all.TextGrid
@@ -4,7 +4,7 @@ Object class = "TextGrid"
 xmin = 0 
 xmax = 1.39421768707483
 tiers? <exists> 
-size = 3 
+size = 4 
 item []: 
     item [1]:
         class = "IntervalTier"
@@ -42,39 +42,45 @@ item []:
         intervals [3]:
             xmin = 0.912063492063492
             xmax = 1.39421768707483
-            text = "s1_top.wav"
-
-    item [3]:
+    item [4]:
         class = "IntervalTier" 
         name = "" 
         xmin = 0
         xmax = 1.39421768707483
-        intervals: size = 7 
+        intervals: size = 9 
         intervals [1]:
             xmin = 0
-            xmax = 0.108702066839713
+            xmax = 0.0981938873393881
             text = "" 
         intervals [2]:
-            xmin = 0.108702066839713
-            xmax = 0.184762881187424
+            xmin = 0.0981938873393881
+            xmax = 0.180033198124212
             text = "ı" 
         intervals [3]:
-            xmin = 0.184762881187424
-            xmax = 0.577955857574024
+            xmin = 0.180033198124212
+            xmax = 0.482154195011338
             text = "" 
         intervals [4]:
-            xmin = 0.577955857574024
-            xmax = 0.689276652082925
-            text = "æ" 
-        intervals [5]:
-            xmin = 0.689276652082925
-            xmax = 1.03734416788977
+            xmin = 0.482154195011338
+            xmax = 0.578069846032217
             text = "" 
+        intervals [5]:
+            xmin = 0.578069846032217
+            xmax = 0.691528890529359
+            text = "æ" 
         intervals [6]:
-            xmin = 1.03734416788977
-            xmax = 1.13213917296352
-            text = "ɒ" 
+            xmin = 0.691528890529359
+            xmax = 0.912063492063492
+            text = "" 
         intervals [7]:
-            xmin = 1.13213917296352
+            xmin = 0.912063492063492
+            xmax = 1.03562599269282
+            text = "" 
+        intervals [8]:
+            xmin = 1.03562599269282
+            xmax = 1.13048519382978
+            text = "ɒ" 
+        intervals [9]:
+            xmin = 1.13048519382978
             xmax = 1.39421768707483
             text = "" 
diff --git a/docs/s1/s1_pics/1_s1_ı.png b/docs/s1/s1_pics/1_s1_ı.png
diff --git a/docs/s1/s1_pics/2_s1_æ.png b/docs/s1/s1_pics/2_s1_æ.png
diff --git a/docs/s1/s1_pics/3_s1_ɒ.png b/docs/s1/s1_pics/3_s1_ɒ.png
diff --git a/docs/s1/s1_sounds/1_s1_ı.wav b/docs/s1/s1_sounds/1_s1_ı.wav
diff --git a/docs/s1/s1_sounds/2_s1_æ.wav b/docs/s1/s1_sounds/2_s1_æ.wav
diff --git a/docs/s1/s1_sounds/3_s1_ɒ.wav b/docs/s1/s1_sounds/3_s1_ɒ.wav
diff --git a/docs/s1/s1_tip.png b/docs/s1/s1_tip.png