tdhock · tdhock · Aug 16, 2024 · Aug 15, 2024 · Aug 15, 2024 · Aug 15, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -3,7 +3,7 @@ Authors@R: c(
     person("Toby", "Hocking",
      email="toby.hocking@r-project.org",
      role=c("aut", "cre")))
-Version: 2024.7.16
+Version: 2024.8.15
 License: GPL-3
 Title: Named Capture to Data Tables
 Description: User-friendly functions for extracting a data

diff --git a/NEWS b/NEWS
@@ -1,3 +1,7 @@
+Changes in version 2024.8.15
+
+- bugfix: using alternatives_with_shared_groups with nc::capture_first_vec now works on a subject of length=1.
+
 Changes in version 2024.7.16
 
 - arrow::arrow_with_dataset() to avoid CRAN NOTE.

diff --git a/R/alternatives.R b/R/alternatives.R
@@ -121,6 +121,17 @@ alternatives_with_shared_groups <- structure(function
 ### list).
 }, ex=function(){
 
+  ## Example 0: matching family and given names.
+  nc::capture_first_vec(
+    c("Toby Dylan Hocking","Hocking, Toby Dylan"),
+    nc::alternatives_with_shared_groups(
+      family="[A-Z][a-z]+",
+      given="[^,]+",
+      list(given, " ", family),
+      list(family, ", ", given)
+    )
+  )
+
   ## Example 1: matching dates in different formats, but always same
   ## type in each alternative.
   subject.vec <- c("mar 17, 1983", "26 sep 2017", "17 mar 1984")

diff --git a/R/apply_type_funs.R b/R/apply_type_funs.R
@@ -46,7 +46,7 @@ apply_type_funs <- function
     if(1 < length(dup.type.tab)){
       stop(domain=NA, gettextf("capture groups with identical names should have conversion functions that all return the same type; problem group name=%s has types %s", dup.name, paste(names(dup.type.tab), collapse = ",")))
     }
-    is.match.name <- is.match[, dup.col.indices]
+    is.match.name <- is.match[, dup.col.indices, drop=FALSE]
     if(any(1 < rowSums(is.match.name))){
       stop(domain=NA, gettextf("duplicate capture group names are only allowed in alternatives, problem: %s", dup.name))
     }

diff --git a/man/alternatives_with_shared_groups.Rd b/man/alternatives_with_shared_groups.Rd
@@ -21,6 +21,17 @@ list).}
 
 \examples{
 
+## Example 0: matching family and given names.
+nc::capture_first_vec(
+  c("Toby Dylan Hocking","Hocking, Toby Dylan"),
+  nc::alternatives_with_shared_groups(
+    family="[A-Z][a-z]+",
+    given="[^,]+",
+    list(given, " ", family),
+    list(family, ", ", given)
+  )
+)
+
 ## Example 1: matching dates in different formats, but always same
 ## type in each alternative.
 subject.vec <- c("mar 17, 1983", "26 sep 2017", "17 mar 1984")

diff --git a/man/capture_first_glob.Rd b/man/capture_first_glob.Rd
@@ -49,7 +49,7 @@ data.chunk.pattern <- list(
 (data.chunk.dt <- nc::capture_first_glob(glob, data.chunk.pattern, READ=read.bedGraph))
 
 ## Write same data set in Hive partition, then re-read.
-if(requireNamespace("arrow")){
+if(requireNamespace("arrow") && arrow::arrow_with_dataset()){
   path <- tempfile()
   max_rows_per_file <- if(interactive())3 else 1000
   arrow::write_dataset(

diff --git a/tests/testthat/test-CRAN-alternatives.R b/tests/testthat/test-CRAN-alternatives.R
@@ -145,7 +145,7 @@ test_that("altlist + alternatives with names ok", {
   expect_identical(match.dt[["year"]], c("1983", "2017", "1984"))
 })
 
-test_that("alternatives_with_shared_groups ok", {
+test_that("alternatives_with_shared_groups ok with 3 subjects", {
   subject.vec <- c("mar 17, 1983", "26 sep 2017", "17 mar 1984")
   pattern <- nc::alternatives_with_shared_groups(
     month="[a-z]{3}", day="[0-9]{2}", year="[0-9]{4}",
@@ -158,3 +158,17 @@ test_that("alternatives_with_shared_groups ok", {
   expect_identical(match.dt[["day"]], c("17", "26", "17"))
   expect_identical(match.dt[["year"]], c("1983", "2017", "1984"))
 })
+
+test_that("alternatives_with_shared_groups ok with 1 subject", { ## regression test: subject vector of length 1
+  subject.vec <- "mar 17, 1983" ## a single subject, written in the "month day, year" format
+  pattern <- nc::alternatives_with_shared_groups(
+    month="[a-z]{3}", day="[0-9]{2}", year="[0-9]{4}", ## named groups, each used in both alternatives below
+    list(month, " ", day, ", ", year), ## alternative 1, e.g. "mar 17, 1983"
+    list(day, " ", month, " ", year)) ## alternative 2, e.g. "26 sep 2017"
+  match.dt <- nc::capture_first_vec(subject.vec, pattern)
+  sorted.names <- c("day", "month", "year")
+  expect_identical(sort(names(match.dt)), sorted.names) ## names are sorted first, so column order is not tested
+  expect_identical(match.dt[["month"]], "mar")
+  expect_identical(match.dt[["day"]], "17") ## expected values are character strings, not numbers
+  expect_identical(match.dt[["year"]], "1983")
+})