Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Authors@R: c(
person("Toby", "Hocking",
email="toby.hocking@r-project.org",
role=c("aut", "cre")))
Version: 2024.7.16
Version: 2024.8.15
License: GPL-3
Title: Named Capture to Data Tables
Description: User-friendly functions for extracting a data
Expand Down
4 changes: 4 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
Changes in version 2024.8.15

- bugfix: using alternatives_with_shared_groups with nc::capture_first_vec now works on a subject of length=1.

Changes in version 2024.7.16

- examples now check arrow::arrow_with_dataset() before using arrow, to avoid CRAN NOTE.
Expand Down
11 changes: 11 additions & 0 deletions R/alternatives.R
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,17 @@ alternatives_with_shared_groups <- structure(function
### list).
}, ex=function(){

## Example 0: matching family and given names.
nc::capture_first_vec(
c("Toby Dylan Hocking","Hocking, Toby Dylan"),
nc::alternatives_with_shared_groups(
family="[A-Z][a-z]+",
given="[^,]+",
list(given, " ", family),
list(family, ", ", given)
)
)

## Example 1: matching dates in different formats, but always same
## type in each alternative.
subject.vec <- c("mar 17, 1983", "26 sep 2017", "17 mar 1984")
Expand Down
2 changes: 1 addition & 1 deletion R/apply_type_funs.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ apply_type_funs <- function
if(1 < length(dup.type.tab)){
stop(domain=NA, gettextf("capture groups with identical names should have conversion functions that all return the same type; problem group name=%s has types %s", dup.name, paste(names(dup.type.tab), collapse = ",")))
}
is.match.name <- is.match[, dup.col.indices]
is.match.name <- is.match[, dup.col.indices, drop=FALSE]
if(any(1 < rowSums(is.match.name))){
stop(domain=NA, gettextf("duplicate capture group names are only allowed in alternatives, problem: %s", dup.name))
}
Expand Down
11 changes: 11 additions & 0 deletions man/alternatives_with_shared_groups.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,17 @@ list).}

\examples{

## Example 0: matching family and given names.
nc::capture_first_vec(
c("Toby Dylan Hocking","Hocking, Toby Dylan"),
nc::alternatives_with_shared_groups(
family="[A-Z][a-z]+",
given="[^,]+",
list(given, " ", family),
list(family, ", ", given)
)
)

## Example 1: matching dates in different formats, but always same
## type in each alternative.
subject.vec <- c("mar 17, 1983", "26 sep 2017", "17 mar 1984")
Expand Down
2 changes: 1 addition & 1 deletion man/capture_first_glob.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ data.chunk.pattern <- list(
(data.chunk.dt <- nc::capture_first_glob(glob, data.chunk.pattern, READ=read.bedGraph))

## Write same data set in Hive partition, then re-read.
if(requireNamespace("arrow")){
if(requireNamespace("arrow") && arrow::arrow_with_dataset()){
path <- tempfile()
max_rows_per_file <- if(interactive())3 else 1000
arrow::write_dataset(
Expand Down
16 changes: 15 additions & 1 deletion tests/testthat/test-CRAN-alternatives.R
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ test_that("altlist + alternatives with names ok", {
expect_identical(match.dt[["year"]], c("1983", "2017", "1984"))
})

test_that("alternatives_with_shared_groups ok", {
test_that("alternatives_with_shared_groups ok with 3 subjects", {
subject.vec <- c("mar 17, 1983", "26 sep 2017", "17 mar 1984")
pattern <- nc::alternatives_with_shared_groups(
month="[a-z]{3}", day="[0-9]{2}", year="[0-9]{4}",
Expand All @@ -158,3 +158,17 @@ test_that("alternatives_with_shared_groups ok", {
expect_identical(match.dt[["day"]], c("17", "26", "17"))
expect_identical(match.dt[["year"]], c("1983", "2017", "1984"))
})

test_that("alternatives_with_shared_groups ok with 1 subject", {
  ## Subject is a single string, so the match result has exactly one row.
  one.subject <- "mar 17, 1983"
  ## Two alternatives that share the month/day/year group definitions.
  date.pattern <- nc::alternatives_with_shared_groups(
    month="[a-z]{3}",
    day="[0-9]{2}",
    year="[0-9]{4}",
    list(month, " ", day, ", ", year),
    list(day, " ", month, " ", year))
  result.dt <- nc::capture_first_vec(one.subject, date.pattern)
  ## Only the set of column names is checked, not their order.
  expect_identical(sort(names(result.dt)), c("day", "month", "year"))
  ## Each shared group should hold the text captured from the subject.
  expected.list <- list(month="mar", day="17", year="1983")
  for(group.name in names(expected.list)){
    expect_identical(result.dt[[group.name]], expected.list[[group.name]])
  }
})