Use ngettext() for some plural messages in R (#6476)

* ngettext() for R-side plural messages * Fix test references * Find and switch more ngettext candidates in R * missing ',' * Another round of tests fixes * Remove ':' from message too in test
Rdatatable · Sep 11, 2024 · c581ffa · c581ffa
1 parent 8cc2dd1
commit c581ffa
Show file tree

Hide file tree

Showing 12 changed files with 86 additions and 63 deletions.
diff --git a/R/bmerge.R b/R/bmerge.R
@@ -173,7 +173,8 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
       xo = forderv(nqx, c(ncol(nqx), xcols))
       if (verbose) {catf("done in %s\n",timetaken(last.started.at)); flush.console()}
     } else nqgrp = integer(0L)
-    if (verbose) catf("  Found %d non-equi group(s) ...\n", nqmaxgrp)
+    if (verbose)
+      catf(ngettext(nqmaxgrp, "  Found %d non-equi group ...\n", "  Found %d non-equi groups ...\n"), nqmaxgrp, domain=NA)
   }
 
   if (verbose) {last.started.at=proc.time();catf("Starting bmerge ...\n");flush.console()}

diff --git a/R/data.table.R b/R/data.table.R
@@ -715,7 +715,9 @@ replace_dot_alias = function(e) {
       if (is.factor(j)) j = as.character(j)  # fix for FR: #358
       if (is.character(j)) {
         if (notj) {
-          if (anyNA(idx <- chmatch(j, names_x))) warningf("column(s) not removed because not found: %s", brackify(j[is.na(idx)]))
+          if (anyNA(idx <- chmatch(j, names_x)))
+            warningf(ngettext(sum(is.na(idx)), "column not removed because not found: %s", "columns not removed because not found: %s"),
+                     brackify(j[is.na(idx)]), domain=NA)
           # all duplicates of the name in names(x) must be removed; e.g. data.table(x=1, y=2, x=3)[, !"x"] should just output 'y'.
           w = !names_x %chin% j
           ansvars = names_x[w]
@@ -729,7 +731,8 @@ replace_dot_alias = function(e) {
         if (!length(ansvals)) return(null.data.table())
         if (!length(leftcols)) {
           if (!anyNA(ansvals)) return(.Call(CsubsetDT, x, irows, ansvals))
-          else stopf("column(s) not found: %s", brackify(ansvars[is.na(ansvals)]))
+          else stopf(ngettext(sum(is.na(ansvals)), "column not found: %s", "columns not found: %s"),
+                     brackify(ansvars[is.na(ansvals)]), domain=NA)
         }
         # else the NA in ansvals are for join inherited scope (test 1973), and NA could be in irows from join and data in i should be returned (test 1977)
         #   in both cases leave to the R-level subsetting of i and x together further below
@@ -895,7 +898,10 @@ replace_dot_alias = function(e) {
           }
         }
         tt = lengths(byval)
-        if (any(tt!=xnrow)) stopf("The items in the 'by' or 'keyby' list are length(s) %s. Each must be length %d; the same length as there are rows in x (after subsetting if i is provided).", brackify(tt), xnrow)
+        if (any(tt!=xnrow)) {
+          plural_part <- sprintf(ngettext(length(tt), "The item in the 'by' or 'keyby' list is length %s.", "The items in the 'by' or 'keyby' list have lengths %s."), brackify(tt))
+          stopf("%s Each must be length %d; the same length as there are rows in x (after subsetting if i is provided).", plural_part, xnrow)
+        }
         if (is.null(bynames)) bynames = rep.int("",length(byval))
         if (length(idx <- which(!nzchar(bynames))) && !bynull) {
           # TODO: improve this and unify auto-naming of jsub and bysub
@@ -2731,8 +2737,8 @@ setnames = function(x,old,new,skip_absent=FALSE) {
 
 setcolorder = function(x, neworder=key(x), before=NULL, after=NULL)  # before/after #4358
 {
-  if (is.character(neworder) && anyDuplicated(names(x)))
-    stopf("x has some duplicated column name(s): %s. Please remove or rename the duplicate(s) and try again.", brackify(unique(names(x)[duplicated(names(x))])))
+  if (is.character(neworder))
+    check_duplicate_names(x)
   if (!is.null(before) && !is.null(after))
     stopf("Provide either before= or after= but not both")
   if (length(before)>1L || length(after)>1L)

diff --git a/R/fcast.R b/R/fcast.R
@@ -192,7 +192,7 @@ dcast.data.table = function(data, formula, fun.aggregate = NULL, sep = "_", ...,
     fun.call = aggregate_funs(fun.call, lvals, sep, ...)
     maybe_err = function(list.of.columns) {
       if (!all(lengths(list.of.columns) == 1L)) {
-        msg <- gettext("Aggregating function(s) should take a vector as input and return a single value (length=1), but they do not, so the result is undefined. Please fix by modifying your function so that a single value is always returned.")
+        msg <- gettext("Aggregating functions should take a vector as input and return a single value (length=1), but they do not, so the result is undefined. Please fix by modifying your function so that a single value is always returned.")
         if (is.null(fill)) { # TODO change to always stopf #6329
           stop(msg, domain=NA, call. = FALSE)
         } else {

diff --git a/R/fmelt.R b/R/fmelt.R
@@ -28,7 +28,7 @@ patterns = function(..., cols=character(0L), ignore.case=FALSE, perl=FALSE, fixe
     stopf("Input patterns must be of type character.")
   matched = lapply(p, grep, cols, ignore.case=ignore.case, perl=perl, fixed=fixed, useBytes=useBytes)
   if (length(idx <- which(lengths(matched) == 0L)))
-    stopf('Pattern(s) not found: [%s]', brackify(p[idx]))
+    stopf(ngettext(length(idx), 'Pattern not found: [%s]', 'Patterns not found: [%s]'), brackify(p[idx]), domain=NA)
   if (length(matched) == 1L) return(matched[[1L]])
   matched
 }

diff --git a/R/foverlaps.R b/R/foverlaps.R
@@ -44,10 +44,11 @@ foverlaps = function(x, y, by.x=if (!is.null(key(x))) key(x) else key(y), by.y=k
     stopf("Duplicate columns are not allowed in overlap joins. This may change in the future.")
   if (length(by.x) != length(by.y))
     stopf("length(by.x) != length(by.y). Columns specified in by.x should correspond to columns specified in by.y and should be of same lengths.")
-  if (any(dup.x<-duplicated(names(x)))) #1730 - handling join possible but would require workarounds on setcolorder further, it is really better just to rename dup column
-    stopf("%s has some duplicated column name(s): %s. Please remove or rename the duplicate(s) and try again.", "x", brackify(unique(names(x)[dup.x])))
-  if (any(dup.y<-duplicated(names(y))))
-    stopf("%s has some duplicated column name(s): %s. Please remove or rename the duplicate(s) and try again.", "y", brackify(unique(names(y)[dup.y])))
+
+  #1730 - handling join possible but would require workarounds on setcolorder further, it is really better just to rename dup column
+  check_duplicate_names(x)
+  check_duplicate_names(y)
+
   xnames = by.x; xintervals = tail(xnames, 2L)
   ynames = by.y; yintervals = tail(ynames, 2L)
   xval1 = x[[xintervals[1L]]]; xval2 = x[[xintervals[2L]]]

diff --git a/R/fread.R b/R/fread.R
@@ -326,7 +326,9 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC")
     } else {
       cols_to_factor = which(vapply_1b(ans, is.character))
     }
-    if (verbose) catf("stringsAsFactors=%s converted %d column(s): %s\n", stringsAsFactors, length(cols_to_factor), brackify(names(ans)[cols_to_factor]))
+    if (verbose)
+      catf(ngettext(length(cols_to_factor), "stringsAsFactors=%s converted %d column: %s\n", "stringsAsFactors=%s converted %d columns: %s\n"),
+           stringsAsFactors, length(cols_to_factor), brackify(names(ans)[cols_to_factor]), domain=NA)
     for (j in cols_to_factor) set(ans, j=j, value=as_factor(.subset2(ans, j)))
   }
 

diff --git a/R/merge.R b/R/merge.R
@@ -21,10 +21,11 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
     else
       warningf("Input data.table '%s' has no columns.", "y")
   }
+  check_duplicate_names(x)
+  check_duplicate_names(y)
+
   nm_x = names(x)
   nm_y = names(y)
-  if (anyDuplicated(nm_x)) stopf("%s has some duplicated column name(s): %s. Please remove or rename the duplicate(s) and try again.", "x", brackify(nm_x[duplicated(nm_x)]))
-  if (anyDuplicated(nm_y)) stopf("%s has some duplicated column name(s): %s. Please remove or rename the duplicate(s) and try again.", "y", brackify(nm_y[duplicated(nm_y)]))
 
   ## set up 'by'/'by.x'/'by.y'
   if ( (!is.null(by.x) || !is.null(by.y)) && length(by.x)!=length(by.y) )

diff --git a/R/print.data.table.R b/R/print.data.table.R
@@ -283,7 +283,8 @@ trunc_cols_message = function(not_printed, abbs, class, col.names){
   n = length(not_printed)
   if (class && col.names != "none") classes = paste0(" ", tail(abbs, n)) else classes = ""
   catf(
-    "%d variable(s) not shown: %s\n",
-    n, brackify(paste0(not_printed, classes))
+    ngettext(n, "%d variable not shown: %s\n", "%d variables not shown: %s\n"),
+    n, brackify(paste0(not_printed, classes)),
+    domain=NA
   )
 }
diff --git a/R/setops.R b/R/setops.R
@@ -42,7 +42,9 @@ funique = function(x) {
   if (!identical(names(x), names(y))) stopf("x and y must have the same column order")
   bad_types = c("raw", "complex", if (block_list) "list")
   found = bad_types %chin% c(vapply_1c(x, typeof), vapply_1c(y, typeof))
-  if (any(found)) stopf("unsupported column type(s) found in x or y: %s", brackify(bad_types[found]))
+  if (any(found))
+    stopf(ngettext(sum(found), "unsupported column type found in x or y: %s", "unsupported column types found in x or y: %s"),
+          brackify(bad_types[found]), domain=NA)
   super = function(x) {
     # allow character->factor and integer->numeric because from v1.12.4 i's type is retained by joins, #3820
     ans = class(x)[1L]

diff --git a/R/test.data.table.R b/R/test.data.table.R
@@ -252,8 +252,8 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
   if (nfail > 0L) {
     # nocov start
     stopf(
-      "%d error(s) out of %d. Search %s for test number(s) %s. Duration: %s.",
-      nfail, ntest, names(fn), toString(env$whichfail), timetaken(env$started.at)
+      ngettext(nfail, "%d error out of %d. Search %s for test number %s. Duration: %s.", "%d errors out of %d. Search %s for test numbers %s. Duration: %s."),
+      nfail, ntest, names(fn), toString(env$whichfail), timetaken(env$started.at), domain=NA
     )
     # important to stopf() here, so that 'R CMD check' fails
     # nocov end

diff --git a/R/utils.R b/R/utils.R
@@ -30,6 +30,15 @@ internal_error = function(...) {
   stop(e, call. = FALSE, domain = NA)
 }
 
+check_duplicate_names = function(x, table_name=deparse(substitute(x))) {
+  if (!anyDuplicated(nm <- names(x))) return(invisible())
+  duplicate_names = unique(nm[duplicated(nm)])
+  stopf(ngettext(length(duplicate_names),
+                  "%s has duplicated column name %s. Please remove or rename the duplicate and try again.",
+                  "%s has duplicated column names %s. Please remove or rename the duplicates and try again."),
+        table_name, brackify(duplicate_names), domain=NA)
+}
+
 # TODO(R>=4.0.0): Remove this workaround. From R 4.0.0, rep_len() dispatches rep.Date(), which we need.
 #   Before that, rep_len() strips attributes --> breaks data.table()'s internal recycle() helper.
 #   This also impacts test 2 in S4.Rraw, because the error message differs for rep.int() vs. rep_len().