Skip to content

Commit

Permalink
Merge branch 'excel_time_to_numeric' of github.com:billdenney/janitor…
Browse files Browse the repository at this point in the history
… into excel_time_to_numeric

# Conflicts:
#	R/excel_time_to_numeric.R
  • Loading branch information
billdenney committed Nov 1, 2023
2 parents 88bcc3b + 46678a6 commit 4970d44
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 83 deletions.
69 changes: 38 additions & 31 deletions R/excel_time_to_numeric.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@
#' @family Date-time cleaning
#' @seealso \code{\link{excel_numeric_to_date}}
#' @export
excel_time_to_numeric <- function(time_value, round_seconds=TRUE) {
excel_time_to_numeric <- function(time_value, round_seconds = TRUE) {
UseMethod("excel_time_to_numeric")
}

#' @export
excel_time_to_numeric.logical <- function(time_value, round_seconds=TRUE) {
excel_time_to_numeric.logical <- function(time_value, round_seconds = TRUE) {
if (all(is.na(time_value))) {
rep(NA_real_, length(time_value))
} else {
Expand All @@ -36,11 +36,11 @@ excel_time_to_numeric.logical <- function(time_value, round_seconds=TRUE) {
}

#' @export
excel_time_to_numeric.numeric <- function(time_value, round_seconds=TRUE) {
excel_time_to_numeric.numeric <- function(time_value, round_seconds = TRUE) {
if (all(is.na(time_value) |
time_value >= 0 &
time_value < 1)) {
seconds <- time_value*86400
time_value >= 0 &
time_value < 1)) {
seconds <- time_value * 86400
if (round_seconds) {
seconds <- round(seconds)
}
Expand All @@ -51,9 +51,9 @@ excel_time_to_numeric.numeric <- function(time_value, round_seconds=TRUE) {
}

#' @export
excel_time_to_numeric.POSIXct <- function(time_value, round_seconds=TRUE) {
excel_time_to_numeric.POSIXct <- function(time_value, round_seconds = TRUE) {
# using trunc removes timezone inconsistency. Timezones aren't used in Excel.
seconds <- as.numeric(time_value) - as.numeric(trunc(time_value, units="days"))
seconds <- as.numeric(time_value) - as.numeric(trunc(time_value, units = "days"))
mask_good_seconds <- is.na(seconds) | (seconds >= 0 & seconds < 86400)
if (all(mask_good_seconds)) {
if (round_seconds) {
Expand All @@ -67,37 +67,37 @@ excel_time_to_numeric.POSIXct <- function(time_value, round_seconds=TRUE) {
}

#' @export
excel_time_to_numeric.POSIXlt <- function(time_value, round_seconds=TRUE) {
excel_time_to_numeric.POSIXlt <- function(time_value, round_seconds = TRUE) {
excel_time_to_numeric.POSIXct(
as.POSIXct(time_value),
round_seconds = round_seconds
)
}

#' @export
excel_time_to_numeric.character <- function(time_value, round_seconds=TRUE) {
excel_time_to_numeric.character <- function(time_value, round_seconds = TRUE) {
ret <- rep(NA_real_, length(time_value))
patterns <-
list(
number="^0(\\.[0-9]*)?$",
number = "^0(\\.[0-9]*)?$",
# SI numbers have to have the form [number]E-[number] because the number
# has to be between 0 and 1 and can't be bigger than 1.
si_number="^[1-9](\\.[0-9]*)?E-[0-9]+$",
"12hr"="^([0]?[1-9]|1[0-2]):([0-5][0-9])(?::([0-5][0-9]))? ?([AP]M)$",
"24hr"="^([0-1]?[0-9]|2[0-3]):([0-5][0-9])(?::([0-5][0-9]))?$",
si_number = "^[1-9](\\.[0-9]*)?E-[0-9]+$",
"12hr" = "^([0]?[1-9]|1[0-2]):([0-5][0-9])(?::([0-5][0-9]))? ?([AP]M)$",
"24hr" = "^([0-1]?[0-9]|2[0-3]):([0-5][0-9])(?::([0-5][0-9]))?$",
# The ".*?" at the end of POSIX is to allow for a time zone, but it allows
# for imperfect parsing if there were just a date and a space.
# The entire time is optional to allow for midnight which shows as
# just the date and time zone.
POSIX="1899-12-31 (?:([0-1]?[0-9]|2[0-3]):([0-5][0-9])(?::([0-5][0-9]))?)?.*?$"
POSIX = "1899-12-31 (?:([0-1]?[0-9]|2[0-3]):([0-5][0-9])(?::([0-5][0-9]))?)?.*?$"
)
mask_na <- is.na(time_value)
mask_number <-
grepl(pattern=patterns$number, x=time_value) |
grepl(pattern=patterns$si_number, x=time_value)
mask_POSIX <- grepl(pattern=patterns[["POSIX"]], x=time_value)
mask_12hr <- grepl(pattern=patterns[["12hr"]], x=time_value, ignore.case=TRUE)
mask_24hr <- grepl(pattern=patterns[["24hr"]], x=time_value)
grepl(pattern = patterns$number, x = time_value) |
grepl(pattern = patterns$si_number, x = time_value)
mask_POSIX <- grepl(pattern = patterns[["POSIX"]], x = time_value)
mask_12hr <- grepl(pattern = patterns[["12hr"]], x = time_value, ignore.case = TRUE)
mask_24hr <- grepl(pattern = patterns[["24hr"]], x = time_value)
unmatched <- !(mask_na | mask_number | mask_POSIX | mask_12hr | mask_24hr)
if (any(unmatched)) {
stop(
Expand All @@ -109,46 +109,53 @@ excel_time_to_numeric.character <- function(time_value, round_seconds=TRUE) {
if (any(mask_number)) {
ret[mask_number] <-
excel_time_to_numeric.numeric(
time_value=as.numeric(time_value[mask_number]),
round_seconds=round_seconds
time_value = as.numeric(time_value[mask_number]),
round_seconds = round_seconds
)
}
mask_clock <- mask_12hr | mask_24hr | mask_POSIX
if (any(mask_clock)) {
hours <- minutes <- seconds <- rep(NA_real_, length(time_value))
if (any(mask_POSIX)) {
hours[mask_POSIX] <-
gsub(pattern=patterns$POSIX, replacement="\\1", x=time_value[mask_POSIX])
gsub(pattern = patterns$POSIX, replacement = "\\1", x = time_value[mask_POSIX])
minutes[mask_POSIX] <-
gsub(pattern=patterns$POSIX, replacement="\\2", x=time_value[mask_POSIX])
gsub(pattern = patterns$POSIX, replacement = "\\2", x = time_value[mask_POSIX])
seconds[mask_POSIX] <-
gsub(pattern=patterns$POSIX, replacement="\\3", x=time_value[mask_POSIX])
gsub(pattern = patterns$POSIX, replacement = "\\3", x = time_value[mask_POSIX])
}
if (any(mask_12hr)) {
mask_pm <- rep(FALSE, length(time_value))
hours[mask_12hr] <-
gsub(pattern=patterns[["12hr"]], replacement="\\1", x=time_value[mask_12hr], ignore.case=TRUE)
gsub(pattern = patterns[["12hr"]], replacement = "\\1", x = time_value[mask_12hr], ignore.case = TRUE)
minutes[mask_12hr] <-
gsub(pattern=patterns[["12hr"]], replacement="\\2", x=time_value[mask_12hr], ignore.case=TRUE)
gsub(pattern = patterns[["12hr"]], replacement = "\\2", x = time_value[mask_12hr], ignore.case = TRUE)
seconds[mask_12hr] <-
gsub(pattern=patterns[["12hr"]], replacement="\\3", x=time_value[mask_12hr], ignore.case=TRUE)
gsub(pattern = patterns[["12hr"]], replacement = "\\3", x = time_value[mask_12hr], ignore.case = TRUE)
# 12 is 0 hours in the AM and the PM conversion below adds the needed 12
# at noon.
mask_0_hours <- mask_12hr & (hours %in% "12")
hours[mask_0_hours] <- "0"
mask_pm[mask_12hr] <-
tolower(
gsub(pattern = patterns[["12hr"]], replacement = "\\4", x = time_value[mask_12hr], ignore.case = TRUE)
) %in% "pm"
hours[mask_pm] <- 12 + as.numeric(hours[mask_pm])
}
if (any(mask_24hr)) {
hours[mask_24hr] <-
gsub(pattern=patterns[["24hr"]], replacement="\\1", x=time_value[mask_24hr])
gsub(pattern = patterns[["24hr"]], replacement = "\\1", x = time_value[mask_24hr])
minutes[mask_24hr] <-
gsub(pattern=patterns[["24hr"]], replacement="\\2", x=time_value[mask_24hr])
gsub(pattern = patterns[["24hr"]], replacement = "\\2", x = time_value[mask_24hr])
seconds[mask_24hr] <-
gsub(pattern=patterns[["24hr"]], replacement="\\3", x=time_value[mask_24hr])
gsub(pattern = patterns[["24hr"]], replacement = "\\3", x = time_value[mask_24hr])
}
hours[hours %in% ""] <- "0"
minutes[minutes %in% ""] <- "0"
Expand Down
103 changes: 51 additions & 52 deletions tests/testthat/test-excel_time_to_numeric.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,26 @@ test_that("excel_time_to_numeric numbers function correctly", {
})

test_that("excel_time_to_numeric POSIX objects extract the correct part of the time", {
expect_equal(excel_time_to_numeric(as.POSIXct("1899-12-31 08:00")), 8*3600)
expect_equal(excel_time_to_numeric(as.POSIXct("1899-12-31 13:00")), 13*3600)
expect_equal(excel_time_to_numeric(as.POSIXct("1899-12-31 13:05:10")), 13*3600 + 5*60 + 10)
expect_equal(excel_time_to_numeric(as.POSIXct("1899-12-31 08:00")), 8 * 3600)
expect_equal(excel_time_to_numeric(as.POSIXct("1899-12-31 13:00")), 13 * 3600)
expect_equal(excel_time_to_numeric(as.POSIXct("1899-12-31 13:05:10")), 13 * 3600 + 5 * 60 + 10)
})

test_that("excel_time_to_numeric POSIX objects ignore the time zone", {
expect_equal(excel_time_to_numeric(as.POSIXct("1899-12-31 13:00", tz="EST")), 13*3600)
expect_equal(excel_time_to_numeric(as.POSIXct("1899-12-31 13:00", tz="UTC")), 13*3600)
expect_equal(excel_time_to_numeric(as.POSIXct("1899-12-31 13:00", tz = "EST")), 13 * 3600)
expect_equal(excel_time_to_numeric(as.POSIXct("1899-12-31 13:00", tz = "UTC")), 13 * 3600)
expect_equal(
excel_time_to_numeric(
as.POSIXct(c("1899-12-31 13:00", "1899-12-31 13:00"), tz="EST")
as.POSIXct(c("1899-12-31 13:00", "1899-12-31 13:00"), tz = "EST")
),
rep(13*3600, 2)
rep(13 * 3600, 2)
)
})

test_that("excel_time_to_numeric POSIXlt works like POSIXct", {
expect_equal(
excel_time_to_numeric(as.POSIXct("1899-12-31 13:00", tz="EST")),
excel_time_to_numeric(as.POSIXlt("1899-12-31 13:00", tz="EST"))
excel_time_to_numeric(as.POSIXct("1899-12-31 13:00", tz = "EST")),
excel_time_to_numeric(as.POSIXlt("1899-12-31 13:00", tz = "EST"))
)
})

Expand All @@ -43,34 +43,34 @@ test_that("excel_time_to_numeric logical values return as expected", {
})

test_that("excel_time_to_numeric, character strings of numbers work as expected", {
expect_equal(excel_time_to_numeric("0.5"), 12*3600)
expect_equal(excel_time_to_numeric("0.5"), 12 * 3600)
expect_equal(excel_time_to_numeric("0"), 0)
expect_equal(excel_time_to_numeric("0."), 0)
expect_equal(excel_time_to_numeric("0.000000"), 0)
expect_equal(excel_time_to_numeric("0.00001"), 1)
expect_equal(
excel_time_to_numeric("0.00001", round_seconds=FALSE),
0.00001*86400
excel_time_to_numeric("0.00001", round_seconds = FALSE),
0.00001 * 86400
)
# Confirm scientific notation values
expect_equal(
excel_time_to_numeric("2.9166666666666664E-2", round_seconds=TRUE),
excel_time_to_numeric("2.9166666666666664E-2", round_seconds = TRUE),
2520
)
})

test_that("excel_time_to_numeric, am/pm times work", {
expect_equal(excel_time_to_numeric("8:00am"), 8*3600)
expect_equal(excel_time_to_numeric("8:00pm"), 20*3600)
expect_equal(excel_time_to_numeric("8:10am"), 8*3600 + 10*60)
expect_equal(excel_time_to_numeric("8:10:05am"), 8*3600 + 10*60 + 5)
expect_equal(excel_time_to_numeric("8:00am"), 8 * 3600)
expect_equal(excel_time_to_numeric("8:00pm"), 20 * 3600)
expect_equal(excel_time_to_numeric("8:10am"), 8 * 3600 + 10 * 60)
expect_equal(excel_time_to_numeric("8:10:05am"), 8 * 3600 + 10 * 60 + 5)
expect_equal(
excel_time_to_numeric("12:10:05am"), 10*60 + 5,
info="After midnight is treated as 0 not 12."
excel_time_to_numeric("12:10:05am"), 10 * 60 + 5,
info = "After midnight is treated as 0 not 12."
)
expect_equal(
excel_time_to_numeric("12:10:05pm"), 12*3600 + 10*60 + 5,
info="After noon is treated as 12."
excel_time_to_numeric("12:10:05pm"), 12 * 3600 + 10 * 60 + 5,
info = "After noon is treated as 12."
)
# Test mixed AM/PM and 24-hour clock values
expect_equal(
Expand Down Expand Up @@ -99,45 +99,45 @@ test_that("excel_time_to_numeric, am/pm times work case insensitively and with s
})

test_that("excel_time_to_numeric, 24-hour times work (zero-padded hours or not)", {
expect_equal(excel_time_to_numeric("8:00"), 8*3600)
expect_equal(excel_time_to_numeric("08:00"), 8*3600)
expect_equal(excel_time_to_numeric("08:10"), 8*3600 + 10*60)
expect_equal(excel_time_to_numeric("8:10:05"), 8*3600 + 10*60 + 5)
expect_equal(excel_time_to_numeric("21:05"), 21*3600 + 5*60)
expect_equal(excel_time_to_numeric("21:05:20"), 21*3600 + 5*60 + 20)
expect_equal(excel_time_to_numeric("8:00"), 8 * 3600)
expect_equal(excel_time_to_numeric("08:00"), 8 * 3600)
expect_equal(excel_time_to_numeric("08:10"), 8 * 3600 + 10 * 60)
expect_equal(excel_time_to_numeric("8:10:05"), 8 * 3600 + 10 * 60 + 5)
expect_equal(excel_time_to_numeric("21:05"), 21 * 3600 + 5 * 60)
expect_equal(excel_time_to_numeric("21:05:20"), 21 * 3600 + 5 * 60 + 20)
})

test_that("excel_time_to_numeric, 24-hour times work (zero-padded hours or not)", {
expect_equal(excel_time_to_numeric("8:00"), 8*3600)
expect_equal(excel_time_to_numeric("08:00"), 8*3600)
expect_equal(excel_time_to_numeric("08:10"), 8*3600 + 10*60)
expect_equal(excel_time_to_numeric("8:10:05"), 8*3600 + 10*60 + 5)
expect_equal(excel_time_to_numeric("21:05"), 21*3600 + 5*60)
expect_equal(excel_time_to_numeric("0:05"), 5*60)
expect_equal(excel_time_to_numeric("00:05"), 5*60)
expect_equal(excel_time_to_numeric("21:05:20"), 21*3600 + 5*60 + 20)
expect_equal(excel_time_to_numeric("8:00"), 8 * 3600)
expect_equal(excel_time_to_numeric("08:00"), 8 * 3600)
expect_equal(excel_time_to_numeric("08:10"), 8 * 3600 + 10 * 60)
expect_equal(excel_time_to_numeric("8:10:05"), 8 * 3600 + 10 * 60 + 5)
expect_equal(excel_time_to_numeric("21:05"), 21 * 3600 + 5 * 60)
expect_equal(excel_time_to_numeric("0:05"), 5 * 60)
expect_equal(excel_time_to_numeric("00:05"), 5 * 60)
expect_equal(excel_time_to_numeric("21:05:20"), 21 * 3600 + 5 * 60 + 20)
})

test_that("excel_time_to_numeric, POSIX times on 1899-12-31 work", {
expect_equal(excel_time_to_numeric("1899-12-31 8:00"), 8*3600)
expect_equal(excel_time_to_numeric("1899-12-31 08:00"), 8*3600)
expect_equal(excel_time_to_numeric("1899-12-31 08:10"), 8*3600 + 10*60)
expect_equal(excel_time_to_numeric("1899-12-31 8:10:05"), 8*3600 + 10*60 + 5)
expect_equal(excel_time_to_numeric("1899-12-31 21:05"), 21*3600 + 5*60)
expect_equal(excel_time_to_numeric("1899-12-31 0:05"), 5*60)
expect_equal(excel_time_to_numeric("1899-12-31 00:05"), 5*60)
expect_equal(excel_time_to_numeric("1899-12-31 21:05:20"), 21*3600 + 5*60 + 20)
expect_equal(excel_time_to_numeric("1899-12-31 8:00"), 8 * 3600)
expect_equal(excel_time_to_numeric("1899-12-31 08:00"), 8 * 3600)
expect_equal(excel_time_to_numeric("1899-12-31 08:10"), 8 * 3600 + 10 * 60)
expect_equal(excel_time_to_numeric("1899-12-31 8:10:05"), 8 * 3600 + 10 * 60 + 5)
expect_equal(excel_time_to_numeric("1899-12-31 21:05"), 21 * 3600 + 5 * 60)
expect_equal(excel_time_to_numeric("1899-12-31 0:05"), 5 * 60)
expect_equal(excel_time_to_numeric("1899-12-31 00:05"), 5 * 60)
expect_equal(excel_time_to_numeric("1899-12-31 21:05:20"), 21 * 3600 + 5 * 60 + 20)
})

test_that("excel_time_to_numeric, POSIX times ignore extra text (which is hopefully a time zone)", {
expect_equal(excel_time_to_numeric("1899-12-31 8:00 foo"), 8*3600)
expect_equal(excel_time_to_numeric("1899-12-31 08:00 foo"), 8*3600)
expect_equal(excel_time_to_numeric("1899-12-31 08:10 foo"), 8*3600 + 10*60)
expect_equal(excel_time_to_numeric("1899-12-31 8:10:05 foo"), 8*3600 + 10*60 + 5)
expect_equal(excel_time_to_numeric("1899-12-31 21:05 foo"), 21*3600 + 5*60)
expect_equal(excel_time_to_numeric("1899-12-31 0:05 foo"), 5*60)
expect_equal(excel_time_to_numeric("1899-12-31 00:05 foo"), 5*60)
expect_equal(excel_time_to_numeric("1899-12-31 21:05:20 foo"), 21*3600 + 5*60 + 20)
expect_equal(excel_time_to_numeric("1899-12-31 8:00 foo"), 8 * 3600)
expect_equal(excel_time_to_numeric("1899-12-31 08:00 foo"), 8 * 3600)
expect_equal(excel_time_to_numeric("1899-12-31 08:10 foo"), 8 * 3600 + 10 * 60)
expect_equal(excel_time_to_numeric("1899-12-31 8:10:05 foo"), 8 * 3600 + 10 * 60 + 5)
expect_equal(excel_time_to_numeric("1899-12-31 21:05 foo"), 21 * 3600 + 5 * 60)
expect_equal(excel_time_to_numeric("1899-12-31 0:05 foo"), 5 * 60)
expect_equal(excel_time_to_numeric("1899-12-31 00:05 foo"), 5 * 60)
expect_equal(excel_time_to_numeric("1899-12-31 21:05:20 foo"), 21 * 3600 + 5 * 60 + 20)
})

test_that("excel_time_to_numeric, POSIX times treat no time as midnight but only if there is a space indicating a mostly-well-formed date-time object.", {
Expand All @@ -160,4 +160,3 @@ test_that("excel_time_to_numeric, invalid character times trigger an error", {
expect_error(excel_time_to_numeric("23:05:90"))
expect_error(excel_time_to_numeric("1899-12-30 21:05:20"))
})

0 comments on commit 4970d44

Please sign in to comment.