Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 71 additions & 5 deletions r/R/type.R
Original file line number Diff line number Diff line change
Expand Up @@ -267,20 +267,28 @@ type.default <- function(x) {
#' * Called from `schema()` or `struct()`, `double()` also is supported as a
#' way of creating a `float64()`
#'
#' @param unit For date/time types, the time unit (day, second, millisecond, etc.)
#' @param timezone For `timestamp()`, an optional time zone.
#' `date32()` creates a datetime type with a "day" unit, like the R `Date`
#' class. `date64()` has a "ms" unit.
#'
#' @param unit For time/timestamp types, the time unit. `time32()` can take
#' either "s" or "ms", while `time64()` can be "us" or "ns". `timestamp()` can
#' take any of those four values.
#' @param timezone For `timestamp()`, an optional time zone string.
#' @param precision For `decimal()`, precision
#' @param scale For `decimal()`, scale
#' @param type For `list_of()`, a data type to make a list-of-type
#' @param ... For `struct()`, a named list of types to define the struct columns
#'
#' @name data-type
#' @return An Arrow type object inheriting from `arrow::DataType`.
#' @export
#' @seealso [dictionary()] for creating a dictionary (factor-like) type.
#' @examples
#' \donttest{
#' bool()
#' struct(a = int32(), b = double())
#' timestamp("ms", timezone = "CEST")
#' time64("ns")
#' }
int8 <- function() {
  # Wrap the pointer produced by Int8__initialize() in the `arrow::Int8` class
  shared_ptr(`arrow::Int8`, Int8__initialize())
}

Expand Down Expand Up @@ -358,22 +366,80 @@ date64 <- function() shared_ptr(`arrow::Date64`, Date64__initialize())

#' @rdname data-type
#' @export
time32 <- function(unit) shared_ptr(`arrow::Time32`, Time32__initialize(unit))
time32 <- function(unit = c("ms", "s")) {
  # A string unit is first resolved against this function's own defaults, so
  # calling time32() with no argument selects the first choice, "ms".
  if (is.character(unit)) unit <- match.arg(unit)
  # Translate/validate against the units permitted for a 32-bit time type
  time_unit <- make_valid_time_unit(unit, valid_time32_units)
  shared_ptr(`arrow::Time32`, Time32__initialize(time_unit))
}

# Units accepted by time32(): names are the string abbreviations users may
# pass, values are the corresponding TimeUnit members.
# The element order is reused when building the "should be one of ..." error
# messages, so reordering changes those messages.
valid_time32_units <- c(
"ms" = TimeUnit$MILLI,
"s" = TimeUnit$SECOND
)

# Units accepted by time64(): names are the string abbreviations users may
# pass, values are the corresponding TimeUnit members.
# The element order is reused when building the "should be one of ..." error
# messages, so reordering changes those messages.
valid_time64_units <- c(
"ns" = TimeUnit$NANO,
"us" = TimeUnit$MICRO
)

# Validate a time unit and normalise it to an integer code.
#
# unit:        either a string abbreviation (matched against
#              names(valid_units)) or a numeric unit code.
# valid_units: named vector mapping abbreviations to unit codes.
#
# Returns the unit as an integer. Raises an error (without the call in the
# message) when `unit` is neither character nor numeric, or when the resulting
# code is not one of `valid_units`.
make_valid_time_unit <- function(unit, valid_units) {
if (is.character(unit)) {
# Look the abbreviation up by name; match.arg() errors on an unknown string
unit <- valid_units[match.arg(unit, choices = names(valid_units))]
}
if (is.numeric(unit)) {
# Allow non-integer input for convenience
unit <- as.integer(unit)
} else {
# Neither character nor numeric (e.g. NULL): list the accepted names
stop('"unit" should be one of ', oxford_paste(names(valid_units), "or"), call.=FALSE)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a non-Oxford paste? :-)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not for me ;)

(paste() is the vectorized R string concatenation function)

}
# A numeric code outside the permitted set: the message lists the codes
# themselves (the values of valid_units), not their names
if (!(unit %in% valid_units)) {
stop('"unit" should be one of ', oxford_paste(valid_units, "or"), call.=FALSE)
}
unit
}

# Join the elements of `x` into one human-readable string, placing
# `conjunction` before the last element and using an Oxford (serial) comma
# when there are three or more elements. Character input is wrapped in double
# quotes; inputs with fewer than two elements are returned without joining.
oxford_paste <- function(x, conjunction = "and") {
  items <- if (is.character(x)) paste0('"', x, '"') else x
  n <- length(items)
  if (n < 2) {
    return(items)
  }
  # Prefix the final element with the conjunction, e.g. 'or "s"'
  items[n] <- paste(conjunction, items[n])
  # Two items need no comma: 'a or b'; three or more get ', ' between each
  separator <- if (n > 2) ", " else " "
  paste(items, collapse = separator)
}

#' @rdname data-type
#' @export
time64 <- function(unit) shared_ptr(`arrow::Time64`, Time64__initialize(unit))
time64 <- function(unit = c("ns", "us")) {
  # A string unit is first resolved against this function's own defaults, so
  # calling time64() with no argument selects the first choice, "ns".
  if (is.character(unit)) unit <- match.arg(unit)
  # Translate/validate against the units permitted for a 64-bit time type
  time_unit <- make_valid_time_unit(unit, valid_time64_units)
  shared_ptr(`arrow::Time64`, Time64__initialize(time_unit))
}

#' @rdname data-type
#' @export
null <- function() {
  # Wrap the pointer produced by Null__initialize() in the `arrow::Null` class
  shared_ptr(`arrow::Null`, Null__initialize())
}

#' @rdname data-type
#' @export
timestamp <- function(unit, timezone) {
timestamp <- function(unit = c("s", "ms", "us", "ns"), timezone) {
  # A string unit is first resolved against this function's own defaults, so
  # calling timestamp() with no unit selects the first choice, "s".
  if (is.character(unit)) unit <- match.arg(unit)
  # Timestamps accept every unit that time64() or time32() accepts
  all_units <- c(valid_time64_units, valid_time32_units)
  time_unit <- make_valid_time_unit(unit, all_units)
  if (!missing(timezone)) {
    # The time zone must be a single string
    assert_that(is.character(timezone), length(timezone) == 1)
    return(shared_ptr(`arrow::Timestamp`, Timestamp__initialize2(time_unit, timezone)))
  }
  shared_ptr(`arrow::Timestamp`, Timestamp__initialize1(time_unit))
}
Expand Down
20 changes: 15 additions & 5 deletions r/man/data-type.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 38 additions & 0 deletions r/tests/testthat/test-data-type.R
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,10 @@ test_that("timestamp type works as expected", {
expect_equal(x$unit(), unclass(TimeUnit$NANO))
})

test_that("timestamp with timezone", {
  # No unit is supplied, so the default ("s") appears in the printed type
  est_type <- timestamp(timezone = "EST")
  expect_equal(est_type$ToString(), "timestamp[s, tz=EST]")
})

test_that("time32 types work as expected", {
x <- time32(TimeUnit$SECOND)
expect_equal(x$id, 19L)
Expand Down Expand Up @@ -285,6 +289,40 @@ test_that("time64 types work as expected", {
expect_equal(x$unit(), unclass(TimeUnit$NANO))
})

# Checks that time32()/time64() accept both TimeUnit values and string
# abbreviations, fall back to their first default, and reject invalid input.
test_that("time type unit validation", {
# String abbreviations and TimeUnit values are interchangeable
expect_equal(time32(TimeUnit$SECOND), time32("s"))
expect_equal(time32(TimeUnit$MILLI), time32("ms"))
# No argument: the first default ("ms") is used
expect_equal(time32(), time32(TimeUnit$MILLI))
# Out-of-range numeric: the message lists the permitted numeric codes
expect_error(time32(4), '"unit" should be one of 1 or 0')
# Neither character nor numeric: the message lists the permitted names
expect_error(time32(NULL), '"unit" should be one of "ms" or "s"')
# Unknown string: rejected by match.arg() before unit validation runs
expect_error(time32("years"), "'arg' should be one of")

# "n" is accepted for "ns" because match.arg() allows partial matching
expect_equal(time64(TimeUnit$NANO), time64("n"))
expect_equal(time64(TimeUnit$MICRO), time64("us"))
# No argument: the first default ("ns") is used
expect_equal(time64(), time64(TimeUnit$NANO))
expect_error(time64(4), '"unit" should be one of 3 or 2')
expect_error(time64(NULL), '"unit" should be one of "ns" or "us"')
expect_error(time64("years"), "'arg' should be one of")
})

# Checks timestamp()'s handling of the unit default, invalid units, and
# invalid time zone arguments.
test_that("timestamp type input validation", {
expect_equal(timestamp("ms"), timestamp(TimeUnit$MILLI))
# No argument: the first default ("s") is used
expect_equal(timestamp(), timestamp(TimeUnit$SECOND))
# Neither character nor numeric: the message lists all four permitted names
expect_error(
timestamp(NULL),
'"unit" should be one of "ns", "us", "ms", or "s"'
)
# The time zone must be character ...
expect_error(
timestamp(timezone = 1231231),
"timezone is not a character vector"
)
# ... and of length one; fixed = TRUE because the message contains
# parentheses, which would otherwise be read as regex metacharacters
expect_error(
timestamp(timezone = c("not", "a", "timezone")),
"length(timezone) not equal to 1",
fixed = TRUE
)
})

test_that("list type works as expected", {
x <- list_of(int32())
expect_equal(x$id, 23L)
Expand Down