Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion r/R/array.R
Original file line number Diff line number Diff line change
Expand Up @@ -472,9 +472,13 @@ names.StructArray <- function(x, ...) StructType__field_names(x$type)
#' @export
dim.StructArray <- function(x, ...) {
  # Two-element dimension vector: the length of the array, then the number
  # of fields reported by its struct type.
  n_rows <- length(x)
  n_cols <- x$type$num_fields
  c(n_rows, n_cols)
}

# as_df() method for StructArray: the conversion is whatever as.vector()
# produces for the array. Extra arguments in `...` are accepted but unused.
as_df.StructArray <- function(x, ...) {
  converted <- as.vector(x)
  converted
}

#' @export
as.data.frame.StructArray <- function(x, row.names = NULL, optional = FALSE, ...) {
  # Convert once through as_df() and let base R finish the coercion, so that
  # `row.names`, `optional` and `...` are honoured. The previous body also
  # evaluated a stray `as.vector(x)` whose result was discarded, which
  # materialised the array twice.
  as.data.frame(as_df(x), row.names = row.names, optional = optional, ...)
}

#' @rdname array
Expand Down
8 changes: 8 additions & 0 deletions r/R/arrow-tabular.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ ArrowTabular <- R6Class("ArrowTabular",

#' @export
as.data.frame.ArrowTabular <- function(x, row.names = NULL, optional = FALSE, ...) {
  # Run the as_df() conversion first, then hand the result to base
  # as.data.frame() together with the caller's coercion arguments.
  converted <- as_df(x)
  as.data.frame(converted, row.names = row.names, optional = optional, ...)
}

# as_df() method for ArrowTabular objects: converts with the object's own
# to_data_frame() method and then applies the R metadata stored under
# `x$metadata$r`.
as_df.ArrowTabular <- function(x, ...) {
  converted <- x$to_data_frame()
  apply_arrow_r_metadata(converted, x$metadata$r)
}
Expand Down Expand Up @@ -259,3 +263,7 @@ na.omit.ArrowTabular <- function(object, ...) {

# na.exclude() for ArrowTabular objects is an alias of the na.omit() method:
# both names are bound to the same function object.
#' @export
na.exclude.ArrowTabular <- na.omit.ArrowTabular

# Internal S3 generic used to convert Arrow objects for the as.data.frame()
# methods and the readers.
#
# `...` is part of the generic's signature because every method is declared
# as `function(x, ...)` and some callers forward extra arguments
# (`as_df(x, ...)`); without it, any extra argument fails with an
# "unused argument" error before dispatch happens.
as_df <- function(x, ...) {
  UseMethod("as_df")
}
2 changes: 1 addition & 1 deletion r/R/csv.R
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ read_delim_arrow <- function(file,
}

if (isTRUE(as_data_frame)) {
tab <- as.data.frame(tab)
tab <- as_df(tab)
}

tab
Expand Down
6 changes: 5 additions & 1 deletion r/R/dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -522,9 +522,13 @@ dim.Dataset <- function(x) c(x$num_rows, x$num_cols)
#' @export
c.Dataset <- function(...) {
  # Gather every argument into a list and pass it to Dataset$create().
  datasets <- list(...)
  Dataset$create(datasets)
}

# as_df() method for Dataset: delegates to collect.Dataset(). Extra
# arguments in `...` are accepted but not forwarded.
as_df.Dataset <- function(x, ...) {
  collected <- collect.Dataset(x)
  collected
}

#' @export
as.data.frame.Dataset <- function(x, row.names = NULL, optional = FALSE, ...) {
  # Collect exactly once, via as_df(), then coerce with base as.data.frame()
  # so `row.names`, `optional` and `...` are honoured. The previous body also
  # ran a stray `collect.Dataset(x)` whose result was thrown away, i.e. the
  # dataset was collected twice per call.
  as.data.frame(as_df(x), row.names = row.names, optional = optional, ...)
}

#' @export
Expand Down
3 changes: 2 additions & 1 deletion r/R/dplyr-collect.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,10 @@ collect.arrow_dplyr_query <- function(x, as_data_frame = TRUE, ...) {
out <- compute.arrow_dplyr_query(x)
collect.ArrowTabular(out, as_data_frame)
}

collect.ArrowTabular <- function(x, as_data_frame = TRUE, ...) {
if (as_data_frame) {
as.data.frame(x, ...)
as_df(x, ...)
} else {
x
}
Expand Down
2 changes: 1 addition & 1 deletion r/R/dplyr-glimpse.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ glimpse.ArrowTabular <- function(x,
var_headings <- paste("$", center_pad(tickify(names(x)), var_types))

# Assemble the data glimpse
df <- as.data.frame(head_tab)
df <- as_df(head_tab)
formatted_data <- map_chr(df, function(.) {
tryCatch(
paste(pillar::format_glimpse(.), collapse = ", "),
Expand Down
4 changes: 4 additions & 0 deletions r/R/dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,10 @@ unique.RecordBatchReader <- unique.arrow_dplyr_query

#' @export
as.data.frame.arrow_dplyr_query <- function(x, row.names = NULL, optional = FALSE, ...) {
  # Evaluate the query through as_df(), then coerce the result with base
  # as.data.frame(), forwarding the caller's coercion arguments.
  collected <- as_df(x)
  as.data.frame(collected, row.names = row.names, optional = optional, ...)
}

# as_df() method for arrow_dplyr_query: delegates to
# collect.arrow_dplyr_query() with `as_data_frame = TRUE`, forwarding `...`.
as_df.arrow_dplyr_query <- function(x, ...) {
  collected <- collect.arrow_dplyr_query(x, as_data_frame = TRUE, ...)
  collected
}

Expand Down
2 changes: 1 addition & 1 deletion r/R/feather.R
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, mmap = T
)

if (isTRUE(as_data_frame)) {
out <- as.data.frame(out)
out <- as_df(out)
}
out
}
Expand Down
2 changes: 1 addition & 1 deletion r/R/ipc-stream.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ read_ipc_stream <- function(file, as_data_frame = TRUE, ...) {
# https://issues.apache.org/jira/browse/ARROW-6830
out <- RecordBatchStreamReader$create(file)$read_table()
if (as_data_frame) {
out <- as.data.frame(out)
out <- as_df(out)
}
out
}
2 changes: 1 addition & 1 deletion r/R/json.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ read_json_arrow <- function(file,
}

if (isTRUE(as_data_frame)) {
tab <- as.data.frame(tab)
tab <- as_df(tab)
}
tab
}
Expand Down
2 changes: 1 addition & 1 deletion r/R/parquet.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ read_parquet <- function(file,
}

if (as_data_frame) {
tab <- as.data.frame(tab)
tab <- as_df(tab)
}
tab
}
Expand Down
6 changes: 5 additions & 1 deletion r/R/record-batch-reader.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,11 @@ dim.RecordBatchReader <- function(x) c(NA_integer_, length(x$schema))

#' @export
as.data.frame.RecordBatchReader <- function(x, row.names = NULL, optional = FALSE, ...) {
  # The reader is read only through as_df(); base as.data.frame() then
  # applies `row.names`, `optional` and `...`. The previous body first called
  # `x$read_table()` in a statement whose result was discarded and then read
  # the reader a second time via as_df().
  # NOTE(review): presumably the first read exhausts the stream, leaving the
  # second with nothing to return -- confirm against the reader semantics.
  as.data.frame(as_df(x), row.names = row.names, optional = optional, ...)
}

# as_df() method for RecordBatchReader.
#
# `x$read_table()` yields an Arrow object that still needs converting (the
# readers in this package call as_df() on a read_table() result when a data
# frame is requested), so the table is passed through as_df() here as well.
# Returning the raw read_table() result made this method inconsistent with
# the other as_df() methods.
as_df.RecordBatchReader <- function(x, ...) {
  as_df(x$read_table())
}

#' @export
Expand Down
2 changes: 1 addition & 1 deletion r/R/schema.R
Original file line number Diff line number Diff line change
Expand Up @@ -386,5 +386,5 @@ as_schema.StructType <- function(x, ...) {

#' @export
as.data.frame.Schema <- function(x, row.names = NULL, optional = FALSE, ...) {
  # Build a table from the schema with Table__from_schema() and convert it a
  # single time, forwarding `row.names`, `optional` and `...`. The previous
  # body also performed a first conversion that ignored those arguments and
  # whose result was discarded.
  as.data.frame(Table__from_schema(x), row.names = row.names, optional = optional, ...)
}
3 changes: 2 additions & 1 deletion r/src/table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,8 @@ arrow::Status AddMetadataFromDots(SEXP lst, int num_fields,
// "top level" attributes, only relevant if the first object is not named and a data
// frame
cpp11::strings names = Rf_getAttrib(lst, R_NamesSymbol);
if (names[0] == "" && Rf_inherits(VECTOR_ELT(lst, 0), "data.frame")) {
if (names[0] == "" && Rf_inherits(VECTOR_ELT(lst, 0), "data.frame") &&
Rf_xlength(lst) == 1) {
SEXP top_level = metadata[0] = arrow_attributes(VECTOR_ELT(lst, 0), true);
if (!Rf_isNull(top_level) && XLENGTH(top_level) > 0) {
has_top_level_metadata = true;
Expand Down
16 changes: 8 additions & 8 deletions r/tests/testthat/test-RecordBatch.R
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ test_that("record_batch() handles data frame columns", {
b = struct(x = int32(), y = int32())
)
)
out <- as.data.frame(batch)
out <- as_tibble(batch)
expect_equal(out, tibble::tibble(a = 1:10, b = tib))

# if not named, columns from tib are auto spliced
Expand All @@ -355,7 +355,7 @@ test_that("record_batch() handles data frame columns", {
batch2$schema,
schema(a = int32(), x = int32(), y = int32())
)
out <- as.data.frame(batch2)
out <- as_tibble(batch2)
expect_equal(out, tibble::tibble(a = 1:10, !!!tib))
})

Expand All @@ -366,7 +366,7 @@ test_that("record_batch() handles data frame columns with schema spec", {
schema <- schema(a = int32(), b = struct(x = int16(), y = float64()))
batch <- record_batch(a = 1:10, b = tib, schema = schema)
expect_equal(batch$schema, schema)
out <- as.data.frame(batch)
out <- as_tibble(batch)
expect_equal(out, tibble::tibble(a = 1:10, b = tib_float))

schema <- schema(a = int32(), b = struct(x = int16(), y = utf8()))
Expand All @@ -386,7 +386,7 @@ test_that("record_batch() auto splices (ARROW-5718)", {
expect_equal(batch3, batch4)
expect_equal(batch3$schema, schema(x = int32(), y = utf8(), z = int32()))
expect_equal(
as.data.frame(batch3),
as_tibble(batch3),
tibble::as_tibble(cbind(df, data.frame(z = 1:10)))
)

Expand All @@ -395,15 +395,15 @@ test_that("record_batch() auto splices (ARROW-5718)", {
batch6 <- record_batch(!!!df, schema = s)
expect_equal(batch5, batch6)
expect_equal(batch5$schema, s)
expect_equal(as.data.frame(batch5), df)
expect_equal(as_tibble(batch5), df)

s2 <- schema(x = float64(), y = utf8(), z = int16())
batch7 <- record_batch(df, z = 1:10, schema = s2)
batch8 <- record_batch(!!!df, z = 1:10, schema = s2)
expect_equal(batch7, batch8)
expect_equal(batch7$schema, s2)
expect_equal(
as.data.frame(batch7),
as_tibble(batch7),
tibble::as_tibble(cbind(df, data.frame(z = 1:10)))
)
})
Expand Down Expand Up @@ -627,7 +627,7 @@ test_that("Handling string data with embedded nuls", {
# altrep. Without it (i.e. 3.5.0 and below, the error would trigger immediately
# on `as.vector()` where as with it, the error only happens on materialization)
skip_on_r_older_than("3.6")
df <- as.data.frame(batch_with_nul)
df <- as_tibble(batch_with_nul)

expect_error(
df$b[],
Expand All @@ -648,7 +648,7 @@ test_that("Handling string data with embedded nuls", {
suppressWarnings(
expect_warning(
expect_equal(
as.data.frame(batch_with_nul)$b,
as_tibble(batch_with_nul)$b,
c("person", "woman", "man", "camera", "tv"),
ignore_attr = TRUE
),
Expand Down
31 changes: 26 additions & 5 deletions r/tests/testthat/test-Table.R
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ test_that("table() handles ... of arrays, chunked arrays, vectors", {
tab$schema,
schema(a = int32(), b = int32(), c = float64(), x = int32(), y = utf8())
)
res <- as.data.frame(tab)
res <- as_tibble(tab)
expect_equal(names(res), c("a", "b", "c", "x", "y"))
expect_equal(
res,
Expand All @@ -280,14 +280,14 @@ test_that("table() auto splices (ARROW-5718)", {
tab2 <- Table$create(!!!df)
expect_equal(tab1, tab2)
expect_equal(tab1$schema, schema(x = int32(), y = utf8()))
expect_equal(as.data.frame(tab1), df)
expect_equal(as_tibble(tab1), df)

s <- schema(x = float64(), y = utf8())
tab3 <- Table$create(df, schema = s)
tab4 <- Table$create(!!!df, schema = s)
expect_equal(tab3, tab4)
expect_equal(tab3$schema, s)
expect_equal(as.data.frame(tab3), df)
expect_equal(as_tibble(tab3), df)
})

test_that("Validation when creating table with schema (ARROW-10953)", {
Expand Down Expand Up @@ -366,7 +366,7 @@ test_that("Can create table with specific dictionary types", {
expect_equal(sch, tab$schema)
if (i != int64()) {
# TODO: same downcast to int32 as we do for int64() type elsewhere
expect_identical(as.data.frame(tab), fact)
expect_identical(as_tibble(tab), fact)
}
}
})
Expand All @@ -380,7 +380,7 @@ test_that("Table unifies dictionary on conversion back to R (ARROW-8374)", {
res <- tibble::tibble(f = factor(c("a", "c", NA), levels = c("a", "b", "c", "d")))
tab <- Table$create(b1, b2, b3, b4)

expect_identical(as.data.frame(tab), res)
expect_identical(as_tibble(tab), res)
})

test_that("Table$SelectColumns()", {
Expand Down Expand Up @@ -711,3 +711,24 @@ test_that("as_arrow_table() errors on data.frame with NULL names", {
names(df) <- NULL
expect_error(as_arrow_table(df), "Input data frame columns must be named")
})

test_that("as.data.frame() on an ArrowTabular object returns a vanilla data.frame and not a tibble", {
  df <- data.frame(x = 1)

  # Three ways of building the table: data frame first with an extra named
  # column, named column first, and the data frame on its own.
  tables <- list(
    arrow::arrow_table(df, name = "1"),
    arrow::arrow_table(name = "1", df),
    arrow::arrow_table(df)
  )

  # `exact = TRUE` requires the class vector to be exactly "data.frame".
  for (tab in tables) {
    expect_s3_class(as.data.frame(tab), "data.frame", exact = TRUE)
  }
})

test_that("as_tibble.ArrowTabular retains groups", {
  # Calling as_tibble.default on ArrowTabular objects drops any grouping,
  # which is why as_tibble.ArrowTabular is needed.
  # `y` has length 2 and is recycled by data.frame() to match the 4 rows of `x`.
  df <- data.frame(x = 1:4, y = c("a", "b"))
  df_grouped <- dplyr::group_by(df, y)
  arrow_grouped <- arrow_table(df_grouped)
  # NOTE(review): as_tibble() is never called directly in this test;
  # presumably expect_data_frame() performs the conversion -- confirm against
  # the helper's definition.
  expect_data_frame(arrow_grouped, df_grouped)

})
2 changes: 1 addition & 1 deletion r/tests/testthat/test-compute-aggregate.R
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ test_that("is_in", {

test_that("value_counts", {
a <- Array$create(c(1, 4, 3, 1, 1, 3, 4))
result_df <- tibble::tibble(
result_df <- data.frame(
values = c(1, 4, 3),
counts = c(3L, 2L, 2L)
)
Expand Down
4 changes: 2 additions & 2 deletions r/tests/testthat/test-compute-sort.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,15 +141,15 @@ test_that("Table$SortIndices()", {
sort(tbl$chr, na.last = TRUE)
)
expect_identical(
as.data.frame(x$Take(x$SortIndices(c("int", "dbl"), c(FALSE, FALSE)))),
as_tibble(x$Take(x$SortIndices(c("int", "dbl"), c(FALSE, FALSE)))),
tbl %>% arrange(int, dbl)
)
})

test_that("RecordBatch$SortIndices()", {
  x <- record_batch(tbl)
  # Sort by all three columns in descending order and compare against the
  # same ordering done by dplyr on the source tibble. Exactly two values are
  # compared: the superseded `as.data.frame(...)` argument is removed, since
  # with it the dplyr result was no longer the `expected` argument.
  sorted <- x$Take(x$SortIndices(c("chr", "int", "dbl"), TRUE))
  expect_identical(
    as_tibble(sorted),
    tbl %>% arrange(desc(chr), desc(int), desc(dbl))
  )
})
2 changes: 1 addition & 1 deletion r/tests/testthat/test-dataset-csv.R
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ test_that("CSV scan options", {
sb$FragmentScanOptions(options)

tab <- sb$Finish()$ToTable()
expect_equal(as.data.frame(tab), tibble(chr = c("foo", NA)))
expect_equal(as_tibble(tab), tibble(chr = c("foo", NA)))

# Set default convert options in CsvFileFormat
csv_format <- CsvFileFormat$create(
Expand Down
Loading