Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions arrow.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,3 +301,10 @@
defer r.mu.Unlock()
return r.err
}

// MoveArrowToDataChunk moves a record batch into an existing DuckDB DataChunk.
// Useful for implementing table functions that read from Arrow sources.
//
// It passes a.conn.conn, rec, and chunk.chunk to
// arrowmapping.MoveArrowToDataChunk and returns the resulting error data
// converted to an error by errorDataError.
func (a *Arrow) MoveArrowToDataChunk(rec arrow.RecordBatch, chunk DataChunk) error {
ed := arrowmapping.MoveArrowToDataChunk(a.conn.conn, rec, chunk.chunk)

Check failure on line 308 in arrow.go

View workflow job for this annotation

GitHub Actions / Test Arrow (ubuntu-latest, 1.24)

undefined: arrowmapping.MoveArrowToDataChunk

Check failure on line 308 in arrow.go

View workflow job for this annotation

GitHub Actions / Test Arrow (macos-latest, 1.24)

undefined: arrowmapping.MoveArrowToDataChunk

Check failure on line 308 in arrow.go

View workflow job for this annotation

GitHub Actions / Test Arrow (windows-latest, 1.24)

undefined: arrowmapping.MoveArrowToDataChunk
return errorDataError(ed)
}
93 changes: 93 additions & 0 deletions arrow_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"context"
"database/sql"
"database/sql/driver"
"fmt"
"sync"
"testing"

Expand Down Expand Up @@ -315,3 +316,95 @@ func TestArrowClosedConn(t *testing.T) {
})
require.Error(t, err)
}

// TestArrowTableUDF registers a chunk table UDF backed by an Arrow table and
// checks that every row of that table can be read back through a SQL query.
func TestArrowTableUDF(t *testing.T) {
	// Number of rows in the Arrow source; chosen to exceed one data chunk so
	// that the UDF has to fill multiple chunks.
	const wantRows = 10000

	db := openDbWrapper(t, ``)
	defer closeDbWrapper(t, db)

	conn := openConnWrapper(t, db, context.Background())
	defer closeConnWrapper(t, conn)

	c := newConnectorWrapper(t, ``, nil)
	defer closeConnectorWrapper(t, c)

	innerConn := openDriverConnWrapper(t, c)
	defer closeDriverConnWrapper(t, &innerConn)

	ar, err := NewArrowFromConn(innerConn)
	require.NoError(t, err)

	// Create an arrow array of type Float64 buffered in memory
	schema := arrow.NewSchema([]arrow.Field{
		{Name: "col0", Type: arrow.PrimitiveTypes.Float64},
	}, nil)
	alloc := memory.NewGoAllocator()
	builder := array.NewFloat64Builder(alloc)
	defer builder.Release()

	// Add values > data chunk size to test multiple chunks
	for range wantRows {
		builder.Append(0.5)
	}

	arr := builder.NewArray()
	rb := array.NewRecordBatch(schema, []arrow.Array{arr}, int64(arr.Len()))
	tbl := array.NewTableFromRecords(schema, []arrow.RecordBatch{rb})

	// A failed registration must stop the test here instead of surfacing
	// later as an unrelated "function not found" query error.
	err = RegisterTableUDF(conn, "get_arrow", ChunkTableFunction{
		BindArguments: func(named map[string]any, args ...any) (ChunkTableSource, error) {
			return &arrowTableUdf{tbl: tbl, ar: ar}, nil
		},
	})
	require.NoError(t, err)

	res, err := db.QueryContext(context.Background(), `SELECT * FROM get_arrow()`)
	require.NoError(t, err)
	defer closeRowsWrapper(t, res)

	var rowCount int
	for res.Next() {
		var val float64
		require.NoError(t, res.Scan(&val))
		require.Equal(t, 0.5, val)
		rowCount++
	}
	// Next returns false on both exhaustion and failure; Err tells them apart.
	require.NoError(t, res.Err())
	require.Equal(t, wantRows, rowCount)
}

// arrowTableUdf is the table UDF source used by TestArrowTableUDF. It serves
// the rows of an Arrow table to DuckDB one record batch at a time.
type arrowTableUdf struct {
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It could make sense to expose a utility UDF struct for this, such as NewArrowTableUDF(duckdb.Arrow, arrow.Table). Then library users would only need to write the BindArguments function to construct the arrow.Table from their existing dataset.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it would be nice to have a separate registration method related to the Arrow and separate UDF type for the arrow data, Could you please wrap the existing chunked table UDF or create a new one?

// ar moves Arrow record batches into DuckDB data chunks (used by FillChunk).
ar *Arrow
// tbl is the Arrow table whose rows the UDF returns.
tbl arrow.Table
// rdr iterates over tbl in record batches; it is created by Init.
rdr *array.TableReader
}

// Init creates the table reader consumed by FillChunk, passing the DuckDB data
// chunk capacity as the reader's chunk size.
func (u *arrowTableUdf) Init() {
	batchRows := int64(GetDataChunkCapacity())
	u.rdr = array.NewTableReader(u.tbl, batchRows)
}

// ColumnInfos describes the UDF's result schema: a single TYPE_DOUBLE column
// named "col0".
func (u *arrowTableUdf) ColumnInfos() []ColumnInfo {
	// The error is discarded because this method's signature has no way to
	// report it.
	doubleType, _ := NewTypeInfo(TYPE_DOUBLE)

	col := ColumnInfo{Name: "col0", T: doubleType}
	return []ColumnInfo{col}
}

// Cardinality reports the row count of the backing Arrow table and marks it as
// exact.
func (u *arrowTableUdf) Cardinality() *CardinalityInfo {
	info := new(CardinalityInfo)
	info.Cardinality = uint(u.tbl.NumRows())
	info.Exact = true
	return info
}

// FillChunk moves the reader's next record batch into chunk and sets the
// chunk's size to the number of rows in that batch. When the reader has no
// further batches, chunk is left untouched and nil is returned.
//
// It returns an error if the batch cannot be moved into the chunk or if the
// chunk size cannot be set.
func (u *arrowTableUdf) FillChunk(chunk DataChunk) error {
	if !u.rdr.Next() {
		return nil
	}

	// The current batch belongs to the reader, which releases it on the next
	// call to Next; releasing it here as well would over-release it.
	b := u.rdr.RecordBatch()
	if err := u.ar.MoveArrowToDataChunk(b, chunk); err != nil {
		return fmt.Errorf("failed to move arrow to data chunk: %w", err)
	}
	if err := chunk.SetSize(int(b.NumRows())); err != nil {
		return fmt.Errorf("failed to set data chunk size: %w", err)
	}

	return nil
}
3 changes: 2 additions & 1 deletion arrowmapping/arrow_mapping.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,6 @@ var (
NewArrowSchema = bindings.NewArrowSchema
DataChunkToArrowArray = bindings.DataChunkToArrowArray
SchemaFromArrow = bindings.SchemaFromArrow
DataChunkFromArrow = bindings.DataChunkFromArrow
DataChunkFromArrow = bindings.NewDataChunkFromArrow
MoveArrowToDataChunk = bindings.MoveArrowToDataChunk
)
3 changes: 2 additions & 1 deletion arrowmapping/arrow_mapping_darwin_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,6 @@ var (
NewArrowSchema = bindings.NewArrowSchema
DataChunkToArrowArray = bindings.DataChunkToArrowArray
SchemaFromArrow = bindings.SchemaFromArrow
DataChunkFromArrow = bindings.DataChunkFromArrow
DataChunkFromArrow = bindings.NewDataChunkFromArrow
MoveArrowToDataChunk = bindings.MoveArrowToDataChunk
)
3 changes: 2 additions & 1 deletion arrowmapping/arrow_mapping_darwin_arm64.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,6 @@ var (
NewArrowSchema = bindings.NewArrowSchema
DataChunkToArrowArray = bindings.DataChunkToArrowArray
SchemaFromArrow = bindings.SchemaFromArrow
DataChunkFromArrow = bindings.DataChunkFromArrow
DataChunkFromArrow = bindings.NewDataChunkFromArrow
MoveArrowToDataChunk = bindings.MoveArrowToDataChunk
)
3 changes: 2 additions & 1 deletion arrowmapping/arrow_mapping_linux_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,6 @@ var (
NewArrowSchema = bindings.NewArrowSchema
DataChunkToArrowArray = bindings.DataChunkToArrowArray
SchemaFromArrow = bindings.SchemaFromArrow
DataChunkFromArrow = bindings.DataChunkFromArrow
DataChunkFromArrow = bindings.NewDataChunkFromArrow
MoveArrowToDataChunk = bindings.MoveArrowToDataChunk
)
3 changes: 2 additions & 1 deletion arrowmapping/arrow_mapping_linux_arm64.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,6 @@ var (
NewArrowSchema = bindings.NewArrowSchema
DataChunkToArrowArray = bindings.DataChunkToArrowArray
SchemaFromArrow = bindings.SchemaFromArrow
DataChunkFromArrow = bindings.DataChunkFromArrow
DataChunkFromArrow = bindings.NewDataChunkFromArrow
MoveArrowToDataChunk = bindings.MoveArrowToDataChunk
)
3 changes: 2 additions & 1 deletion arrowmapping/arrow_mapping_windows_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,6 @@ var (
NewArrowSchema = bindings.NewArrowSchema
DataChunkToArrowArray = bindings.DataChunkToArrowArray
SchemaFromArrow = bindings.SchemaFromArrow
DataChunkFromArrow = bindings.DataChunkFromArrow
DataChunkFromArrow = bindings.NewDataChunkFromArrow
MoveArrowToDataChunk = bindings.MoveArrowToDataChunk
)
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,5 @@ require (
golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

replace github.com/duckdb/duckdb-go-bindings => github.com/wmTJc9IK0Q/duckdb-go-bindings v0.0.0-20251201050337-6b010671d08c
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ github.com/apache/thrift v0.22.0 h1:r7mTJdj51TMDe6RtcmNdQxgn9XcyfGDOzegMDRg47uc=
github.com/apache/thrift v0.22.0/go.mod h1:1e7J/O1Ae6ZQMTYdy9xa3w9k+XHWPfRvdPyJeynQ+/g=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/duckdb/duckdb-go-bindings v0.1.23 h1:sJRXraxfC/gdHI2T7oHqrdp1VdKemrgqWGQ8986mH1c=
github.com/duckdb/duckdb-go-bindings v0.1.23/go.mod h1:WA7U/o+b37MK2kiOPPueVZ+FIxt5AZFCjszi8hHeH18=
github.com/duckdb/duckdb-go-bindings/darwin-amd64 v0.1.23 h1:Xyw1fWu4jzOtv2Hqkaehr7f+qbIWNRfBMbZyD+g8dyU=
github.com/duckdb/duckdb-go-bindings/darwin-amd64 v0.1.23/go.mod h1:jfbOHwGZqNCpMAxV4g4g5jmWr0gKdMvh2fGusPubxC4=
github.com/duckdb/duckdb-go-bindings/darwin-arm64 v0.1.23 h1:85Xomx5NxZ+Nt+VepUJzuMYbBTH+nB6JlBXIyJuTovA=
Expand Down Expand Up @@ -50,6 +48,8 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/wmTJc9IK0Q/duckdb-go-bindings v0.0.0-20251201050337-6b010671d08c h1:DUXYLuHDhiy+pcrgFxQHZo/SxLG3Gun0xAJOnlq1HnA=
github.com/wmTJc9IK0Q/duckdb-go-bindings v0.0.0-20251201050337-6b010671d08c/go.mod h1:WA7U/o+b37MK2kiOPPueVZ+FIxt5AZFCjszi8hHeH18=
github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ=
github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0=
Expand Down
Loading