-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Closed
Labels
bug (Something isn't working), help wanted (Extra attention is needed)
Description
Describe the bug
The following test passes on DataFusion 33 but fails on DataFusion 34:
#[tokio::test]
async fn struct_projection_regression() -> datafusion::error::Result<()> {
let ctx = SessionContext::new();
let schema = Arc::new(Schema::new(vec![
Field::new("a", DataType::Int64, false),
Field::new_struct(
"s",
vec![
Field::new("x", DataType::Int64, false),
Field::new("y", DataType::Int64, false),
],
false,
),
]));
/// Table provider used by this reproducer; wraps the full table schema.
struct TestProvider(SchemaRef);

#[async_trait]
impl TableProvider for TestProvider {
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    /// Returns the wrapped (unprojected) schema.
    fn schema(&self) -> SchemaRef {
        Arc::clone(&self.0)
    }

    fn table_type(&self) -> TableType {
        TableType::Base
    }

    /// Builds a three-row batch matching the full schema and returns it,
    /// narrowed to the requested columns, wrapped in a `ValuesExec`.
    ///
    /// # Panics
    ///
    /// Panics if `projection` is `None` or does not contain exactly one
    /// column index; that check is the point of this reproducer.
    async fn scan(
        &self,
        _state: &SessionState,
        projection: Option<&Vec<usize>>,
        _filters: &[Expr],
        _limit: Option<usize>,
    ) -> datafusion::error::Result<Arc<dyn ExecutionPlan>> {
        // Print the projection handed to the provider, then require that it
        // names a single column.
        dbg!(&projection);
        assert!(projection.unwrap().len() == 1);
        let indices = projection.unwrap();

        // Schema restricted to the projected columns.
        let projected_schema: SchemaRef = self.schema().project(indices)?.into();

        // Column `a`.
        let a_column: ArrayRef = Arc::new(Int64Array::from(vec![1, 2, 3]));

        // Column `s`, a struct with children `x` and `y`.
        let x_field = Arc::new(Field::new("x", DataType::Int64, false));
        let x_values: ArrayRef = Arc::new(Int64Array::from(vec![4, 5, 6]));
        let y_field = Arc::new(Field::new("y", DataType::Int64, false));
        let y_values: ArrayRef = Arc::new(Int64Array::from(vec![7, 8, 9]));
        let s_column: ArrayRef = Arc::new(StructArray::from(vec![
            (x_field, x_values),
            (y_field, y_values),
        ]));

        // Build the batch against the full schema, then keep only the
        // projected columns so it matches `projected_schema`.
        let full_batch = RecordBatch::try_new(self.schema(), vec![a_column, s_column])?;
        let projected_batch = full_batch.project(indices)?;

        let exec = ValuesExec::try_new_from_batches(projected_schema, vec![projected_batch])?;
        Ok(Arc::new(exec))
    }
}
let df = ctx
.read_table(Arc::new(TestProvider(schema)))?
.select(vec![col("s").field("x")])?;
let df_results = df.explain(false, false)?.collect().await?;
assert_batches_eq!(
[
"+---------------+--------------------------------------------------+",
"| plan_type | plan |",
"+---------------+--------------------------------------------------+",
"| logical_plan | Projection: (?table?.s)[x] |",
"| | TableScan: ?table? projection=[s] |",
"| physical_plan | ProjectionExec: expr=[(s@0).[x] as ?table?.s[x]] |",
"| | ValuesExec |",
"| | |",
"+---------------+--------------------------------------------------+",
],
&df_results
);
Ok(())
}
DataFusion 34 fails at the line `assert!(projection.unwrap().len() == 1);` because `projection` contains [1, 2].
If I only select col("s") without accessing the field x, the projection correctly only contains [1].
To Reproduce
A testcase is provided above.
Expected behavior
I would expect the projection passed to TableProvider to only contain [1] as that is the only field needed in the query.
Additional context
No response
Metadata
Metadata
Assignees
Labels
bug (Something isn't working), help wanted (Extra attention is needed)