Skip to content

Commit d122f15

Browse files
authored
Merge pull request #13414 from rouault/fix_13401
ogr2ogr/VectorTranslate: fix (and improve) selectFields support in Arrow code path
2 parents 827bd66 + e8eb6b9 commit d122f15

File tree

2 files changed

+104
-59
lines changed

2 files changed

+104
-59
lines changed

apps/ogr2ogr_lib.cpp

Lines changed: 67 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -520,12 +520,6 @@ struct AssociatedLayers
520520

521521
class SetupTargetLayer
522522
{
523-
bool CanUseWriteArrowBatch(OGRLayer *poSrcLayer, OGRLayer *poDstLayer,
524-
bool bJustCreatedLayer,
525-
const GDALVectorTranslateOptions *psOptions,
526-
bool bPreserveFID, bool &bError,
527-
OGRArrowArrayStream &streamSrc);
528-
529523
public:
530524
GDALDataset *m_poSrcDS = nullptr;
531525
GDALDataset *m_poDstDS = nullptr;
@@ -565,6 +559,15 @@ class SetupTargetLayer
565559
std::unique_ptr<TargetLayerInfo>
566560
Setup(OGRLayer *poSrcLayer, const char *pszNewLayerName,
567561
GDALVectorTranslateOptions *psOptions, GIntBig &nTotalEventsDone);
562+
563+
private:
564+
bool CanUseWriteArrowBatch(OGRLayer *poSrcLayer, OGRLayer *poDstLayer,
565+
bool bJustCreatedLayer,
566+
const GDALVectorTranslateOptions *psOptions,
567+
bool bPreserveFID, bool &bError,
568+
OGRArrowArrayStream &streamSrc);
569+
570+
void SetIgnoredFields(OGRLayer *poSrcLayer);
568571
};
569572

570573
class LayerTranslator
@@ -4253,8 +4256,8 @@ bool SetupTargetLayer::CanUseWriteArrowBatch(
42534256
!psOptions->bUpsert && !psOptions->bSkipFailures &&
42544257
!psOptions->poClipSrc && !psOptions->poClipDst &&
42554258
psOptions->asGCPs.empty() && !psOptions->bWrapDateline &&
4256-
!m_papszSelFields && !m_bAddMissingFields &&
4257-
m_eGType == GEOMTYPE_UNCHANGED && psOptions->eGeomOp == GEOMOP_NONE &&
4259+
!m_bAddMissingFields && m_eGType == GEOMTYPE_UNCHANGED &&
4260+
psOptions->eGeomOp == GEOMOP_NONE &&
42584261
m_eGeomTypeConversion == GTC_DEFAULT && m_nCoordDim < 0 &&
42594262
!m_papszFieldTypesToString && !m_papszMapFieldType &&
42604263
!m_bUnsetFieldWidth && !m_bExplodeCollections && !m_pszZField &&
@@ -4282,6 +4285,11 @@ bool SetupTargetLayer::CanUseWriteArrowBatch(
42824285
}
42834286
}
42844287

4288+
if (m_bSelFieldsSet)
4289+
{
4290+
SetIgnoredFields(poSrcLayer);
4291+
}
4292+
42854293
const CPLStringList aosGetArrowStreamOptions(BuildGetArrowStreamOptions(
42864294
poSrcLayer, poDstLayer, psOptions, bPreserveFID));
42874295
if (poSrcLayer->GetArrowStream(streamSrc.get(),
@@ -4428,6 +4436,55 @@ bool SetupTargetLayer::CanUseWriteArrowBatch(
44284436
return bUseWriteArrowBatch;
44294437
}
44304438

4439+
/************************************************************************/
4440+
/* SetupTargetLayer::SetIgnoredFields() */
4441+
/************************************************************************/
4442+
4443+
void SetupTargetLayer::SetIgnoredFields(OGRLayer *poSrcLayer)
4444+
{
4445+
bool bUseIgnoredFields = true;
4446+
CPLStringList aosWHEREUsedFields;
4447+
const auto poSrcFDefn = poSrcLayer->GetLayerDefn();
4448+
4449+
if (m_pszWHERE)
4450+
{
4451+
/* We must not ignore fields used in the -where expression
4452+
* (#4015) */
4453+
OGRFeatureQuery oFeatureQuery;
4454+
if (oFeatureQuery.Compile(poSrcFDefn, m_pszWHERE, FALSE, nullptr) ==
4455+
OGRERR_NONE)
4456+
{
4457+
aosWHEREUsedFields = oFeatureQuery.GetUsedFields();
4458+
}
4459+
else
4460+
{
4461+
bUseIgnoredFields = false;
4462+
}
4463+
}
4464+
4465+
CPLStringList aosIgnoredFields;
4466+
for (int iSrcField = 0;
4467+
bUseIgnoredFields && iSrcField < poSrcFDefn->GetFieldCount();
4468+
iSrcField++)
4469+
{
4470+
const char *pszFieldName =
4471+
poSrcFDefn->GetFieldDefn(iSrcField)->GetNameRef();
4472+
bool bFieldRequested =
4473+
CSLFindString(m_papszSelFields, pszFieldName) >= 0;
4474+
bFieldRequested |= aosWHEREUsedFields.FindString(pszFieldName) >= 0;
4475+
bFieldRequested |=
4476+
(m_pszZField != nullptr && EQUAL(pszFieldName, m_pszZField));
4477+
4478+
// If the source field is not requested, add it to the list of ignored
4479+
// fields.
4480+
if (!bFieldRequested)
4481+
aosIgnoredFields.push_back(pszFieldName);
4482+
}
4483+
if (bUseIgnoredFields)
4484+
poSrcLayer->SetIgnoredFields(
4485+
const_cast<const char **>(aosIgnoredFields.List()));
4486+
}
4487+
44314488
/************************************************************************/
44324489
/* SetupTargetLayer::Setup() */
44334490
/************************************************************************/
@@ -5301,58 +5358,9 @@ SetupTargetLayer::Setup(OGRLayer *poSrcLayer, const char *pszNewLayerName,
53015358
/* Use SetIgnoredFields() on source layer if available */
53025359
/* --------------------------------------------------------------------
53035360
*/
5304-
if (poSrcLayer->TestCapability(OLCIgnoreFields))
5361+
if (m_bSelFieldsSet && poSrcLayer->TestCapability(OLCIgnoreFields))
53055362
{
5306-
bool bUseIgnoredFields = true;
5307-
CPLStringList aosWHEREUsedFields;
5308-
5309-
if (m_pszWHERE)
5310-
{
5311-
/* We must not ignore fields used in the -where expression
5312-
* (#4015) */
5313-
OGRFeatureQuery oFeatureQuery;
5314-
if (oFeatureQuery.Compile(poSrcLayer->GetLayerDefn(),
5315-
m_pszWHERE, FALSE,
5316-
nullptr) == OGRERR_NONE)
5317-
{
5318-
aosWHEREUsedFields = oFeatureQuery.GetUsedFields();
5319-
}
5320-
else
5321-
{
5322-
bUseIgnoredFields = false;
5323-
}
5324-
}
5325-
5326-
CPLStringList aosIgnoredFields;
5327-
for (int iSrcField = 0;
5328-
bUseIgnoredFields && iSrcField < poSrcFDefn->GetFieldCount();
5329-
iSrcField++)
5330-
{
5331-
const char *pszFieldName =
5332-
poSrcFDefn->GetFieldDefn(iSrcField)->GetNameRef();
5333-
bool bFieldRequested = false;
5334-
for (int iField = 0;
5335-
m_papszSelFields && m_papszSelFields[iField]; iField++)
5336-
{
5337-
if (EQUAL(pszFieldName, m_papszSelFields[iField]))
5338-
{
5339-
bFieldRequested = true;
5340-
break;
5341-
}
5342-
}
5343-
bFieldRequested |=
5344-
aosWHEREUsedFields.FindString(pszFieldName) >= 0;
5345-
bFieldRequested |= (m_pszZField != nullptr &&
5346-
EQUAL(pszFieldName, m_pszZField));
5347-
5348-
/* If source field not requested, add it to ignored files list
5349-
*/
5350-
if (!bFieldRequested)
5351-
aosIgnoredFields.push_back(pszFieldName);
5352-
}
5353-
if (bUseIgnoredFields)
5354-
poSrcLayer->SetIgnoredFields(
5355-
const_cast<const char **>(aosIgnoredFields.List()));
5363+
SetIgnoredFields(poSrcLayer);
53565364
}
53575365
}
53585366
else if (!bAppend || m_bAddMissingFields)

autotest/utilities/test_ogr2ogr_lib.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,25 @@ def test_ogr2ogr_lib_6():
166166
assert feat.GetFieldAsString("PRFEDEA") == "35043411"
167167

168168

169+
###############################################################################
170+
# Test selectFields with arrow optimization
171+
172+
173+
@pytest.mark.require_driver("GPKG")
174+
def test_ogr2ogr_lib_selectFields_gpkg(tmp_vsimem):
175+
176+
srcDS = gdal.VectorTranslate(tmp_vsimem / "in.gpkg", "../ogr/data/poly.shp")
177+
gdal.VectorTranslate(
178+
tmp_vsimem / "out.gpkg", srcDS, selectFields=["eas_id", "prfedea"]
179+
)
180+
ds = ogr.Open(tmp_vsimem / "out.gpkg")
181+
lyr = ds.GetLayer(0)
182+
assert lyr.GetLayerDefn().GetFieldCount() == 2
183+
feat = lyr.GetNextFeature()
184+
assert feat.GetFieldAsDouble("EAS_ID") == 168
185+
assert feat.GetFieldAsString("PRFEDEA") == "35043411"
186+
187+
169188
###############################################################################
170189
# Test selectFields to []
171190

@@ -176,6 +195,24 @@ def test_ogr2ogr_lib_sel_fields_empty():
176195
ds = gdal.VectorTranslate("", srcDS, format="MEM", selectFields=[])
177196
lyr = ds.GetLayer(0)
178197
assert lyr.GetLayerDefn().GetFieldCount() == 0
198+
feat = lyr.GetNextFeature()
199+
assert feat.GetGeometryRef() is not None
200+
201+
202+
###############################################################################
203+
# Test selectFields to []
204+
205+
206+
@pytest.mark.require_driver("GPKG")
207+
def test_ogr2ogr_lib_sel_fields_empty_with_arow_optimization(tmp_vsimem):
208+
209+
srcDS = gdal.VectorTranslate(tmp_vsimem / "in.gpkg", "../ogr/data/poly.shp")
210+
gdal.VectorTranslate(tmp_vsimem / "out.gpkg", srcDS, selectFields=[])
211+
ds = ogr.Open(tmp_vsimem / "out.gpkg")
212+
lyr = ds.GetLayer(0)
213+
assert lyr.GetLayerDefn().GetFieldCount() == 0
214+
feat = lyr.GetNextFeature()
215+
assert feat.GetGeometryRef() is not None
179216

180217

181218
###############################################################################

0 commit comments

Comments
 (0)