unionai-oss · ShootingStarD · May 1, 2025 · May 2, 2025 · May 6, 2025 · May 6, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -72,4 +72,4 @@ repos:
     hooks:
       - id: codespell
         additional_dependencies:
-          - tomli
+          - tomli
diff --git a/pandera/io/pandas_io.py b/pandera/io/pandas_io.py
@@ -6,7 +6,8 @@
 from functools import partial
 from pathlib import Path
 from typing import Dict, Optional, Union
 
 import pandas as pd
+from frictionless import Field
 
 import pandera.errors
@@ -642,11 +643,18 @@
         duplicates, no missing values etc.
     """
 
-    def __init__(self, field, primary_keys) -> None:
+    def __init__(self, field: Field, primary_keys: list[str]) -> None:
+        """Store the attributes of a frictionless field needed for parsing.
+
+        :param field: frictionless field to parse.
+        :param primary_keys: names of the fields forming the primary key.
+        """
         self.constraints = field.constraints or {}
         self.primary_keys = primary_keys
+        self.description = field.description
+        self.title = field.title
         self.name = field.name
-        self.type = field.get("type", "string")
+        self.type = field.to_dict().get("type", "string")
 
     @property
     def dtype(self) -> str:
@@ -791,6 +799,8 @@
             "required": self.required,
             "name": self.name,
             "regex": self.regex,
+            "description": self.description,
+            "title": self.title,
         }
 
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -8,12 +8,10 @@ version_file = "pandera/_version.py"
 [project]
 name = "pandera"
 dynamic = ["version"]
-authors = [
-    {name = "Niels Bantilan", email = "[email protected]"},
-]
+authors = [{ name = "Niels Bantilan", email = "[email protected]" }]
 description = "A light-weight and flexible data validation and testing tool for statistical data objects."
 readme = "README.md"
-license = {file = "LICENSE.txt"}
+license = { file = "LICENSE.txt" }
 requires-python = ">=3.9"
 keywords = ["pandas", "validation", "data-structures"]
 classifiers = [
@@ -43,55 +41,26 @@ Documentation = "https://pandera.readthedocs.io"
 Homepage = "https://github.com/pandera-dev/pandera"
 
 [project.optional-dependencies]
-pandas = [
-    "numpy >= 1.24.4",
-    "pandas >= 2.1.1",
-]
-strategies = [
-    "hypothesis >= 6.92.7",
-]
-hypotheses = [
-    "scipy",
-]
-io = [
-    "pyyaml >= 5.1",
-    "black",
-    "frictionless <= 4.40.8",
-]
+pandas = ["numpy >= 1.24.4", "pandas >= 2.1.1"]
+strategies = ["hypothesis >= 6.92.7"]
+hypotheses = ["scipy"]
+io = ["pyyaml >= 5.1", "black", "frictionless >= 5.18.1"]
 mypy = ["pandas-stubs"]
 fastapi = ["fastapi"]
-geopandas = [
-    "geopandas",
-    "shapely",
-]
+geopandas = ["geopandas", "shapely"]
 pyspark = ["pyspark[connect] >= 3.2.0"]
-modin = [
-    "modin",
-    "ray",
-    "dask[dataframe]",
-    "distributed",
-]
-modin-ray = [
-    "modin",
-    "ray",
-]
-modin-dask = [
-    "modin",
-    "dask[dataframe]",
-    "distributed",
-]
-dask = [
-    "dask[dataframe]",
-    "distributed",
-]
+modin = ["modin", "ray", "dask[dataframe]", "distributed"]
+modin-ray = ["modin", "ray"]
+modin-dask = ["modin", "dask[dataframe]", "distributed"]
+dask = ["dask[dataframe]", "distributed"]
 
 polars = ["polars >= 0.20.0"]
 all = [
     "hypothesis >= 6.92.7",
     "scipy",
     "pyyaml >= 5.1",
     "black",
-    "frictionless <= 4.40.8",
+    "frictionless >= 5.18.1",
     "pyspark[connect] >= 3.2.0",
     "modin",
     "ray",
@@ -121,12 +90,7 @@ dev = [
     "python-multipart",
     "uv",
 ]
-testing = [
-    "pytest",
-    "pytest-cov",
-    "pytest-xdist",
-    "pytest-asyncio",
-]
+testing = ["pytest", "pytest-cov", "pytest-xdist", "pytest-asyncio"]
 docs = [
     "sphinx",
     "sphinx-design",
@@ -147,7 +111,7 @@ docs = [
 
 [tool.setuptools]
 packages = ["pandera"]
-package-data = {"pandera" = ["py.typed"]}
+package-data = { "pandera" = ["py.typed"] }
 
 [tool.pyright]
 include = ["pandera", "tests"]
@@ -159,22 +123,11 @@ log_cli_level = 20
 
 [tool.ruff]
 line-length = 120
-extend-exclude = [
-    "setup.py",
-    ".venv",
-    ".nox",
-    ".git",
-    "asv_bench",
-]
+extend-exclude = ["setup.py", ".venv", ".nox", ".git", "asv_bench"]
 
 [tool.black]
 line-length = 79
-target-version = [
-  'py39',
-  'py310',
-  'py311',
-  'py312',
-]
+target-version = ['py39', 'py310', 'py311', 'py312']
 include = '\.pyi?$'
 exclude = '''
 (
@@ -187,4 +140,4 @@ exclude = '''
 '''
 
 [tool.codespell]
-ignore-words-list = ["notin", "splitted", "fo", "strat"]
+ignore-words-list = ["notin", "splitted", "fo", "strat"]
diff --git a/requirements.txt b/requirements.txt
@@ -7,7 +7,7 @@ typing_extensions
 hypothesis >= 6.92.7
 pyyaml >= 5.1
 typing_inspect >= 0.6.0
-frictionless <= 4.40.8
+frictionless >= 5.18.1
 pyarrow
 pydantic
 scipy

diff --git a/tests/io/test_pandas_io.py b/tests/io/test_pandas_io.py
@@ -15,6 +15,7 @@
 import pandera.typing as pat
 from pandera.api.pandas.container import DataFrameSchema
 from pandera.engines import pandas_engine
 
 try:
     from pandera import io
+    from pandera.io import from_frictionless_schema
@@ -1155,6 +1156,7 @@ def datetime_check(pandas_obj, *, stat): ...
       maxLength: 80
       minLength: 3
     name: string_col
+    type: string
   - constraints:
       pattern: \\d{3}[A-Z]
     name: string_col_2
@@ -1172,16 +1174,17 @@ def datetime_check(pandas_obj, *, stat): ...
       required: true
     name: float_col
     type: number
-  - constraints:
-    name: float_col_2
+  - name: float_col_2
     type: number
   - constraints:
       minimum: "20201231"
     name: date_col
+    type: date
 primaryKey: integer_col
 """
 )
 
+
 FRICTIONLESS_JSON = {
     "fields": [
         {
@@ -1432,7 +1435,7 @@ def datetime_check(pandas_obj, *, stat): ...
 )
 def test_frictionless_schema_parses_correctly(frictionless_schema):
     """Test parsing frictionless schema from yaml and json."""
-    schema = pandera.io.from_frictionless_schema(frictionless_schema)
+    schema = from_frictionless_schema(frictionless_schema)
 
     assert str(schema.to_yaml()).strip() == YAML_FROM_FRICTIONLESS.strip()
 
@@ -1513,11 +1516,52 @@ def test_frictionless_schema_primary_key(frictionless_schema):
     If the primary key is only one field, the unique field should be in the
     column level and not the dataframe level.
     """
-    schema = pandera.io.from_frictionless_schema(frictionless_schema)
+    schema = from_frictionless_schema(frictionless_schema)
     if len(frictionless_schema["primaryKey"]) == 1:
         assert schema.columns[frictionless_schema["primaryKey"][0]].unique
         assert schema.unique is None
     else:
         assert schema.unique == frictionless_schema["primaryKey"]
         for key in frictionless_schema["primaryKey"]:
             assert not schema.columns[key].unique
+
+
+@pytest.mark.parametrize(
+    "frictionless_schema",
+    [
+        {
+            "fields": [
+                {
+                    "name": "street_id",
+                    "type": "string",
+                    "description": "Id of the street",
+                    "title": "street identifier",
+                    "example": "45566_4455_4",  # "example" has no pandera equivalent, so it is not asserted
+                },
+                {
+                    "name": "street_type",
+                    "type": "string",
+                    "constraints": {
+                        "enum": ["highway", "motorway", "secondary"]
+                    },
+                },
+                {
+                    "name": "timestamp",
+                    "type": "datetime",
+                    "format": "%Y-%m-%d_%H:%M",
+                },
+                {
+                    "name": "count",
+                    "type": "integer",
+                },
+            ],
+            "primaryKey": ["street_id", "timestamp"],
+        }
+    ],
+)
+def test_frictionless_schema_with_description_and_title(frictionless_schema):
+    """Test that field description and title are copied onto the column."""
+    schema = from_frictionless_schema(frictionless_schema)
+    street_id = schema.columns["street_id"]
+    assert street_id.description == "Id of the street"
+    assert street_id.title == "street identifier"
-Original file line number
+Diff line change
@@ -72,4 +72,4 @@ repos:
         hooks:
           - id: codespell
             additional_dependencies:
-              - tomli
+              - tomli