databrickslabs · bsr-the-mngrm · Oct 18, 2025 · Oct 18, 2025 · Oct 18, 2025 · Copilot
@@ -1,8 +1,8 @@
 import logging
+import datetime
 
 from databricks.labs.dqx.base import DQEngineBase
 from databricks.labs.dqx.engine import DQEngine
-from databricks.labs.dqx.profiler.common import val_maybe_to_str
 from databricks.labs.dqx.profiler.profiler import DQProfile
 from databricks.labs.dqx.telemetry import telemetry_logger
 
@@ -70,49 +70,59 @@ def dq_generate_min_max(column: str, level: str = "error", **params: dict):
         Generates a data quality rule to check if a column's value is within a specified range.
 
         Args:
-                column: The name of the column to check.
-                level: The criticality level of the rule (default is "error").
-                params: Additional parameters, including the minimum and maximum values.
+            column: The name of the column to check.
+            level: The criticality level of the rule (default is "error").
+            params: Additional parameters, including the minimum and maximum values.
 
         Returns:
-                A dictionary representing the data quality rule, or None if no limits are provided.
+            A dictionary representing the data quality rule, or None if no limits are provided.
         """
         min_limit = params.get("min")
         max_limit = params.get("max")
 
-        if not isinstance(min_limit, int) or not isinstance(max_limit, int):
-            return None  # TODO handle timestamp and dates: https://github.com/databrickslabs/dqx/issues/71
+        if min_limit is None and max_limit is None:
+            return None
+
+        def _is_num(value):
+            return isinstance(value, int)
+
+        def _is_temporal(value):
+            return isinstance(value, (datetime.date, datetime.datetime))
+
+        def _same_family(value_a, value_b):
+            # numeric with numeric OR temporal with temporal
+            if value_a is None or value_b is None:
+                return True
+            return (_is_num(value_a) and _is_num(value_b)) or (_is_temporal(value_a) and _is_temporal(value_b))
 
-        if min_limit is not None and max_limit is not None:
+        # Both bounds
+        if min_limit is not None and max_limit is not None and _same_family(min_limit, max_limit):
             return {
                 "check": {
                     "function": "is_in_range",
                     "arguments": {
                         "column": column,
-                        "min_limit": val_maybe_to_str(min_limit, include_sql_quotes=False),
-                        "max_limit": val_maybe_to_str(max_limit, include_sql_quotes=False),
+                        # pass through Python ints or datetime/date without stringification
+                        "min_limit": min_limit,
+                        "max_limit": max_limit,
                     },
                 },
                 "name": f"{column}_isnt_in_range",
                 "criticality": level,
             }
 
-        if max_limit is not None:
+        # Only max
+        if max_limit is not None and (_is_num(max_limit) or _is_temporal(max_limit)):
             return {
-                "check": {
-                    "function": "is_not_greater_than",
-                    "arguments": {"column": column, "limit": val_maybe_to_str(max_limit, include_sql_quotes=False)},
-                },
+                "check": {"function": "is_not_greater_than", "arguments": {"column": column, "limit": max_limit}},
                 "name": f"{column}_not_greater_than",
                 "criticality": level,
             }
 
-        if min_limit is not None:
+        # Only min
+        if min_limit is not None and (_is_num(min_limit) or _is_temporal(min_limit)):
             return {
-                "check": {
-                    "function": "is_not_less_than",
-                    "arguments": {"column": column, "limit": val_maybe_to_str(min_limit, include_sql_quotes=False)},
-                },
+                "check": {"function": "is_not_less_than", "arguments": {"column": column, "limit": min_limit}},
                 "name": f"{column}_not_less_than",
                 "criticality": level,
             }

@@ -1,5 +1,5 @@
+import logging
 import datetime
-from decimal import Decimal
 
 from databricks.labs.dqx.profiler.generator import DQGenerator
 from databricks.labs.dqx.profiler.profiler import DQProfile
@@ -22,19 +22,6 @@
         parameters={"min": datetime.date(2020, 1, 1), "max": None},
         description="Real min/max values were used",
     ),
-    DQProfile(
-        name="min_max",
-        column="product_expiry_ts",
-        parameters={"min": None, "max": datetime.datetime(2020, 1, 1)},
-        description="Real min/max values were used",
-    ),
-    DQProfile(name="is_random", column="vendor_id", parameters={"in": ["1", "4", "2"]}),
-    DQProfile(
-        name='min_max',
-        column='d1',
-        description='Real min/max values were used',
-        parameters={'max': Decimal('333323.00'), 'min': Decimal('1.23')},
-    ),
 ]
 
 
@@ -71,6 +58,14 @@ def test_generate_dq_rules(ws):
             "name": "rate_code_id_isnt_in_range",
             "criticality": "error",
         },
+        {
+            "check": {
+                "function": "is_not_less_than",
+                "arguments": {"column": "product_launch_date", "limit": datetime.date(2020, 1, 1)},
+            },
+            "name": "product_launch_date_not_less_than",
+            "criticality": "error",
+        },
     ]
     assert expectations == expected
 
@@ -108,13 +103,27 @@ def test_generate_dq_rules_warn(ws):
             "name": "rate_code_id_isnt_in_range",
             "criticality": "warn",
         },
+        {
+            "check": {
+                "function": "is_not_less_than",
+                "arguments": {"column": "product_launch_date", "limit": datetime.date(2020, 1, 1)},
+            },
+            "name": "product_launch_date_not_less_than",
+            "criticality": "warn",
+        },
     ]
     assert expectations == expected
 
 
 def test_generate_dq_rules_logging(ws, caplog):
+    # capture INFO from the generator module where the skip log is emitted
+    caplog.set_level(logging.INFO, logger="databricks.labs.dqx.profiler.generator")
+
     generator = DQGenerator(ws)
-    generator.generate_dq_rules(test_rules)
+    # add an unknown rule to trigger the "skipping..." log
+    unknown_rule = DQProfile(name="is_random", column="vendor_id")
+    generator.generate_dq_rules(test_rules + [unknown_rule])
+
     assert "No rule 'is_random' for column 'vendor_id'. skipping..." in caplog.text
 
 

@@ -0,0 +1,32 @@
+import datetime
+
+from databricks.labs.dqx.profiler.generator import DQGenerator
+
+
+def test_date_both_bounds_is_in_range():
+    """Date min and max given together must produce an is_in_range check holding the raw dates."""
+    lower, upper = datetime.date(2020, 1, 1), datetime.date(2020, 12, 31)
+    rule = DQGenerator.dq_generate_min_max("dcol", min=lower, max=upper)
+    check = rule["check"]
+    assert check["function"] == "is_in_range"
+    assert check["arguments"]["column"] == "dcol"
+    assert check["arguments"]["min_limit"] == lower
+    assert check["arguments"]["max_limit"] == upper
+
+
+def test_timestamp_only_min_is_not_less_than():
+    """A lone timestamp minimum must yield is_not_less_than with the datetime passed through as-is."""
+    lower_bound = datetime.datetime(2024, 6, 1, 12, 0, 0)
+    check = DQGenerator.dq_generate_min_max("tscol", min=lower_bound, max=None)["check"]
+    assert check["function"] == "is_not_less_than"
+    assert check["arguments"]["column"] == "tscol"
+    assert check["arguments"]["limit"] == lower_bound
+
+
+def test_timestamp_only_max_is_not_greater_than():
+    """A lone timestamp maximum must yield is_not_greater_than with the datetime passed through as-is."""
+    upper_bound = datetime.datetime(2024, 6, 30, 23, 59, 59)
+    check = DQGenerator.dq_generate_min_max("tscol", min=None, max=upper_bound)["check"]
+    assert check["function"] == "is_not_greater_than"
+    assert check["arguments"]["column"] == "tscol"
+    assert check["arguments"]["limit"] == upper_bound