quintoandar
diff --git a/‎butterfree/configs/db/__init__.py‎
Lines changed: 8 additions & 1 deletion b/‎butterfree/configs/db/__init__.py‎
Lines changed: 8 additions & 1 deletion
diff --git a/‎butterfree/configs/db/delta.py‎
Lines changed: 131 additions & 0 deletions b/‎butterfree/configs/db/delta.py‎
Lines changed: 131 additions & 0 deletions
diff --git a/‎butterfree/load/writers/__init__.py‎
Lines changed: 7 additions & 1 deletion b/‎butterfree/load/writers/__init__.py‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎butterfree/load/writers/delta_feature_store_writer.py‎
Lines changed: 170 additions & 0 deletions b/‎butterfree/load/writers/delta_feature_store_writer.py‎
Lines changed: 170 additions & 0 deletions
@@ -2,7 +2,14 @@
 
 from butterfree.configs.db.abstract_config import AbstractWriteConfig
 from butterfree.configs.db.cassandra_config import CassandraConfig
+from butterfree.configs.db.delta import DeltaConfig
 from butterfree.configs.db.kafka_config import KafkaConfig
 from butterfree.configs.db.metastore_config import MetastoreConfig
 
-__all__ = ["AbstractWriteConfig", "CassandraConfig", "KafkaConfig", "MetastoreConfig"]
+__all__ = [
+    "AbstractWriteConfig",
+    "CassandraConfig",
+    "KafkaConfig",
+    "MetastoreConfig",
+    "DeltaConfig",
+]
@@ -0,0 +1,131 @@
+"""Holds configurations for Delta Lake operations."""
+
+from typing import Any, Dict, List, Optional
+
+from butterfree.configs.db import AbstractWriteConfig
+
+
+class DeltaConfig(AbstractWriteConfig):
+    """Configuration for Delta Lake operations.
+
+    Attributes:
+        database: Target database name for the Delta table.
+        table: Target table name for the Delta table.
+        merge_on: List of columns to use as merge keys.
+        when_not_matched_insert: Optional condition for insert operations.
+        when_matched_update: Optional condition for update operations.
+        when_matched_delete: Optional condition for delete operations.
+    """
+
+    def __init__(
+        self,
+        database: str,
+        table: str,
+        merge_on: List[str],
+        when_not_matched_insert: Optional[str] = None,
+        when_matched_update: Optional[str] = None,
+        when_matched_delete: Optional[str] = None,
+    ):
+        self.database = database
+        self.table = table
+        self.merge_on = merge_on
+        self.when_not_matched_insert = when_not_matched_insert
+        self.when_matched_update = when_matched_update
+        self.when_matched_delete = when_matched_delete
+
+    @property
+    def database(self) -> str:
+        """Database name."""
+        return self.__database
+
+    @database.setter
+    def database(self, value: str) -> None:
+        if not value:
+            raise ValueError("Config 'database' cannot be empty.")
+        self.__database = value
+
+    @property
+    def table(self) -> str:
+        """Table name."""
+        return self.__table
+
+    @table.setter
+    def table(self, value: str) -> None:
+        if not value:
+            raise ValueError("Config 'table' cannot be empty.")
+        self.__table = value
+
+    @property
+    def merge_on(self) -> List[str]:
+        """List of columns to use as merge keys."""
+        return self.__merge_on
+
+    @merge_on.setter
+    def merge_on(self, value: List[str]) -> None:
+        if not value:
+            raise ValueError("Config 'merge_on' cannot be empty.")
+        self.__merge_on = value
+
+    @property
+    def mode(self) -> str:
+        """Write mode for Spark."""
+        return "overwrite"
+
+    @property
+    def format_(self) -> str:
+        """Write format for Spark."""
+        return "delta"
+
+    @property
+    def when_not_matched_insert(self) -> Optional[str]:
+        """Condition for insert operations."""
+        return self.__when_not_matched_insert
+
+    @when_not_matched_insert.setter
+    def when_not_matched_insert(self, value: Optional[str]) -> None:
+        self.__when_not_matched_insert = value
+
+    @property
+    def when_matched_update(self) -> Optional[str]:
+        """Condition for update operations."""
+        return self.__when_matched_update
+
+    @when_matched_update.setter
+    def when_matched_update(self, value: Optional[str]) -> None:
+        self.__when_matched_update = value
+
+    @property
+    def when_matched_delete(self) -> Optional[str]:
+        """Condition for delete operations."""
+        return self.__when_matched_delete
+
+    @when_matched_delete.setter
+    def when_matched_delete(self, value: Optional[str]) -> None:
+        self.__when_matched_delete = value
+
+    def get_options(self, key: str) -> Dict[str, Any]:
+        """Get options for Delta Lake operations.
+
+        Args:
+            key: table name in Delta Lake. Currently ignored: the returned options
+                always use the configured table and database.
+
+        Returns:
+            Configuration for Delta Lake operations.
+        """
+        return {
+            "table": self.table,
+            "database": self.database,
+        }
+
+    def translate(self, schema: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Get feature set schema to be translated.
+
+        Delta Lake uses the same types as Spark SQL, so no translation is needed.
+
+        Args:
+            schema: Feature set schema in Spark format.
+
+        Returns:
+            The same schema, as Delta Lake uses Spark SQL types.
+        """
+        return schema
@@ -1,9 +1,15 @@
 """Holds data loaders for historical and online feature store."""
 
+from butterfree.load.writers.delta_feature_store_writer import DeltaFeatureStoreWriter
 from butterfree.load.writers.delta_writer import DeltaWriter
 from butterfree.load.writers.historical_feature_store_writer import (
     HistoricalFeatureStoreWriter,
 )
 from butterfree.load.writers.online_feature_store_writer import OnlineFeatureStoreWriter
 
-__all__ = ["HistoricalFeatureStoreWriter", "OnlineFeatureStoreWriter", "DeltaWriter"]
+__all__ = [
+    "HistoricalFeatureStoreWriter",
+    "OnlineFeatureStoreWriter",
+    "DeltaWriter",
+    "DeltaFeatureStoreWriter",
+]
@@ -0,0 +1,170 @@
+from typing import Any, Dict, List, Optional
+
+from pyspark.sql.dataframe import DataFrame
+
+from butterfree.clients import SparkClient
+from butterfree.configs.db import DeltaConfig
+from butterfree.load.writers.delta_writer import DeltaWriter
+from butterfree.load.writers.writer import Writer
+from butterfree.transform import FeatureSet
+
+
+class DeltaFeatureStoreWriter(Writer):
+    """Enable writing feature sets into Delta tables with merge capabilities.
+
+    Attributes:
+        database: database name to use for the Delta table.
+        table: table name to write the feature set to.
+        merge_on: list of columns to use as merge keys.
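+        deduplicate: whether to deduplicate data before merging based on feature set
+            keys. Default is False. NOTE(review): not accepted by ``__init__`` as
+            defined in this class; passing it raises ``TypeError``.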
+        when_not_matched_insert: optional condition for insert operations.
+            When provided, rows will only be inserted if this condition is true.
+        when_matched_update: optional condition for update operations.
+            When provided, rows will only be updated if this condition is true.
+            Source columns can be referenced as source.<column_name> and target
+            columns as target.<column_name>.
+        when_matched_delete: optional condition for delete operations.
+            When provided, rows will be deleted if this condition is true.
+            Source and target columns can be referenced as in update conditions.
+
+    Example:
+        Simple example regarding DeltaFeatureStoreWriter class instantiation.
+        We can instantiate this class with basic merge configuration:
+
+    >>> from butterfree.load.writers import DeltaFeatureStoreWriter
+    >>> spark_client = SparkClient()
+    >>> writer = DeltaFeatureStoreWriter(
+    ...     database="feature_store",
+    ...     table="user_features",
+    ...     merge_on=["id", "timestamp"]
+    ... )
+    >>> writer.write(feature_set=feature_set,
+    ...             dataframe=dataframe,
+    ...             spark_client=spark_client)
+
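+        We can also enable deduplication based on the feature set keys
+        (NOTE(review): ``deduplicate`` is not a parameter of ``__init__`` as
+        defined in this class, so this call raises ``TypeError``):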
+
+    >>> writer = DeltaFeatureStoreWriter(
+    ...     database="feature_store",
+    ...     table="user_features",
+    ...     merge_on=["id", "timestamp"],
+    ...     deduplicate=True
+    ... )
+
+        For more control over the merge operation, we can add conditions:
+
+    >>> writer = DeltaFeatureStoreWriter(
+    ...     database="feature_store",
+    ...     table="user_features",
+    ...     merge_on=["id", "timestamp"],
+    ...     when_matched_update="source.value > target.value",
+    ...     when_not_matched_insert="source.value > 0"
+    ... )
+
+        The writer supports schema evolution by default and will automatically
+        handle updates to the feature set schema.
+
+        When writing with deduplication enabled, the writer is meant to use the
+        feature set's key columns and timestamp to remove duplicates before
+        merging. NOTE(review): ``write`` does not implement this yet; the
+        dataframe is handed to ``DeltaWriter.merge`` unchanged.
+
+        For optimal performance, it's recommended to:
+        1. Choose appropriate merge keys
+        2. Use conditions to filter unnecessary updates/inserts
+        3. Enable deduplication only when needed
+    """
+
+    def __init__(
+        self,
+        database: str,
+        table: str,
+        merge_on: List[str],
+        when_not_matched_insert: Optional[str] = None,
+        when_matched_update: Optional[str] = None,
+        when_matched_delete: Optional[str] = None,
+    ):
+        self.config = DeltaConfig(
+            database=database,
+            table=table,
+            merge_on=merge_on,
+            when_not_matched_insert=when_not_matched_insert,
+            when_matched_update=when_matched_update,
+            when_matched_delete=when_matched_delete,
+        )
+        self.row_count_validation = False
+
+    def write(
+        self,
+        dataframe: DataFrame,
+        spark_client: SparkClient,
+        feature_set: FeatureSet,
+    ) -> None:
+        """Merges the input dataframe into a Delta table.
+
+        Performs a Delta merge operation with the provided dataframe using the config
+        merge settings. The dataframe is passed to ``DeltaWriter.merge`` as received;
+        no deduplication is applied in this method.
+
+        Args:
+            dataframe: Spark dataframe with data to be merged.
+            spark_client: Client with an active Spark connection.
+            feature_set: Feature set instance containing schema and configuration.
+                Not read by this method.
+
+        Example:
+            >>> from butterfree.load.writers import DeltaFeatureStoreWriter
+            >>> writer = DeltaFeatureStoreWriter(
+            ...     database="feature_store",
+            ...     table="user_features",
+            ...     merge_on=["id", "timestamp"]
+            ... )
+            >>> writer.write(
+            ...     dataframe=dataframe,
+            ...     spark_client=spark_client,
+            ...     feature_set=feature_set
+            ... )
+        """
+        options = self.config.get_options(self.config.table)
+
+        DeltaWriter().merge(
+            client=spark_client,
+            database=options["database"],
+            table=options["table"],
+            merge_on=self.config.merge_on,
+            source_df=dataframe,
+            when_not_matched_insert=self.config.when_not_matched_insert,
+            when_matched_update=self.config.when_matched_update,
+            when_matched_delete=self.config.when_matched_delete,
+        )
+
+    def validate(
+        self,
+        dataframe: DataFrame,
+        spark_client: SparkClient,
+        feature_set: FeatureSet,
+    ) -> None:
+        """No-op: performs no validation of the dataframe written to the Delta table.
+
+        In Delta tables, schema validation is handled by Delta's schema enforcement
+        and evolution. No additional validation is needed.
+
+        Args:
+            dataframe: Spark dataframe to be validated
+            spark_client: Client for Spark connection
+            feature_set: Feature set with the schema definition
+        """
+        pass
+
+    def check_schema(self, dataframe: DataFrame, schema: List[Dict[str, Any]]) -> None:
+        """Placeholder schema check; performs no comparison itself.
+
+        Schema validation in Delta tables is handled by Delta Lake's schema enforcement
+        and evolution capabilities.
+
+        Args:
+            dataframe: Spark dataframe to be validated
+            schema: Schema definition from the feature set
+        """
+        pass