
Commit cd29d48: Fix tests.

1 parent: 50d0273

7 files changed: +41 -12 lines changed

butterfree/clients/spark_client.py

Lines changed: 4 additions & 4 deletions
@@ -34,7 +34,7 @@ def conn(self) -> SparkSession:
     def read(
         self,
         format: str,
-        path: Optional[str] = None,
+        path: Optional[Union[str, List[str]]] = None,
         schema: Optional[StructType] = None,
         stream: bool = False,
         **options: Any,
@@ -57,16 +57,16 @@ def read(
         """
         if not isinstance(format, str):
             raise ValueError("format needs to be a string with the desired read format")
-        if not isinstance(path, str):
-            raise ValueError("path needs to be a string")
+        if not isinstance(path, (str, list)):
+            raise ValueError("path needs to be a string or a list of string")
 
         df_reader: Union[
             DataStreamReader, DataFrameReader
         ] = self.conn.readStream if stream else self.conn.read
 
         df_reader = df_reader.schema(schema) if schema else df_reader
 
-        return df_reader.format(format).load(path, **options)
+        return df_reader.format(format).load(path, **options)  # type: ignore
 
     def read_table(self, table: str, database: str = None) -> DataFrame:
         """Use the SparkSession.read interface to read a metastore table.

butterfree/configs/db/metastore_config.py

Lines changed: 1 addition & 1 deletion
@@ -109,7 +109,7 @@ def get_path_with_partitions(self, key: str, dataframe: DataFrame) -> List:
         )
         for row in dataframe_values:
             path_list.append(
-                f"s3a://{self.bucket}/{key}/year={row['year']}/"
+                f"{self.file_system}://{self.path}/{key}/year={row['year']}/"
                 f"month={row['month']}/day={row['day']}"
             )
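
The partition path template now takes its scheme and root from the config's file_system and path attributes instead of hardcoding s3a:// and self.bucket, so the same key layout can target other file systems. An illustrative, self-contained sketch of what the new template produces (the values are made up; only the attribute names come from the diff):

    file_system = "s3a"   # from the config; e.g. "file" for local runs
    path = "my-bucket"    # from the config; previously self.bucket
    key = "orders"
    row = {"year": 2021, "month": 4, "day": 12}

    partition_path = (
        f"{file_system}://{path}/{key}/year={row['year']}/"
        f"month={row['month']}/day={row['day']}"
    )
    # -> "s3a://my-bucket/orders/year=2021/month=4/day=12"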

butterfree/extract/readers/reader.py

Lines changed: 1 addition & 1 deletion
@@ -52,7 +52,7 @@ def with_(
         self.transformations.append(new_transformation)
         return self
 
-    def with_incremantal_strategy(
+    def with_incremental_strategy(
         self, incremental_strategy: IncrementalStrategy
     ) -> "Reader":
         """Define the incremental strategy for the Reader.

butterfree/load/writers/online_feature_store_writer.py

Lines changed: 10 additions & 4 deletions
@@ -181,11 +181,17 @@ def write(
         table_name = feature_set.entity if self.write_to_entity else feature_set.name
 
         if not self.debug_mode:
+            config = (
+                self.db_config
+                if self.db_config == CassandraConfig
+                else CassandraConfig()
+            )
+
             cassandra_client = CassandraClient(
-                host=[self.db_config.host],
-                keyspace=self.db_config.keyspace,
-                user=self.db_config.username,
-                password=self.db_config.password,
+                host=[config.host],
+                keyspace=config.keyspace,
+                user=config.username,
+                password=config.password,
             )
 
             dataframe = self.check_schema(cassandra_client, dataframe, table_name)
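
The writer now falls back to a fresh CassandraConfig() whenever its db_config is not a Cassandra config, so the CassandraClient built for the schema check always has host, keyspace, username, and password attributes (a KafkaConfig, for instance, would not). Note that self.db_config == CassandraConfig compares an instance against the class object, which is false for ordinary instances, so the fallback branch is effectively always taken; an isinstance check would express the apparent intent. A sketch of that form, assuming CassandraConfig is importable from butterfree.configs.db:

    from butterfree.configs.db import CassandraConfig

    def effective_cassandra_config(db_config):
        # Hypothetical helper: keep the writer's config when it is already a
        # CassandraConfig, otherwise fall back to a default-constructed one.
        if isinstance(db_config, CassandraConfig):
            return db_config
        return CassandraConfig()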

tests/integration/butterfree/pipelines/test_feature_set_pipeline.py

Lines changed: 1 addition & 1 deletion
@@ -163,7 +163,7 @@ def test_feature_set_pipeline(
             ],
             timestamp=TimestampFeature(),
         ),
-        sink=Sink(writers=[historical_writer],),
+        sink=Sink(writers=[historical_writer]),
     )
     test_pipeline.run()
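
The only change here is dropping the trailing comma inside the Sink(...) call; under a formatter like Black, which treats a trailing comma as "magic", the comma would force the call to be exploded across multiple lines, so removing it keeps the call on one line.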

tests/unit/butterfree/load/writers/test_historical_feature_store_writer.py

Lines changed: 7 additions & 0 deletions
@@ -318,8 +318,15 @@ def test_write_with_transform(
         # given
         spark_client = mocker.stub("spark_client")
         spark_client.write_table = mocker.stub("write_table")
+
         writer = HistoricalFeatureStoreWriter().with_(json_transform)
 
+        schema_dataframe = writer._create_partitions(feature_set_dataframe)
+        json_dataframe = writer._apply_transformations(schema_dataframe)
+        writer.check_schema_hook = mocker.stub("check_schema_hook")
+        writer.check_schema_hook.run = mocker.stub("run")
+        writer.check_schema_hook.run.return_value = json_dataframe
+
         # when
         writer.write(
             feature_set=feature_set,
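
The test now stubs the writer's check_schema_hook so that write() does not run a real schema validation; the stub's run() has to return a dataframe shaped like the one write() would pass downstream, hence it is precomputed with the writer's own _create_partitions and _apply_transformations. The same pattern in isolation, as a minimal pytest-mock sketch (SomeWriter and input_df are hypothetical names):

    def test_write_skips_schema_check(mocker, input_df):
        writer = SomeWriter()  # hypothetical writer exposing check_schema_hook

        # Replace the hook with a stub whose run() echoes the expected
        # dataframe, so write() proceeds without a live schema check.
        writer.check_schema_hook = mocker.stub("check_schema_hook")
        writer.check_schema_hook.run = mocker.stub("run")
        writer.check_schema_hook.run.return_value = input_df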

tests/unit/butterfree/load/writers/test_online_feature_store_writer.py

Lines changed: 17 additions & 1 deletion
@@ -151,7 +151,7 @@ def test_write_in_debug_and_stream_mode(self, feature_set, spark_session, mocker
         assert isinstance(handler, StreamingQuery)
 
     @pytest.mark.parametrize("has_checkpoint", [True, False])
-    def test_write_stream(self, feature_set, has_checkpoint, monkeypatch):
+    def test_write_stream(self, feature_set, has_checkpoint, monkeypatch, mocker):
         # arrange
         spark_client = SparkClient()
         spark_client.write_stream = Mock()
@@ -174,6 +174,10 @@ def test_write_stream(self, feature_set, has_checkpoint, monkeypatch):
         writer = OnlineFeatureStoreWriter(cassandra_config)
         writer.filter_latest = Mock()
 
+        writer.check_schema_hook = mocker.stub("check_schema_hook")
+        writer.check_schema_hook.run = mocker.stub("run")
+        writer.check_schema_hook.run.return_value = dataframe
+
         # act
         stream_handler = writer.write(feature_set, dataframe, spark_client)
 
@@ -252,6 +256,10 @@ def test_write_with_transform(
         spark_client.write_dataframe = mocker.stub("write_dataframe")
         writer = OnlineFeatureStoreWriter(cassandra_config).with_(json_transform)
 
+        writer.check_schema_hook = mocker.stub("check_schema_hook")
+        writer.check_schema_hook.run = mocker.stub("run")
+        writer.check_schema_hook.run.return_value = feature_set_dataframe
+
         # when
         writer.write(feature_set, feature_set_dataframe, spark_client)
 
@@ -285,6 +293,10 @@ def test_write_with_kafka_config(
         kafka_config = KafkaConfig()
         writer = OnlineFeatureStoreWriter(kafka_config).with_(json_transform)
 
+        writer.check_schema_hook = mocker.stub("check_schema_hook")
+        writer.check_schema_hook.run = mocker.stub("run")
+        writer.check_schema_hook.run.return_value = feature_set_dataframe
+
         # when
         writer.write(feature_set, feature_set_dataframe, spark_client)
 
@@ -308,6 +320,10 @@ def test_write_with_custom_kafka_config(
             json_transform
         )
 
+        custom_writer.check_schema_hook = mocker.stub("check_schema_hook")
+        custom_writer.check_schema_hook.run = mocker.stub("run")
+        custom_writer.check_schema_hook.run.return_value = feature_set_dataframe
+
         # when
         custom_writer.write(feature_set, feature_set_dataframe, spark_client)
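
All four tests apply the same hook-stubbing pattern; test_write_stream also gains mocker in its signature, since the stub comes from the pytest-mock fixture. Because mocker.stub() returns a regular mock, the tests could additionally verify the hook was exercised, e.g.:

    writer.check_schema_hook.run.assert_called_once()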
