6 changes: 3 additions & 3 deletions Makefile
@@ -9,8 +9,8 @@ VERSION := $(shell grep __version__ setup.py | head -1 | cut -d \" -f2 | cut -d
.PHONY: environment
## create virtual environment for butterfree
environment:
@pyenv install -s 3.7.6
@pyenv virtualenv 3.7.6 butterfree
@pyenv install -s 3.7.13
@pyenv virtualenv 3.7.13 butterfree
@pyenv local butterfree
@PYTHONPATH=. python -m pip install --upgrade pip

@@ -221,4 +221,4 @@ help:
} \
printf "\n"; \
}' \
| more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars')
| more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars')
2 changes: 1 addition & 1 deletion butterfree/configs/db/cassandra_config.py
@@ -246,7 +246,7 @@ def translate(self, schema: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
cassandra_schema.append(
{
"column_name": features["column_name"],
"type": cassandra_mapping[str(features["type"])],
"type": cassandra_mapping[str(features["type"]).replace("()", "")],
"primary_key": features["primary_key"],
}
)
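The `.replace("()", "")` above normalizes Spark type names before the Cassandra lookup. A minimal sketch of why, assuming a newer PySpark renders a `DataType` as e.g. `"IntegerType()"` while the mapping keys carry no parentheses; the mapping excerpt below is illustrative, not the full `cassandra_mapping`:

```python
# Illustrative excerpt of the Spark-to-Cassandra type mapping.
cassandra_mapping = {"IntegerType": "int", "TimestampType": "timestamp"}

# str() of a Spark DataType may or may not carry a "()" suffix depending
# on the PySpark version; stripping it makes the lookup version-agnostic.
for rendered in ("IntegerType", "IntegerType()"):
    assert cassandra_mapping[rendered.replace("()", "")] == "int"
```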
4 changes: 2 additions & 2 deletions butterfree/reports/metadata.py
@@ -162,7 +162,7 @@ def to_json(self) -> Any:
"features": [
{
"column_name": c["column_name"],
"data_type": str(c["type"]),
"data_type": str(c["type"]).replace("()", ""),
"description": desc,
}
for c, desc in params._features
@@ -208,7 +208,7 @@ def to_markdown(self) -> Any:

features = ["Column name", "Data type", "Description"]
for c, desc in params._features:
features.extend([c["column_name"], str(c["type"]), desc])
features.extend([c["column_name"], str(c["type"]).replace("()", ""), desc])

count_rows = len(features) // 3

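`to_markdown` builds one flat list and lets the table writer slice it into rows of three, which is why `count_rows` is `len(features) // 3`. A minimal sketch of that convention, assuming mdutils (pinned in requirements.txt) renders the table; the feature rows and file name are made up for illustration:

```python
from mdutils.mdutils import MdUtils

# Header triple followed by one [name, type, description] triple per feature.
features = ["Column name", "Data type", "Description"]
features.extend(["id", "IntegerType", "the entity identifier"])
features.extend(["ts", "TimestampType", "event timestamp"])

count_rows = len(features) // 3  # three cells per rendered row

md = MdUtils(file_name="feature_set_report")  # hypothetical output name
md.new_table(columns=3, rows=count_rows, text=features, text_align="center")
md.create_md_file()
```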
8 changes: 3 additions & 5 deletions requirements.dev.txt
@@ -1,11 +1,9 @@
cmake==3.18.4
h3==3.7.0
pyarrow==0.15.1
h3==3.7.4
jupyter==1.0.0
twine==3.1.1
mypy==0.790
pyspark-stubs==3.0.0
sphinx==3.5.4
sphinxemoji==0.1.8
sphinx-rtd-theme==0.5.2
recommonmark==0.7.1
recommonmark==0.7.1
pyarrow>=1.0.0
7 changes: 3 additions & 4 deletions requirements.txt
@@ -1,9 +1,8 @@
cassandra-driver>=3.22.0,<4.0
mdutils>=1.2.2,<2.0
pandas>=0.24,<1.1
pandas>=0.24,<2.0
parameters-validation>=1.1.5,<2.0
pyspark==3.*
typer>=0.3,<0.4
setuptools>=41,<42
typing-extensions==3.7.4.3
boto3==1.17.*
typing-extensions>3.7.4,<5
boto3==1.17.*
1 change: 1 addition & 0 deletions setup.cfg
@@ -24,6 +24,7 @@ spark_options =
spark.sql.session.timeZone: UTC
spark.driver.bindAddress: 127.0.0.1
spark.sql.legacy.timeParserPolicy: LEGACY
spark.sql.legacy.createHiveTableByDefault: false

[mypy]
# suppress errors about unsatisfied imports
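The new `spark.sql.legacy.createHiveTableByDefault: false` entry keeps plain `CREATE TABLE` statements (no `USING` clause) on native data source tables rather than Hive SerDe tables under Spark 3. A minimal sketch of the equivalent session setup, assuming these `[spark_options]` entries are forwarded to the session builder the way the pytest-spark plugin does:

```python
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .config("spark.sql.session.timeZone", "UTC")
    .config("spark.driver.bindAddress", "127.0.0.1")
    .config("spark.sql.legacy.timeParserPolicy", "LEGACY")
    # Under Spark 3, CREATE TABLE without USING can resolve to a Hive
    # SerDe table; "false" keeps it on the native (e.g. Parquet) path.
    .config("spark.sql.legacy.createHiveTableByDefault", "false")
    .getOrCreate()
)
```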
2 changes: 1 addition & 1 deletion setup.py
@@ -34,7 +34,7 @@
license="Copyright",
author="QuintoAndar",
install_requires=requirements,
extras_require={"h3": ["cmake==3.16.3", "h3==3.4.2"]},
extras_require={"h3": ["h3>=3.7.4,<4"]},
python_requires=">=3.7, <4",
entry_points={"console_scripts": ["butterfree=butterfree._cli.main:app"]},
include_package_data=True,
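With the extra now tracking the same h3 range as requirements.dev.txt, the optional dependency installs through pip's standard extras syntax, e.g. `pip install butterfree[h3]`.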
@@ -77,9 +77,11 @@ def test_feature_set_pipeline(
self, mocked_df, spark_session, fixed_windows_output_feature_set_dataframe,
):
# arrange

table_reader_id = "a_source"
table_reader_table = "table"
table_reader_db = environment.get_variable("FEATURE_STORE_HISTORICAL_DATABASE")

create_temp_view(dataframe=mocked_df, name=table_reader_id)
create_db_and_table(
spark=spark_session,
@@ -88,14 +90,16 @@
table_reader_table=table_reader_table,
)

dbconfig = Mock()
dbconfig.mode = "overwrite"
dbconfig.format_ = "parquet"
path = "test_folder/historical/entity/feature_set"

dbconfig = MetastoreConfig()
dbconfig.get_options = Mock(
return_value={"path": "test_folder/historical/entity/feature_set"}
return_value={"mode": "overwrite", "format_": "parquet", "path": path}
)

historical_writer = HistoricalFeatureStoreWriter(db_config=dbconfig)
historical_writer = HistoricalFeatureStoreWriter(
db_config=dbconfig, debug_mode=True
)

# act
test_pipeline = FeatureSetPipeline(
@@ -151,9 +155,13 @@ def test_feature_set_pipeline(
)
test_pipeline.run()

# act and assert
dbconfig.get_path_with_partitions = Mock(
return_value=["historical/entity/feature_set"]
)

# assert
path = dbconfig.get_options("historical/entity/feature_set").get("path")
df = spark_session.read.parquet(path).orderBy(TIMESTAMP_COLUMN)
df = spark_session.sql("select * from historical_feature_store__feature_set")

target_df = fixed_windows_output_feature_set_dataframe.orderBy(
test_pipeline.feature_set.timestamp_column
@@ -162,9 +170,6 @@
# assert
assert_dataframe_equality(df, target_df)

# tear down
shutil.rmtree("test_folder")

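The rewritten assertions read the pipeline output back through Spark SQL instead of parquet files on disk, which is also why the `shutil.rmtree("test_folder")` tear-down goes away. A minimal sketch of that pattern, assuming `debug_mode=True` makes `HistoricalFeatureStoreWriter` register a temporary view named `historical_feature_store__<feature set name>` instead of writing files:

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()

# Stand-in for what the debug-mode writer would register for "feature_set".
output_df = spark.createDataFrame([(1, "2019-12-31")], ["id", "timestamp"])
output_df.createOrReplaceTempView("historical_feature_store__feature_set")

# The test then queries the view; no files are written, so no tear-down.
df = spark.sql("select * from historical_feature_store__feature_set")
assert df.count() == 1
```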
def test_feature_set_pipeline_with_dates(
self,
mocked_date_df,