Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion environment-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ dependencies:
- pip

# required dependencies
- pandas>=2.2,<2.3
- pandas>=2.2,<2.4
- numpy>=1.22.4
- fsspec>=2022.11.0
- packaging>=21.0
Expand Down
9 changes: 6 additions & 3 deletions modin/core/storage_formats/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -6614,10 +6614,13 @@ def str_encode(self, encoding, errors):
refer_to="decode",
params="""
encoding : str,
errors : str, default = 'strict'""",
errors : str, default = 'strict'
dtype : str or dtype, optional""",
)
def str_decode(self, encoding, errors):
return StrDefault.register(pandas.Series.str.decode)(self, encoding, errors)
def str_decode(self, encoding, errors, dtype):
return StrDefault.register(pandas.Series.str.decode)(
self, encoding, errors, dtype
)

@doc_utils.doc_str_method(
refer_to="cat",
Expand Down
18 changes: 9 additions & 9 deletions modin/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,20 @@
import pandas
from packaging import version

__pandas_version__ = "2.2"
__min_pandas_version__ = "2.2"
__max_pandas_version__ = "2.4"

if (
version.parse(pandas.__version__).release[:2]
!= version.parse(__pandas_version__).release[:2]
):
pandas_version = version.parse(pandas.__version__)
if pandas_version < version.parse(
__min_pandas_version__
) or pandas_version >= version.parse(__max_pandas_version__):
warnings.warn(
f"The pandas version installed ({pandas.__version__}) does not match the supported pandas version in"
+ f" Modin ({__pandas_version__}.X). This may cause undesired side effects!"
f"The pandas version installed ({pandas.__version__}) is outside the supported range in Modin"
+ f" ({__min_pandas_version__} to {__max_pandas_version__}). This may cause undesired side effects!"
)


# to not pollute namespace
del version
del version, pandas_version, __min_pandas_version__, __max_pandas_version__


with warnings.catch_warnings():
Expand Down
25 changes: 18 additions & 7 deletions modin/pandas/series_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

import numpy as np
import pandas
from pandas._libs import lib

from modin.logging import ClassLogger
from modin.utils import _inherit_docstrings
Expand Down Expand Up @@ -226,9 +227,9 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
else self._Series(query_compiler=compiler_result)
)

def decode(self, encoding, errors="strict"):
def decode(self, encoding, errors="strict", dtype=None):
return self._Series(
query_compiler=self._query_compiler.str_decode(encoding, errors)
query_compiler=self._query_compiler.str_decode(encoding, errors, dtype)
)

def split(self, pat=None, *, n=-1, expand=False, regex=None):
Expand Down Expand Up @@ -277,9 +278,11 @@ def join(self, sep):
def get_dummies(self, sep="|"):
return self._Series(query_compiler=self._query_compiler.str_get_dummies(sep))

def contains(self, pat, case=True, flags=0, na=None, regex=True):
def contains(self, pat, case=True, flags=0, na=lib.no_default, regex=True):
if pat is None and not case:
raise AttributeError("'NoneType' object has no attribute 'upper'")
if na is lib.no_default:
na = None
return self._Series(
query_compiler=self._query_compiler.str_contains(
pat, case=case, flags=flags, na=na, regex=regex
Expand Down Expand Up @@ -358,7 +361,9 @@ def count(self, pat, flags=0):
query_compiler=self._query_compiler.str_count(pat, flags=flags)
)

def startswith(self, pat, na=None):
def startswith(self, pat, na=lib.no_default):
if na is lib.no_default:
na = None
return self._Series(
query_compiler=self._query_compiler.str_startswith(pat, na=na)
)
Expand All @@ -368,7 +373,9 @@ def encode(self, encoding, errors="strict"):
query_compiler=self._query_compiler.str_encode(encoding, errors)
)

def endswith(self, pat, na=None):
def endswith(self, pat, na=lib.no_default):
if na is lib.no_default:
na = None
return self._Series(
query_compiler=self._query_compiler.str_endswith(pat, na=na)
)
Expand All @@ -380,18 +387,22 @@ def findall(self, pat, flags=0):
query_compiler=self._query_compiler.str_findall(pat, flags=flags)
)

def fullmatch(self, pat, case=True, flags=0, na=None):
def fullmatch(self, pat, case=True, flags=0, na=lib.no_default):
if not isinstance(pat, (str, re.Pattern)):
raise TypeError("first argument must be string or compiled pattern")
if na is lib.no_default:
na = None
return self._Series(
query_compiler=self._query_compiler.str_fullmatch(
pat, case=case, flags=flags, na=na
)
)

def match(self, pat, case=True, flags=0, na=None):
def match(self, pat, case=True, flags=0, na=lib.no_default):
if not isinstance(pat, (str, re.Pattern)):
raise TypeError("first argument must be string or compiled pattern")
if na is lib.no_default:
na = None
return self._Series(
query_compiler=self._query_compiler.str_match(
pat, case=case, flags=flags, na=na
Expand Down
2 changes: 1 addition & 1 deletion modin/tests/pandas/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def test_top_level_api_equality():
), "Differences found in API: {}".format(extra_in_modin - set(ignore_modin))

difference = []
allowed_different = ["Interval", "datetime"]
allowed_different = ["Interval", "datetime", "StringDtype"]

# Check that we have all keywords and defaults in pandas
for m in set(pandas_dir) - set(ignore_pandas):
Expand Down
19 changes: 15 additions & 4 deletions modin/tests/pandas/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@
pytest.mark.filterwarnings(
"ignore:.*In a future version of pandas, the provided callable will be used directly.*:FutureWarning"
),
pytest.mark.filterwarnings(
"ignore:(DataFrameGroupBy|SeriesGroupBy).apply operated on the grouping columns:FutureWarning"
),
]


Expand Down Expand Up @@ -2166,7 +2169,10 @@ def test_mixed_columns(columns, drop_from_original_df, as_index):

df_equals(md_grp.size(), pd_grp.size())
df_equals(md_grp.sum(), pd_grp.sum())
df_equals(md_grp.apply(lambda df: df.sum()), pd_grp.apply(lambda df: df.sum()))
df_equals(
md_grp.apply(lambda df: df.sum(), include_groups=False),
pd_grp.apply(lambda df: df.sum(), include_groups=False),
)


@pytest.mark.parametrize("as_index", [True, False])
Expand Down Expand Up @@ -2263,7 +2269,9 @@ def test_mixed_columns_not_from_df(columns, as_index):

modin_groupby_equals_pandas(md_grp, pd_grp)
eval_general(md_grp, pd_grp, lambda grp: grp.size())
eval_general(md_grp, pd_grp, lambda grp: grp.apply(lambda df: df.sum()))
eval_general(
md_grp, pd_grp, lambda grp: grp.apply(lambda df: df.sum(), include_groups=False)
)
eval_general(md_grp, pd_grp, lambda grp: grp.first())


Expand Down Expand Up @@ -3277,9 +3285,12 @@ def test_groupby_apply_series_result(modify_config):
)
df["group"] = [1, 1, 2, 2, 3]

# res = df.groupby('group').apply(lambda x: x.name+2)
eval_general(
df, df._to_pandas(), lambda df: df.groupby("group").apply(lambda x: x.name + 2)
df,
df._to_pandas(),
lambda df: df.groupby("group").apply(
lambda x: x.name + 2, include_groups=False
),
)


Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
## required dependencies
pandas>=2.2,<2.3
pandas>=2.2,<2.4
numpy>=1.22.4
fsspec>=2022.11.0
packaging>=21.0
Expand Down
2 changes: 1 addition & 1 deletion requirements/env_unidist_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ dependencies:
- pip

# required dependencies
- pandas>=2.2,<2.3
- pandas>=2.2,<2.4
- numpy>=1.22.4
- unidist-mpi>=0.2.1
- mpich
Expand Down
2 changes: 1 addition & 1 deletion requirements/env_unidist_win.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ dependencies:
- pip

# required dependencies
- pandas>=2.2,<2.3
- pandas>=2.2,<2.4
- numpy>=1.22.4
- unidist-mpi>=0.2.1
- msmpi
Expand Down
2 changes: 1 addition & 1 deletion requirements/requirements-no-engine.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ dependencies:
- pip

# required dependencies
- pandas>=2.2,<2.3
- pandas>=2.2,<2.4
- numpy>=1.22.4
- fsspec>=2022.11.0
- packaging>=21.0
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def make_distribution(self):
long_description=long_description,
long_description_content_type="text/markdown",
install_requires=[
"pandas>=2.2,<2.3",
"pandas>=2.2,<2.4",
"packaging>=21.0",
"numpy>=1.22.4",
"fsspec>=2022.11.0",
Expand Down
Loading