Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion environment-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ dependencies:
- pip

# required dependencies
- pandas>=2.2,<2.3
- pandas>=2.2,<2.4
- numpy>=1.22.4
- fsspec>=2022.11.0
- packaging>=21.0
Expand Down
9 changes: 6 additions & 3 deletions modin/core/storage_formats/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -6614,10 +6614,13 @@ def str_encode(self, encoding, errors):
refer_to="decode",
params="""
encoding : str,
errors : str, default = 'strict'""",
errors : str, default = 'strict'
dtype : str or dtype, optional""",
)
def str_decode(self, encoding, errors):
return StrDefault.register(pandas.Series.str.decode)(self, encoding, errors)
def str_decode(self, encoding, errors, dtype):
return StrDefault.register(pandas.Series.str.decode)(
self, encoding, errors, dtype
)

@doc_utils.doc_str_method(
refer_to="cat",
Expand Down
18 changes: 9 additions & 9 deletions modin/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,20 @@
import pandas
from packaging import version

__pandas_version__ = "2.2"
__min_pandas_version__ = "2.2"
__max_pandas_version__ = "2.4"

if (
version.parse(pandas.__version__).release[:2]
!= version.parse(__pandas_version__).release[:2]
):
pandas_version = version.parse(pandas.__version__)
if pandas_version < version.parse(
__min_pandas_version__
) or pandas_version >= version.parse(__max_pandas_version__):
warnings.warn(
f"The pandas version installed ({pandas.__version__}) does not match the supported pandas version in"
+ f" Modin ({__pandas_version__}.X). This may cause undesired side effects!"
f"The pandas version installed ({pandas.__version__}) is outside the supported range in Modin"
+ f" ({__min_pandas_version__} to {__max_pandas_version__}). This may cause undesired side effects!"
)


# to not pollute namespace
del version
del version, pandas_version, __min_pandas_version__, __max_pandas_version__


with warnings.catch_warnings():
Expand Down
25 changes: 18 additions & 7 deletions modin/pandas/series_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

import numpy as np
import pandas
from pandas._libs import lib

from modin.logging import ClassLogger
from modin.utils import _inherit_docstrings
Expand Down Expand Up @@ -226,9 +227,9 @@ def cat(self, others=None, sep=None, na_rep=None, join="left"):
else self._Series(query_compiler=compiler_result)
)

def decode(self, encoding, errors="strict"):
def decode(self, encoding, errors="strict", dtype=None):
return self._Series(
query_compiler=self._query_compiler.str_decode(encoding, errors)
query_compiler=self._query_compiler.str_decode(encoding, errors, dtype)
)

def split(self, pat=None, *, n=-1, expand=False, regex=None):
Expand Down Expand Up @@ -277,9 +278,11 @@ def join(self, sep):
def get_dummies(self, sep="|"):
return self._Series(query_compiler=self._query_compiler.str_get_dummies(sep))

def contains(self, pat, case=True, flags=0, na=None, regex=True):
def contains(self, pat, case=True, flags=0, na=lib.no_default, regex=True):
if pat is None and not case:
raise AttributeError("'NoneType' object has no attribute 'upper'")
if na is lib.no_default:
na = None
return self._Series(
query_compiler=self._query_compiler.str_contains(
pat, case=case, flags=flags, na=na, regex=regex
Expand Down Expand Up @@ -358,7 +361,9 @@ def count(self, pat, flags=0):
query_compiler=self._query_compiler.str_count(pat, flags=flags)
)

def startswith(self, pat, na=None):
def startswith(self, pat, na=lib.no_default):
if na is lib.no_default:
na = None
return self._Series(
query_compiler=self._query_compiler.str_startswith(pat, na=na)
)
Expand All @@ -368,7 +373,9 @@ def encode(self, encoding, errors="strict"):
query_compiler=self._query_compiler.str_encode(encoding, errors)
)

def endswith(self, pat, na=None):
def endswith(self, pat, na=lib.no_default):
if na is lib.no_default:
na = None
return self._Series(
query_compiler=self._query_compiler.str_endswith(pat, na=na)
)
Expand All @@ -380,18 +387,22 @@ def findall(self, pat, flags=0):
query_compiler=self._query_compiler.str_findall(pat, flags=flags)
)

def fullmatch(self, pat, case=True, flags=0, na=None):
def fullmatch(self, pat, case=True, flags=0, na=lib.no_default):
if not isinstance(pat, (str, re.Pattern)):
raise TypeError("first argument must be string or compiled pattern")
if na is lib.no_default:
na = None
return self._Series(
query_compiler=self._query_compiler.str_fullmatch(
pat, case=case, flags=flags, na=na
)
)

def match(self, pat, case=True, flags=0, na=None):
def match(self, pat, case=True, flags=0, na=lib.no_default):
if not isinstance(pat, (str, re.Pattern)):
raise TypeError("first argument must be string or compiled pattern")
if na is lib.no_default:
na = None
return self._Series(
query_compiler=self._query_compiler.str_match(
pat, case=case, flags=flags, na=na
Expand Down
2 changes: 1 addition & 1 deletion modin/tests/pandas/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def test_top_level_api_equality():
), "Differences found in API: {}".format(extra_in_modin - set(ignore_modin))

difference = []
allowed_different = ["Interval", "datetime"]
allowed_different = ["Interval", "datetime", "StringDtype"]

# Check that we have all keywords and defaults in pandas
for m in set(pandas_dir) - set(ignore_pandas):
Expand Down
19 changes: 15 additions & 4 deletions modin/tests/pandas/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@
pytest.mark.filterwarnings(
"ignore:.*In a future version of pandas, the provided callable will be used directly.*:FutureWarning"
),
pytest.mark.filterwarnings(
"ignore:(DataFrameGroupBy|SeriesGroupBy)\.apply operated on the grouping columns:FutureWarning"
),
]


Expand Down Expand Up @@ -2166,7 +2169,10 @@ def test_mixed_columns(columns, drop_from_original_df, as_index):

df_equals(md_grp.size(), pd_grp.size())
df_equals(md_grp.sum(), pd_grp.sum())
df_equals(md_grp.apply(lambda df: df.sum()), pd_grp.apply(lambda df: df.sum()))
df_equals(
md_grp.apply(lambda df: df.sum(), include_groups=False),
pd_grp.apply(lambda df: df.sum(), include_groups=False),
)


@pytest.mark.parametrize("as_index", [True, False])
Expand Down Expand Up @@ -2263,7 +2269,9 @@ def test_mixed_columns_not_from_df(columns, as_index):

modin_groupby_equals_pandas(md_grp, pd_grp)
eval_general(md_grp, pd_grp, lambda grp: grp.size())
eval_general(md_grp, pd_grp, lambda grp: grp.apply(lambda df: df.sum()))
eval_general(
md_grp, pd_grp, lambda grp: grp.apply(lambda df: df.sum(), include_groups=False)
)
eval_general(md_grp, pd_grp, lambda grp: grp.first())


Expand Down Expand Up @@ -3277,9 +3285,12 @@ def test_groupby_apply_series_result(modify_config):
)
df["group"] = [1, 1, 2, 2, 3]

# res = df.groupby('group').apply(lambda x: x.name+2)
eval_general(
df, df._to_pandas(), lambda df: df.groupby("group").apply(lambda x: x.name + 2)
df,
df._to_pandas(),
lambda df: df.groupby("group").apply(
lambda x: x.name + 2, include_groups=False
),
)


Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
## required dependencies
pandas>=2.2,<2.3
pandas>=2.2,<2.4
numpy>=1.22.4
fsspec>=2022.11.0
packaging>=21.0
Expand Down
2 changes: 1 addition & 1 deletion requirements/env_unidist_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ dependencies:
- pip

# required dependencies
- pandas>=2.2,<2.3
- pandas>=2.2,<2.4
- numpy>=1.22.4
- unidist-mpi>=0.2.1
- mpich
Expand Down
2 changes: 1 addition & 1 deletion requirements/env_unidist_win.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ dependencies:
- pip

# required dependencies
- pandas>=2.2,<2.3
- pandas>=2.2,<2.4
- numpy>=1.22.4
- unidist-mpi>=0.2.1
- msmpi
Expand Down
2 changes: 1 addition & 1 deletion requirements/requirements-no-engine.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ dependencies:
- pip

# required dependencies
- pandas>=2.2,<2.3
- pandas>=2.2,<2.4
- numpy>=1.22.4
- fsspec>=2022.11.0
- packaging>=21.0
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def make_distribution(self):
long_description=long_description,
long_description_content_type="text/markdown",
install_requires=[
"pandas>=2.2,<2.3",
"pandas>=2.2,<2.4",
"packaging>=21.0",
"numpy>=1.22.4",
"fsspec>=2022.11.0",
Expand Down
Loading