-
Notifications
You must be signed in to change notification settings - Fork 665
Description
Modin version checks
-
I have checked that this issue has not already been reported.
-
I have confirmed this bug exists on the latest released version of Modin.
-
I have confirmed this bug exists on the main branch of Modin. (In order to do this you can follow this guide.)
Reproducible Example
In [1]: import modin.pandas as pd
In [2]: df = pd.DataFrame({'a': [0]})
In [3]: df['b'] = 'foobar'
In [4]: df['c'] = '2020-01-01'
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[4], line 1
----> 1 df['c'] = '2020-01-01'
File ~/polars-api-compat-dev/.venv/lib/python3.11/site-packages/modin/logging/logger_decorator.py:144, in enable_logging.<locals>.decorator.<locals>.run_and_log(*args, **kwargs)
129 """
130 Compute function with logging if Modin logging is enabled.
131
(...)
141 Any
142 """
143 if LogMode.get() == "disable":
--> 144 return obj(*args, **kwargs)
146 logger = get_logger()
147 logger.log(log_level, start_line)
File ~/polars-api-compat-dev/.venv/lib/python3.11/site-packages/modin/pandas/dataframe.py:2683, in DataFrame.__setitem__(self, key, value)
2681 return
2682 # Do new column assignment after error checks and possible value modifications
-> 2683 self.insert(loc=len(self.columns), column=key, value=value)
2684 return
2686 if not hashable(key):
File ~/polars-api-compat-dev/.venv/lib/python3.11/site-packages/modin/logging/logger_decorator.py:144, in enable_logging.<locals>.decorator.<locals>.run_and_log(*args, **kwargs)
129 """
130 Compute function with logging if Modin logging is enabled.
131
(...)
141 Any
142 """
143 if LogMode.get() == "disable":
--> 144 return obj(*args, **kwargs)
146 logger = get_logger()
147 logger.log(log_level, start_line)
File ~/polars-api-compat-dev/.venv/lib/python3.11/site-packages/modin/pandas/dataframe.py:1130, in DataFrame.insert(self, loc, column, value, allow_duplicates)
1128 if isinstance(value, (Series, array)):
1129 value = value._query_compiler
-> 1130 new_query_compiler = self._query_compiler.insert(loc, column, value)
1132 self._update_inplace(new_query_compiler=new_query_compiler)
File ~/polars-api-compat-dev/.venv/lib/python3.11/site-packages/modin/logging/logger_decorator.py:144, in enable_logging.<locals>.decorator.<locals>.run_and_log(*args, **kwargs)
129 """
130 Compute function with logging if Modin logging is enabled.
131
(...)
141 Any
142 """
143 if LogMode.get() == "disable":
--> 144 return obj(*args, **kwargs)
146 logger = get_logger()
147 logger.log(log_level, start_line)
File ~/polars-api-compat-dev/.venv/lib/python3.11/site-packages/modin/core/storage_formats/pandas/query_compiler.py:3271, in PandasQueryCompiler.insert(self, loc, column, value)
3268 df.insert(internal_idx, column, value)
3269 return df
-> 3271 value_dtype = extract_dtype(value)
3272 new_columns = self.columns.insert(loc, column)
3273 new_dtypes = ModinDtypes.concat(
3274 [
3275 self._modin_frame._dtypes,
(...)
3279 new_columns
3280 ) # get dtypes in a proper order
File ~/polars-api-compat-dev/.venv/lib/python3.11/site-packages/modin/core/dataframe/pandas/metadata/dtypes.py:1227, in extract_dtype(value)
1215 """
1216 Extract dtype(s) from the passed `value`.
1217
(...)
1224 DtypeObj or pandas.Series of DtypeObj
1225 """
1226 try:
-> 1227 dtype = pandas.api.types.pandas_dtype(value)
1228 except TypeError:
1229 dtype = pandas.Series(value).dtype
File ~/polars-api-compat-dev/.venv/lib/python3.11/site-packages/pandas/core/dtypes/common.py:1645, in pandas_dtype(dtype)
1640 with warnings.catch_warnings():
1641 # GH#51523 - Series.astype(np.integer) doesn't show
1642 # numpy deprecation warning of np.integer
1643 # Hence enabling DeprecationWarning
1644 warnings.simplefilter("always", DeprecationWarning)
-> 1645 npdtype = np.dtype(dtype)
1646 except SyntaxError as err:
1647 # np.dtype uses `eval` which can raise SyntaxError
1648 raise TypeError(f"data type '{dtype}' not understood") from err
File ~/polars-api-compat-dev/.venv/lib/python3.11/site-packages/numpy/_core/_internal.py:170, in _commastring(astr)
168 mo = sep_re.match(astr, pos=startindex)
169 if not mo:
--> 170 raise ValueError(
171 'format number %d of "%s" is not recognized' %
172 (len(result)+1, astr))
173 startindex = mo.end()
174 islist = True
ValueError: format number 1 of "2020-01-01" is not recognized
Issue Description
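Assigning a scalar string to a new column raises `ValueError` when the string looks like something NumPy tries to parse as a dtype specification (here `'2020-01-01'`). `extract_dtype` first passes the value to `pandas.api.types.pandas_dtype` and only falls back to `pandas.Series(value).dtype` on `TypeError`; for this string NumPy raises `ValueError` instead, so the exception propagates to the user. Assigning `'foobar'` in the same way works.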
Expected Behavior
A new column `c` should be created with the string `'2020-01-01'` as its value, just as `df['b'] = 'foobar'` creates column `b`, without raising.
Error Logs
See the full traceback in the Reproducible Example section above; it ends with `ValueError: format number 1 of "2020-01-01" is not recognized`.
Installed Versions
In [5]: pd.show_versions()
UserWarning: Setuptools is replacing distutils. Support for replacing an already imported distutils is deprecated. In the future, this condition will fail. Register concerns at https://github.com/pypa/setuptools/issues/new?template=distutils-deprecation.yml
INSTALLED VERSIONS
commit : c8bbca8
python : 3.11.9.final.0
python-bits : 64
OS : Linux
OS-release : 5.15.153.1-microsoft-standard-WSL2
Version : #1 SMP Fri Mar 29 23:14:13 UTC 2024
machine : x86_64
processor : x86_64
byteorder : little
LC_ALL : None
LANG : C.UTF-8
LOCALE : en_US.UTF-8
Modin dependencies
modin : 0.31.0
ray : None
dask : 2024.8.0
distributed : 2024.8.0
pandas dependencies
pandas : 2.2.2
numpy : 2.0.1
pytz : 2024.1
dateutil : 2.9.0.post0
setuptools : 72.1.0
pip : 24.2
Cython : None
pytest : 8.3.2
hypothesis : 6.111.0
sphinx : None
blosc : None
feather : None
xlsxwriter : None
lxml.etree : None
html5lib : None
pymysql : None
psycopg2 : None
jinja2 : 3.1.4
IPython : 8.26.0
pandas_datareader : None
adbc-driver-postgresql: None
adbc-driver-sqlite : None
bs4 : None
bottleneck : None
dataframe-api-compat : None
fastparquet : None
fsspec : 2024.6.1
gcsfs : None
matplotlib : None
numba : None
numexpr : None
odfpy : None
openpyxl : None
pandas_gbq : None
pyarrow : 17.0.0
pyreadstat : None
python-calamine : None
pyxlsb : None
s3fs : None
scipy : 1.14.0
sqlalchemy : None
tables : None
tabulate : None
xarray : None
xlrd : None
zstandard : None
tzdata : 2024.1
qtpy : None
pyqt5 : None