2
2
import os
3
3
from functools import partial
4
4
from itertools import groupby
5
- from typing import TYPE_CHECKING , Callable , Iterator , List , Optional , Tuple , TypeVar , Union
5
+ from typing import TYPE_CHECKING , Any , Callable , Iterator , List , Optional , Tuple , TypeVar , Union
6
6
7
7
import numpy as np
8
8
import pyarrow as pa
@@ -1307,7 +1307,7 @@ def __init__(self, table: pa.Table, blocks: List[List[TableBlock]]):
1307
1307
if not isinstance (subtable , TableBlock ):
1308
1308
raise TypeError (
1309
1309
"The blocks of a ConcatenationTable must be InMemoryTable or MemoryMappedTable objects"
1310
- f", but got { subtable } ."
1310
+ f", but got { _short_str ( subtable ) } ."
1311
1311
)
1312
1312
1313
1313
def __getstate__ (self ):
@@ -1837,6 +1837,13 @@ def _storage_type(type: pa.DataType) -> pa.DataType:
1837
1837
return type
1838
1838
1839
1839
1840
+ def _short_str (value : Any ) -> str :
1841
+ out = str (value )
1842
+ if len (out ) > 3000 :
1843
+ out = out [:1500 ] + "\n ...\n " + out [- 1500 :]
1844
+ return out
1845
+
1846
+
1840
1847
@_wrap_for_chunked_arrays
1841
1848
def array_cast (
1842
1849
array : pa .Array , pa_type : pa .DataType , allow_primitive_to_str : bool = True , allow_decimal_to_str : bool = True
@@ -1943,18 +1950,18 @@ def array_cast(
1943
1950
if pa .types .is_string (pa_type ):
1944
1951
if not allow_primitive_to_str and pa .types .is_primitive (array .type ):
1945
1952
raise TypeError (
1946
- f"Couldn't cast array of type { array .type } to { pa_type } "
1953
+ f"Couldn't cast array of type { _short_str ( array .type ) } to { _short_str ( pa_type ) } "
1947
1954
f"since allow_primitive_to_str is set to { allow_primitive_to_str } "
1948
1955
)
1949
1956
if not allow_decimal_to_str and pa .types .is_decimal (array .type ):
1950
1957
raise TypeError (
1951
- f"Couldn't cast array of type { array .type } to { pa_type } "
1958
+ f"Couldn't cast array of type { _short_str ( array .type ) } to { _short_str ( pa_type ) } "
1952
1959
f"and allow_decimal_to_str is set to { allow_decimal_to_str } "
1953
1960
)
1954
1961
if pa .types .is_null (pa_type ) and not pa .types .is_null (array .type ):
1955
- raise TypeError (f"Couldn't cast array of type { array .type } to { pa_type } " )
1962
+ raise TypeError (f"Couldn't cast array of type { _short_str ( array .type ) } to { _short_str ( pa_type ) } " )
1956
1963
return array .cast (pa_type )
1957
- raise TypeError (f"Couldn't cast array of type\n { array .type } \n to \n { pa_type } " )
1964
+ raise TypeError (f"Couldn't cast array of type { _short_str ( array .type ) } to { _short_str ( pa_type ) } " )
1958
1965
1959
1966
1960
1967
@_wrap_for_chunked_arrays
@@ -2112,7 +2119,7 @@ def cast_array_to_feature(
2112
2119
allow_primitive_to_str = allow_primitive_to_str ,
2113
2120
allow_decimal_to_str = allow_decimal_to_str ,
2114
2121
)
2115
- raise TypeError (f"Couldn't cast array of type\n { array .type } \n to\n { feature } " )
2122
+ raise TypeError (f"Couldn't cast array of type\n { _short_str ( array .type ) } \n to\n { _short_str ( feature ) } " )
2116
2123
2117
2124
2118
2125
@_wrap_for_chunked_arrays
@@ -2180,7 +2187,7 @@ def embed_array_storage(array: pa.Array, feature: "FeatureType"):
2180
2187
return pa .FixedSizeListArray .from_arrays (embedded_array_values , feature .length , mask = array .is_null ())
2181
2188
if not isinstance (feature , (Sequence , dict , list , tuple )):
2182
2189
return array
2183
- raise TypeError (f"Couldn't embed array of type\n { array .type } \n with\n { feature } " )
2190
+ raise TypeError (f"Couldn't embed array of type\n { _short_str ( array .type ) } \n with\n { _short_str ( feature ) } " )
2184
2191
2185
2192
2186
2193
class CastError (ValueError ):
@@ -2201,11 +2208,11 @@ def details(self):
2201
2208
new_columns = set (self .table_column_names ) - set (self .requested_column_names )
2202
2209
missing_columns = set (self .requested_column_names ) - set (self .table_column_names )
2203
2210
if new_columns and missing_columns :
2204
- return f"there are { len (new_columns )} new columns ({ ', ' . join (new_columns )} ) and { len (missing_columns )} missing columns ({ ', ' . join (missing_columns )} )."
2211
+ return f"there are { len (new_columns )} new columns ({ _short_str (new_columns )} ) and { len (missing_columns )} missing columns ({ _short_str (missing_columns )} )."
2205
2212
elif new_columns :
2206
- return f"there are { len (new_columns )} new columns ({ new_columns } )"
2213
+ return f"there are { len (new_columns )} new columns ({ _short_str ( new_columns ) } )"
2207
2214
else :
2208
- return f"there are { len (missing_columns )} missing columns ({ missing_columns } )"
2215
+ return f"there are { len (missing_columns )} missing columns ({ _short_str ( missing_columns ) } )"
2209
2216
2210
2217
2211
2218
def cast_table_to_features (table : pa .Table , features : "Features" ):
@@ -2222,7 +2229,7 @@ def cast_table_to_features(table: pa.Table, features: "Features"):
2222
2229
"""
2223
2230
if sorted (table .column_names ) != sorted (features ):
2224
2231
raise CastError (
2225
- f"Couldn't cast\n { table .schema } \n to\n { features } \n because column names don't match" ,
2232
+ f"Couldn't cast\n { _short_str ( table .schema ) } \n to\n { _short_str ( features ) } \n because column names don't match" ,
2226
2233
table_column_names = table .column_names ,
2227
2234
requested_column_names = list (features ),
2228
2235
)
@@ -2247,7 +2254,7 @@ def cast_table_to_schema(table: pa.Table, schema: pa.Schema):
2247
2254
features = Features .from_arrow_schema (schema )
2248
2255
if sorted (table .column_names ) != sorted (features ):
2249
2256
raise CastError (
2250
- f"Couldn't cast\n { table .schema } \n to\n { features } \n because column names don't match" ,
2257
+ f"Couldn't cast\n { _short_str ( table .schema ) } \n to\n { _short_str ( features ) } \n because column names don't match" ,
2251
2258
table_column_names = table .column_names ,
2252
2259
requested_column_names = list (features ),
2253
2260
)
0 commit comments