Skip to content

Commit 3638183

Browse files
lhoestq authored and albertvillanova committed
Shorten long logs (#6875)
1 parent bb2664c commit 3638183

File tree

1 file changed

+20
-13
lines changed

1 file changed

+20
-13
lines changed

src/datasets/table.py

Lines changed: 20 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import os
33
from functools import partial
44
from itertools import groupby
5-
from typing import TYPE_CHECKING, Callable, Iterator, List, Optional, Tuple, TypeVar, Union
5+
from typing import TYPE_CHECKING, Any, Callable, Iterator, List, Optional, Tuple, TypeVar, Union
66

77
import numpy as np
88
import pyarrow as pa
@@ -1307,7 +1307,7 @@ def __init__(self, table: pa.Table, blocks: List[List[TableBlock]]):
13071307
if not isinstance(subtable, TableBlock):
13081308
raise TypeError(
13091309
"The blocks of a ConcatenationTable must be InMemoryTable or MemoryMappedTable objects"
1310-
f", but got {subtable}."
1310+
f", but got {_short_str(subtable)}."
13111311
)
13121312

13131313
def __getstate__(self):
@@ -1837,6 +1837,13 @@ def _storage_type(type: pa.DataType) -> pa.DataType:
18371837
return type
18381838

18391839

1840+
def _short_str(value: Any) -> str:
1841+
out = str(value)
1842+
if len(out) > 3000:
1843+
out = out[:1500] + "\n...\n" + out[-1500:]
1844+
return out
1845+
1846+
18401847
@_wrap_for_chunked_arrays
18411848
def array_cast(
18421849
array: pa.Array, pa_type: pa.DataType, allow_primitive_to_str: bool = True, allow_decimal_to_str: bool = True
@@ -1943,18 +1950,18 @@ def array_cast(
19431950
if pa.types.is_string(pa_type):
19441951
if not allow_primitive_to_str and pa.types.is_primitive(array.type):
19451952
raise TypeError(
1946-
f"Couldn't cast array of type {array.type} to {pa_type} "
1953+
f"Couldn't cast array of type {_short_str(array.type)} to {_short_str(pa_type)} "
19471954
f"since allow_primitive_to_str is set to {allow_primitive_to_str} "
19481955
)
19491956
if not allow_decimal_to_str and pa.types.is_decimal(array.type):
19501957
raise TypeError(
1951-
f"Couldn't cast array of type {array.type} to {pa_type} "
1958+
f"Couldn't cast array of type {_short_str(array.type)} to {_short_str(pa_type)} "
19521959
f"and allow_decimal_to_str is set to {allow_decimal_to_str}"
19531960
)
19541961
if pa.types.is_null(pa_type) and not pa.types.is_null(array.type):
1955-
raise TypeError(f"Couldn't cast array of type {array.type} to {pa_type}")
1962+
raise TypeError(f"Couldn't cast array of type {_short_str(array.type)} to {_short_str(pa_type)}")
19561963
return array.cast(pa_type)
1957-
raise TypeError(f"Couldn't cast array of type\n{array.type}\nto\n{pa_type}")
1964+
raise TypeError(f"Couldn't cast array of type {_short_str(array.type)} to {_short_str(pa_type)}")
19581965

19591966

19601967
@_wrap_for_chunked_arrays
@@ -2112,7 +2119,7 @@ def cast_array_to_feature(
21122119
allow_primitive_to_str=allow_primitive_to_str,
21132120
allow_decimal_to_str=allow_decimal_to_str,
21142121
)
2115-
raise TypeError(f"Couldn't cast array of type\n{array.type}\nto\n{feature}")
2122+
raise TypeError(f"Couldn't cast array of type\n{_short_str(array.type)}\nto\n{_short_str(feature)}")
21162123

21172124

21182125
@_wrap_for_chunked_arrays
@@ -2180,7 +2187,7 @@ def embed_array_storage(array: pa.Array, feature: "FeatureType"):
21802187
return pa.FixedSizeListArray.from_arrays(embedded_array_values, feature.length, mask=array.is_null())
21812188
if not isinstance(feature, (Sequence, dict, list, tuple)):
21822189
return array
2183-
raise TypeError(f"Couldn't embed array of type\n{array.type}\nwith\n{feature}")
2190+
raise TypeError(f"Couldn't embed array of type\n{_short_str(array.type)}\nwith\n{_short_str(feature)}")
21842191

21852192

21862193
class CastError(ValueError):
@@ -2201,11 +2208,11 @@ def details(self):
22012208
new_columns = set(self.table_column_names) - set(self.requested_column_names)
22022209
missing_columns = set(self.requested_column_names) - set(self.table_column_names)
22032210
if new_columns and missing_columns:
2204-
return f"there are {len(new_columns)} new columns ({', '.join(new_columns)}) and {len(missing_columns)} missing columns ({', '.join(missing_columns)})."
2211+
return f"there are {len(new_columns)} new columns ({_short_str(new_columns)}) and {len(missing_columns)} missing columns ({_short_str(missing_columns)})."
22052212
elif new_columns:
2206-
return f"there are {len(new_columns)} new columns ({new_columns})"
2213+
return f"there are {len(new_columns)} new columns ({_short_str(new_columns)})"
22072214
else:
2208-
return f"there are {len(missing_columns)} missing columns ({missing_columns})"
2215+
return f"there are {len(missing_columns)} missing columns ({_short_str(missing_columns)})"
22092216

22102217

22112218
def cast_table_to_features(table: pa.Table, features: "Features"):
@@ -2222,7 +2229,7 @@ def cast_table_to_features(table: pa.Table, features: "Features"):
22222229
"""
22232230
if sorted(table.column_names) != sorted(features):
22242231
raise CastError(
2225-
f"Couldn't cast\n{table.schema}\nto\n{features}\nbecause column names don't match",
2232+
f"Couldn't cast\n{_short_str(table.schema)}\nto\n{_short_str(features)}\nbecause column names don't match",
22262233
table_column_names=table.column_names,
22272234
requested_column_names=list(features),
22282235
)
@@ -2247,7 +2254,7 @@ def cast_table_to_schema(table: pa.Table, schema: pa.Schema):
22472254
features = Features.from_arrow_schema(schema)
22482255
if sorted(table.column_names) != sorted(features):
22492256
raise CastError(
2250-
f"Couldn't cast\n{table.schema}\nto\n{features}\nbecause column names don't match",
2257+
f"Couldn't cast\n{_short_str(table.schema)}\nto\n{_short_str(features)}\nbecause column names don't match",
22512258
table_column_names=table.column_names,
22522259
requested_column_names=list(features),
22532260
)

0 commit comments

Comments (0)