11from unittest import TestCase
2+ from unittest .mock import patch
3+
4+ import numpy as np
5+ import pandas as pd
26
37from nlp .arrow_dataset import Dataset
4- from nlp .features import Features , Sequence , Value
8+ from nlp .features import Features , Sequence , Value , _cast_to_python_objects , cast_to_python_objects
9+
10+ from .utils import require_tf , require_torch
511
612
713class FeaturesTest (TestCase ):
@@ -24,3 +30,66 @@ def test_from_arrow_schema_with_sequence(self):
2430 self .assertEqual (original_features .type , new_features .type )
2531 self .assertDictEqual (dset [0 ], new_dset [0 ])
2632 self .assertDictEqual (dset [:], new_dset [:])
33+
def test_cast_to_python_objects_list(self):
    """Cast a dict made only of lists and dicts and compare it with an equal literal."""
    python_input = {
        "col_1": [{"vec": [1, 2, 3], "txt": "foo"}] * 3,
        "col_2": [[1, 2], [3, 4], [5, 6]],
    }
    # Spelled out separately so the expectation never shares objects with the input.
    wanted = {
        "col_1": [{"vec": [1, 2, 3], "txt": "foo"}] * 3,
        "col_2": [[1, 2], [3, 4], [5, 6]],
    }
    self.assertDictEqual(cast_to_python_objects(python_input), wanted)
39+
def test_cast_to_python_objects_tuple(self):
    """Cast a dict whose sequences are tuples and expect the list-based equivalent."""
    tuple_input = {
        "col_1": [{"vec": (1, 2, 3), "txt": "foo"}] * 3,
        "col_2": [(1, 2), (3, 4), (5, 6)],
    }
    # Same values as the input, but every tuple is written as a list.
    wanted = {
        "col_1": [{"vec": [1, 2, 3], "txt": "foo"}] * 3,
        "col_2": [[1, 2], [3, 4], [5, 6]],
    }
    self.assertDictEqual(cast_to_python_objects(tuple_input), wanted)
45+
def test_cast_to_python_objects_numpy(self):
    """Cast a dict holding numpy arrays and expect plain nested lists of ints."""
    # A 1-D array nested inside each record, and a (3, 2) array as a whole column.
    nested_record = {"vec": np.arange(1, 4), "txt": "foo"}
    matrix_column = np.arange(1, 7).reshape(3, 2)
    numpy_input = {"col_1": [nested_record] * 3, "col_2": matrix_column}

    wanted = {
        "col_1": [{"vec": [1, 2, 3], "txt": "foo"}] * 3,
        "col_2": [[1, 2], [3, 4], [5, 6]],
    }
    self.assertDictEqual(cast_to_python_objects(numpy_input), wanted)
51+
def test_cast_to_python_objects_series(self):
    """Cast a dict whose columns are pandas Series and expect plain lists and dicts."""
    record_series = pd.Series([{"vec": [1, 2, 3], "txt": "foo"}] * 3)
    pair_series = pd.Series([[1, 2], [3, 4], [5, 6]])
    series_input = {"col_1": record_series, "col_2": pair_series}

    wanted = {
        "col_1": [{"vec": [1, 2, 3], "txt": "foo"}] * 3,
        "col_2": [[1, 2], [3, 4], [5, 6]],
    }
    self.assertDictEqual(cast_to_python_objects(series_input), wanted)
60+
def test_cast_to_python_objects_dataframe(self):
    """Cast a whole pandas DataFrame and expect a dict of plain column lists."""
    column_data = {
        "col_1": [{"vec": [1, 2, 3], "txt": "foo"}] * 3,
        "col_2": [[1, 2], [3, 4], [5, 6]],
    }
    # Here the DataFrame itself, not a dict of columns, is handed to the cast.
    frame_input = pd.DataFrame(column_data)

    wanted = {
        "col_1": [{"vec": [1, 2, 3], "txt": "foo"}] * 3,
        "col_2": [[1, 2], [3, 4], [5, 6]],
    }
    self.assertDictEqual(cast_to_python_objects(frame_input), wanted)
66+
@require_torch
def test_cast_to_python_objects_torch(self):
    """Cast a dict holding torch tensors and compare it with plain nested lists."""
    # Imported here rather than at module level; the method is gated by @require_torch.
    import torch

    nested_record = {"vec": torch.Tensor(np.arange(1, 4)), "txt": "foo"}
    matrix_column = torch.Tensor(np.arange(1, 7).reshape(3, 2))
    torch_input = {"col_1": [nested_record] * 3, "col_2": matrix_column}

    wanted = {
        "col_1": [{"vec": [1, 2, 3], "txt": "foo"}] * 3,
        "col_2": [[1, 2], [3, 4], [5, 6]],
    }
    self.assertDictEqual(cast_to_python_objects(torch_input), wanted)
78+
@require_tf
def test_cast_to_python_objects_tf(self):
    """Cast a dict holding TensorFlow constants and compare it with plain nested lists."""
    # Imported here rather than at module level; the method is gated by @require_tf.
    import tensorflow as tf

    nested_record = {"vec": tf.constant(np.arange(1, 4)), "txt": "foo"}
    matrix_column = tf.constant(np.arange(1, 7).reshape(3, 2))
    tf_input = {"col_1": [nested_record] * 3, "col_2": matrix_column}

    wanted = {
        "col_1": [{"vec": [1, 2, 3], "txt": "foo"}] * 3,
        "col_2": [[1, 2], [3, 4], [5, 6]],
    }
    self.assertDictEqual(cast_to_python_objects(tf_input), wanted)
90+
@patch("nlp.features._cast_to_python_objects", side_effect=_cast_to_python_objects)
def test_dont_iterate_over_each_element_in_a_list(self, mocked_cast):
    """Count how many times the patched private cast helper is called for a nested list.

    The patch keeps the real helper as ``side_effect``, so the cast still runs
    while the mock records every call.
    """
    nested_input = {"col_1": [[1, 2], [3, 4], [5, 6]]}
    # The original author describes this figure as the depth of the object.
    expected_calls = 4

    cast_to_python_objects(nested_input)

    self.assertEqual(mocked_cast.call_count, expected_calls)
0 commit comments