29
29
DATAFRAME_FRAMEWORK = 'pandas'
30
30
31
31
32
- class FileProcessor ( object ) :
32
+ class FileProcessor :
33
33
@abstractmethod
34
34
def format (self ):
35
35
pass
@@ -139,11 +139,10 @@ def __init__(self, sep=',', encoding: str = 'utf-8'):
139
139
def format (self ):
140
140
return TextFormat (encoding = self ._encoding )
141
141
142
- def load (self , file ):
143
- ...
142
+ def load (self , file ): ...
143
+
144
+ def dump (self , obj , file ): ...
144
145
145
- def dump (self , obj , file ):
146
- ...
147
146
148
147
class PolarsCsvFileProcessor (CsvFileProcessor ):
149
148
def load (self , file ):
@@ -191,11 +190,9 @@ def __init__(self, orient: str | None = None):
191
190
def format (self ):
192
191
return luigi .format .Nop
193
192
194
- def load (self , file ):
195
- ...
193
+ def load (self , file ): ...
196
194
197
- def dump (self , obj , file ):
198
- ...
195
+ def dump (self , obj , file ): ...
199
196
200
197
201
198
class PolarsJsonFileProcessor (JsonFileProcessor ):
@@ -215,7 +212,7 @@ def dump(self, obj, file):
215
212
obj = pl .from_dict (obj )
216
213
217
214
if self ._orient == 'records' :
218
- obj_write_ndjson (file )
215
+ obj . write_ndjson (file )
219
216
else :
220
217
obj .write_json (file )
221
218
@@ -272,11 +269,10 @@ def __init__(self, engine='pyarrow', compression=None):
272
269
def format (self ):
273
270
return luigi .format .Nop
274
271
275
- def load (self , file ):
276
- ...
272
+ def load (self , file ): ...
273
+
274
+ def dump (self , obj , file ): ...
277
275
278
- def dump (self , obj , file ):
279
- ...
280
276
281
277
class PolarsParquetFileProcessor (ParquetFileProcessor ):
282
278
def load (self , file ):
@@ -314,20 +310,17 @@ def __init__(self, store_index_in_feather: bool):
314
310
def format (self ):
315
311
return luigi .format .Nop
316
312
317
- def load (self , file ):
318
- ...
313
+ def load (self , file ): ...
319
314
320
- def dump (self , obj , file ):
321
- ...
315
+ def dump (self , obj , file ): ...
322
316
323
317
324
318
class PolarsFeatherFileProcessor (FeatherFileProcessor ):
325
319
def load (self , file ):
326
320
# Since polars' DataFrame doesn't have index, just load feather file
327
321
if ObjectStorage .is_buffered_reader (file ):
328
- loaded_df = pl .read_ipc (file .name )
329
- else :
330
- loaded_df = pl .read_ipc (BytesIO (file .read ()))
322
+ return pl .read_ipc (file .name )
323
+ return pl .read_ipc (BytesIO (file .read ()))
331
324
332
325
def dump (self , obj , file ):
333
326
assert isinstance (obj , (pl .DataFrame )), f'requires pl.DataFrame, but { type (obj )} is passed.'
@@ -388,6 +381,7 @@ def dump(self, obj, file):
388
381
ParquetFileProcessor = PandasParquetFileProcessor
389
382
FeatherFileProcessor = PandasFeatherFileProcessor
390
383
384
+
391
385
def make_file_processor (file_path : str , store_index_in_feather : bool ) -> FileProcessor :
392
386
extension2processor = {
393
387
'.txt' : TextFileProcessor (),
0 commit comments