28
28
import org .apache .hudi .common .model .WriteOperationType ;
29
29
import org .apache .hudi .common .table .HoodieTableMetaClient ;
30
30
import org .apache .hudi .common .table .HoodieTableVersion ;
31
- import org .apache .hudi .common .table .timeline .CompletionTimeQueryView ;
32
31
import org .apache .hudi .common .table .timeline .HoodieInstant ;
33
32
import org .apache .hudi .common .table .timeline .HoodieTimeline ;
34
33
import org .apache .hudi .common .util .Option ;
@@ -122,11 +121,10 @@ public List<HoodieRollbackRequest> getRollbackRequests(HoodieInstant instantToRo
122
121
return context .flatMap (partitionPaths , partitionPath -> {
123
122
List <HoodieRollbackRequest > hoodieRollbackRequests = new ArrayList <>(partitionPaths .size ());
124
123
125
- Supplier <List <StoragePathInfo >> filesToDelete = () -> {
124
+ Supplier <List <StoragePath >> filesToDelete = () -> {
126
125
try {
127
126
return fetchFilesFromInstant (instantToRollback , partitionPath , metaClient .getBasePath ().toString (), baseFileExtension ,
128
- metaClient .getStorage (),
129
- commitMetadataOptional , isCommitMetadataCompleted , tableType );
127
+ metaClient .getStorage (), commitMetadataOptional , isCommitMetadataCompleted , tableType , metaClient .getTableConfig ().getTableVersion ());
130
128
} catch (IOException e ) {
131
129
throw new HoodieIOException ("Fetching files to delete error" , e );
132
130
}
@@ -164,11 +162,10 @@ public List<HoodieRollbackRequest> getRollbackRequests(HoodieInstant instantToRo
164
162
} else {
165
163
// if this is part of a restore operation, we should rollback/delete entire file slice.
166
164
// For table version 6, the files can be directly fetched from the instant to rollback
167
- // For table version 8, the files are computed based on completion time. All files completed after
168
- // the requested time of instant to rollback are included
169
- hoodieRollbackRequests .addAll (getHoodieRollbackRequests (partitionPath , isTableVersionLessThanEight ? filesToDelete .get () :
170
- listAllFilesSinceCommit (instantToRollback .requestedTime (), baseFileExtension , partitionPath ,
171
- metaClient )));
165
+ // For table version 8, the log files are not directly associated with the base file.
166
+ // The rollback will iterate in reverse order based on completion time so the log files completed
167
+ // after the compaction will already be queued for removal and therefore, only the files from the compaction commit must be deleted.
168
+ hoodieRollbackRequests .addAll (getHoodieRollbackRequests (partitionPath , filesToDelete .get ()));
172
169
}
173
170
break ;
174
171
case HoodieTimeline .DELTA_COMMIT_ACTION :
@@ -280,32 +277,11 @@ public static List<HoodieRollbackRequest> getRollbackRequestToAppendForVersionSi
280
277
return hoodieRollbackRequests ;
281
278
}
282
279
283
- private List <StoragePathInfo > listAllFilesSinceCommit (String commit ,
284
- String baseFileExtension ,
285
- String partitionPath ,
286
- HoodieTableMetaClient metaClient ) throws IOException {
287
- LOG .info ("Collecting files to be cleaned/rolledback up for path " + partitionPath + " and commit " + commit );
288
- CompletionTimeQueryView completionTimeQueryView = metaClient .getTimelineLayout ().getTimelineFactory ().createCompletionTimeQueryView (metaClient );
289
- StoragePathFilter filter = (path ) -> {
290
- if (path .toString ().contains (baseFileExtension )) {
291
- String fileCommitTime = FSUtils .getCommitTime (path .getName ());
292
- return compareTimestamps (commit , LESSER_THAN_OR_EQUALS ,
293
- fileCommitTime );
294
- } else if (FSUtils .isLogFile (path )) {
295
- String fileCommitTime = FSUtils .getDeltaCommitTimeFromLogPath (path );
296
- return completionTimeQueryView .isSlicedAfterOrOn (commit , fileCommitTime );
297
- }
298
- return false ;
299
- };
300
- return metaClient .getStorage ()
301
- .listDirectEntries (FSUtils .constructAbsolutePath (config .getBasePath (), partitionPath ), filter );
302
- }
303
-
304
280
@ NotNull
305
- private List <HoodieRollbackRequest > getHoodieRollbackRequests (String partitionPath , List <StoragePathInfo > filesToDeletedStatus ) {
281
+ private List <HoodieRollbackRequest > getHoodieRollbackRequests (String partitionPath , List <StoragePath > filesToDeletedStatus ) {
306
282
return filesToDeletedStatus .stream ()
307
283
.map (pathInfo -> {
308
- String dataFileToBeDeleted = pathInfo .getPath (). toString ();
284
+ String dataFileToBeDeleted = pathInfo .toString ();
309
285
return formatDeletePath (dataFileToBeDeleted );
310
286
})
311
287
.map (s -> new HoodieRollbackRequest (partitionPath , EMPTY_STRING , EMPTY_STRING , Collections .singletonList (s ), Collections .emptyMap ()))
@@ -317,56 +293,45 @@ private static String formatDeletePath(String path) {
317
293
return path .substring (path .indexOf (":" ) + 1 );
318
294
}
319
295
320
- private List <StoragePathInfo > listBaseFilesToBeDeleted (String commit ,
321
- String basefileExtension ,
322
- String partitionPath ,
323
- HoodieStorage storage ) throws IOException {
324
- LOG .info ("Collecting files to be cleaned/rolledback up for path " + partitionPath + " and commit " + commit );
296
+ private List <StoragePath > listBaseFilesToBeDeleted (String commit ,
297
+ String basefileExtension ,
298
+ String partitionPath ,
299
+ HoodieStorage storage ) throws IOException {
300
+ LOG .info ("Collecting files to be cleaned/rolledback up for path {} and commit {}" , partitionPath , commit );
325
301
StoragePathFilter filter = (path ) -> {
326
302
if (path .toString ().contains (basefileExtension )) {
327
303
String fileCommitTime = FSUtils .getCommitTime (path .getName ());
328
304
return commit .equals (fileCommitTime );
329
305
}
330
306
return false ;
331
307
};
332
- return storage .listDirectEntries (FSUtils .constructAbsolutePath (config .getBasePath (), partitionPath ), filter );
308
+ return storage .listDirectEntries (FSUtils .constructAbsolutePath (config .getBasePath (), partitionPath ), filter ). stream (). map ( StoragePathInfo :: getPath ). collect ( Collectors . toList ()) ;
333
309
}
334
310
335
- private List <StoragePathInfo > fetchFilesFromInstant (HoodieInstant instantToRollback ,
336
- String partitionPath , String basePath ,
337
- String baseFileExtension , HoodieStorage storage ,
338
- Option <HoodieCommitMetadata > commitMetadataOptional ,
339
- Boolean isCommitMetadataCompleted ,
340
- HoodieTableType tableType ) throws IOException {
341
- // go w/ commit metadata only for COW table. for MOR, we need to get associated log files when commit corresponding to base file is rolledback.
342
- if ( isCommitMetadataCompleted && tableType == HoodieTableType . COPY_ON_WRITE ) {
343
- return fetchFilesFromCommitMetadata ( instantToRollback , partitionPath , basePath , commitMetadataOptional . get (),
344
- baseFileExtension , storage );
311
+ private List <StoragePath > fetchFilesFromInstant (HoodieInstant instantToRollback ,
312
+ String partitionPath , String basePath ,
313
+ String baseFileExtension , HoodieStorage storage ,
314
+ Option <HoodieCommitMetadata > commitMetadataOptional ,
315
+ boolean isCommitMetadataCompleted ,
316
+ HoodieTableType tableType ,
317
+ HoodieTableVersion tableVersion ) throws IOException {
318
+ // for MOR tables with version < 8, listing is required to fetch the log files associated with base files added by this commit.
319
+ if ( isCommitMetadataCompleted && ( tableType == HoodieTableType . COPY_ON_WRITE || tableVersion . greaterThanOrEquals ( HoodieTableVersion . EIGHT ))) {
320
+ return fetchFilesFromCommitMetadata ( instantToRollback , partitionPath , basePath , commitMetadataOptional . get (), baseFileExtension );
345
321
} else {
346
322
return fetchFilesFromListFiles (instantToRollback , partitionPath , basePath , baseFileExtension , storage );
347
323
}
348
324
}
349
325
350
- private List <StoragePathInfo > fetchFilesFromCommitMetadata (HoodieInstant instantToRollback ,
351
- String partitionPath ,
352
- String basePath ,
353
- HoodieCommitMetadata commitMetadata ,
354
- String baseFileExtension ,
355
- HoodieStorage storage ) throws IOException {
326
+ private List <StoragePath > fetchFilesFromCommitMetadata (HoodieInstant instantToRollback ,
327
+ String partitionPath ,
328
+ String basePath ,
329
+ HoodieCommitMetadata commitMetadata ,
330
+ String baseFileExtension ) {
356
331
StoragePathFilter pathFilter = getPathFilter (baseFileExtension ,
357
332
instantToRollback .requestedTime ());
358
- List <StoragePath > filePaths = getFilesFromCommitMetadata (basePath , commitMetadata , partitionPath )
359
- .filter (entry -> {
360
- try {
361
- return storage .exists (entry );
362
- } catch (Exception e ) {
363
- LOG .error ("Exists check failed for " + entry .toString (), e );
364
- }
365
- // if any Exception is thrown, do not ignore. let's try to add the file of interest to be deleted. we can't miss any files to be rolled back.
366
- return true ;
367
- }).collect (Collectors .toList ());
368
-
369
- return storage .listDirectEntries (filePaths , pathFilter );
333
+ return getFilesFromCommitMetadata (basePath , commitMetadata , partitionPath )
334
+ .filter (pathFilter ::accept ).collect (Collectors .toList ());
370
335
}
371
336
372
337
/**
@@ -379,15 +344,15 @@ private List<StoragePathInfo> fetchFilesFromCommitMetadata(HoodieInstant instant
379
344
* @return
380
345
* @throws IOException
381
346
*/
382
- private List <StoragePathInfo > fetchFilesFromListFiles (HoodieInstant instantToRollback ,
383
- String partitionPath ,
384
- String basePath ,
385
- String baseFileExtension ,
386
- HoodieStorage storage ) throws IOException {
347
+ private List <StoragePath > fetchFilesFromListFiles (HoodieInstant instantToRollback ,
348
+ String partitionPath ,
349
+ String basePath ,
350
+ String baseFileExtension ,
351
+ HoodieStorage storage ) throws IOException {
387
352
StoragePathFilter pathFilter = getPathFilter (baseFileExtension , instantToRollback .requestedTime ());
388
353
List <StoragePath > filePaths = listFilesToBeDeleted (basePath , partitionPath );
389
354
390
- return storage .listDirectEntries (filePaths , pathFilter );
355
+ return storage .listDirectEntries (filePaths , pathFilter ). stream (). map ( StoragePathInfo :: getPath ). collect ( Collectors . toList ()) ;
391
356
}
392
357
393
358
private Boolean checkCommitMetadataCompleted (HoodieInstant instantToRollback ,
0 commit comments