@@ -258,10 +258,9 @@ func cleanAndCopyLibrary(state *config.LibrarianState, repoDir, libraryID, outpu
258
258
}
259
259
}
260
260
261
- preservePatterns := append (library .PreserveRegex ,
262
- globalPreservePatterns ... )
261
+ preservePatterns := append (library .PreserveRegex , globalPreservePatterns ... )
263
262
264
- if err := clean (repoDir , removePatterns , preservePatterns ); err != nil {
263
+ if err := clean (repoDir , library . SourceRoots , removePatterns , preservePatterns ); err != nil {
265
264
return fmt .Errorf ("failed to clean library, %s: %w" , library .ID , err )
266
265
}
267
266
@@ -445,38 +444,67 @@ func copyFile(dst, src string) (err error) {
445
444
return err
446
445
}
447
446
448
- // clean removes files and directories from a root directory based on remove and preserve patterns.
447
+ // clean removes files and directories from source roots based on remove and preserve patterns.
448
+ // Limit the possible files when cleaning to those in source roots (not rootDir) as regex patterns
449
+ // for preserve and remove should *only* impact source root files.
449
450
//
450
451
// It first determines the paths to remove by applying the removePatterns and then excluding any paths
451
452
// that match the preservePatterns. It then separates the remaining paths into files and directories and
452
453
// removes them, ensuring that directories are removed last.
453
454
//
454
455
// This logic is ported from owlbot logic: https://github.com/googleapis/repo-automation-bots/blob/12dad68640960290910b660e4325630c9ace494b/packages/owl-bot/src/copy-code.ts#L1027
455
- func clean (rootDir string , removePatterns , preservePatterns []string ) error {
456
- slog .Info ("cleaning directory" , "path" , rootDir )
457
- finalPathsToRemove , err := deriveFinalPathsToRemove (rootDir , removePatterns , preservePatterns )
456
+ func clean (rootDir string , sourceRoots , removePatterns , preservePatterns []string ) error {
457
+ slog .Info ("cleaning directories" , "source roots" , sourceRoots )
458
+
459
+ // paths contains a list of relative paths and not absolute paths from rootDir
460
+ // because regex patterns apply to source root relative path
461
+ var paths []string
462
+ for _ , sourceRoot := range sourceRoots {
463
+ sourceRootAbsPath := filepath .Join (rootDir , sourceRoot )
464
+ sourceRootPaths , err := findAllDirRelPaths (rootDir , sourceRootAbsPath )
465
+ if err != nil {
466
+ slog .Warn ("unable to find files to clean for source root" , rootDir , sourceRoot )
467
+ return err
468
+ }
469
+ if len (sourceRootPaths ) == 0 {
470
+ slog .Warn ("unable to find any files to clean source root" , "source root" , sourceRoot )
471
+ break
472
+ }
473
+ paths = append (paths , sourceRootPaths ... )
474
+ }
475
+
476
+ pathsToRemove , err := filterPathsForRemoval (paths , removePatterns , preservePatterns )
458
477
if err != nil {
459
478
return err
460
479
}
461
480
462
- filesToRemove , dirsToRemove , err := separateFilesAndDirs (rootDir , finalPathsToRemove )
481
+ // prepend the repoDir to each path to ensure that os.Remove can find the file
482
+ var absPaths []string
483
+ for _ , path := range pathsToRemove {
484
+ absPaths = append (absPaths , filepath .Join (rootDir , path ))
485
+ }
486
+
487
+ filesToRemove , dirsToRemove , err := separateFilesAndDirs (absPaths )
463
488
if err != nil {
464
489
return err
465
490
}
466
491
467
492
// Remove files first, then directories.
468
493
for _ , file := range filesToRemove {
469
494
slog .Info ("removing file" , "path" , file )
470
- if err := os .Remove (filepath . Join ( rootDir , file ) ); err != nil {
495
+ if err := os .Remove (file ); err != nil {
471
496
return err
472
497
}
473
498
}
474
499
475
- sortDirsByDepth (dirsToRemove )
500
+ // Sort to remove the child directories first
501
+ slices .SortFunc (dirsToRemove , func (a , b string ) int {
502
+ return strings .Count (b , string (filepath .Separator )) - strings .Count (a , string (filepath .Separator ))
503
+ })
476
504
477
505
for _ , dir := range dirsToRemove {
478
506
slog .Info ("removing directory" , "path" , dir )
479
- if err := os .Remove (filepath . Join ( rootDir , dir ) ); err != nil {
507
+ if err := os .Remove (dir ); err != nil {
480
508
// It's possible the directory is not empty due to preserved files.
481
509
slog .Warn ("failed to remove directory, it may not be empty" , "dir" , dir , "err" , err )
482
510
}
@@ -485,35 +513,49 @@ func clean(rootDir string, removePatterns, preservePatterns []string) error {
485
513
return nil
486
514
}
487
515
488
- // sortDirsByDepth sorts directories by depth (descending) to remove children first.
489
- func sortDirsByDepth (dirs []string ) {
490
- slices .SortFunc (dirs , func (a , b string ) int {
491
- return strings .Count (b , string (filepath .Separator )) - strings .Count (a , string (filepath .Separator ))
492
- })
493
- }
516
+ // findAllDirRelPaths walks the subDir tree returns a slice of all file and directory paths
517
+ // relative to the dir. This is repeated for all nested directories. subDir must be under
518
+ // or the same as dir.
519
+ func findAllDirRelPaths (dir , subDir string ) ([]string , error ) {
520
+ dirRelPath , err := filepath .Rel (dir , subDir )
521
+ if err != nil {
522
+ return nil , fmt .Errorf ("cannot establish the relationship between %s and %s: %w" , dir , subDir , err )
523
+ }
524
+ // '..' signifies that the subDir exists outside of dir
525
+ if strings .HasPrefix (dirRelPath , ".." ) {
526
+ return nil , fmt .Errorf ("subDir %s is not nested within the dir %s" , subDir , dir )
527
+ }
528
+ // Rel() doesn't account for symlinks and the potential for symlinks to point
529
+ // to a non-existent location. Use Lstat() to confirm that the directory exists
530
+ if _ , err := os .Lstat (subDir ); err != nil {
531
+ // If the subDir does not exist, there are no rel paths
532
+ if os .IsNotExist (err ) {
533
+ return []string {}, nil
534
+ }
535
+ // For any other error (permissions, I/O, etc.)
536
+ return nil , fmt .Errorf ("failed to stat path %q: %w" , subDir , err )
537
+ }
494
538
495
- // allPaths walks the directory tree rooted at rootDir and returns a slice of all
496
- // file and directory paths, relative to rootDir.
497
- func allPaths (rootDir string ) ([]string , error ) {
498
- var paths []string
499
- err := filepath .WalkDir (rootDir , func (path string , d fs.DirEntry , err error ) error {
539
+ paths := []string {}
540
+ err = filepath .WalkDir (subDir , func (path string , d fs.DirEntry , err error ) error {
500
541
if err != nil {
501
542
return err
502
543
}
503
- relPath , err := filepath .Rel (rootDir , path )
504
- if err != nil {
505
- return err
544
+ // error is ignored as we have confirmed that subDir is child or equal to rootDir
545
+ relPath , _ := filepath .Rel (dir , path )
546
+ // Special case when subDir is equal to dir. Drop the "." as it references itself
547
+ if relPath != "." {
548
+ paths = append (paths , relPath )
506
549
}
507
- paths = append (paths , relPath )
508
550
return nil
509
551
})
510
552
return paths , err
511
553
}
512
554
513
- // filterPaths returns a new slice containing only the paths from the input slice
555
+ // filterPathsByRegex returns a new slice containing only the paths from the input slice
514
556
// that match at least one of the provided regular expressions.
515
- func filterPaths (paths []string , regexps []* regexp.Regexp ) []string {
516
- var filtered []string
557
+ func filterPathsByRegex (paths []string , regexps []* regexp.Regexp ) []string {
558
+ filtered := []string {}
517
559
for _ , path := range paths {
518
560
for _ , re := range regexps {
519
561
if re .MatchString (path ) {
@@ -525,10 +567,26 @@ func filterPaths(paths []string, regexps []*regexp.Regexp) []string {
525
567
return filtered
526
568
}
527
569
528
- // deriveFinalPathsToRemove determines the final set of paths to be removed. It
529
- // starts with all paths under rootDir, filters them based on removePatterns,
530
- // and then excludes any paths that match preservePatterns.
531
- func deriveFinalPathsToRemove (rootDir string , removePatterns , preservePatterns []string ) ([]string , error ) {
570
+ // compileRegexps takes a slice of string patterns and compiles each one into a
571
+ // regular expression. It returns a slice of compiled regexps or an error if any
572
+ // pattern is invalid.
573
+ func compileRegexps (patterns []string ) ([]* regexp.Regexp , error ) {
574
+ var regexps []* regexp.Regexp
575
+ for _ , pattern := range patterns {
576
+ re , err := regexp .Compile (pattern )
577
+ if err != nil {
578
+ return nil , fmt .Errorf ("invalid regex %q: %w" , pattern , err )
579
+ }
580
+ regexps = append (regexps , re )
581
+ }
582
+ return regexps , nil
583
+ }
584
+
585
+ // filterPathsForRemoval determines the list of paths to be removed. The logic runs as follows:
586
+ // 1. paths that match any removePatterns are marked for removal
587
+ // 2. paths that match the preservePatterns are kept (even if they match removePatterns)
588
+ // Paths that match both are kept as preserve has overrides
589
+ func filterPathsForRemoval (paths , removePatterns , preservePatterns []string ) ([]string , error ) {
532
590
removeRegexps , err := compileRegexps (removePatterns )
533
591
if err != nil {
534
592
return nil , err
@@ -538,61 +596,42 @@ func deriveFinalPathsToRemove(rootDir string, removePatterns, preservePatterns [
538
596
return nil , err
539
597
}
540
598
541
- allPaths , err := allPaths (rootDir )
542
- if err != nil {
543
- return nil , err
544
- }
545
-
546
- pathsToRemove := filterPaths (allPaths , removeRegexps )
547
- pathsToPreserve := filterPaths (pathsToRemove , preserveRegexps )
599
+ pathsToRemove := filterPathsByRegex (paths , removeRegexps )
600
+ pathsToPreserve := filterPathsByRegex (pathsToRemove , preserveRegexps )
548
601
549
- // delete pathsToPreserve from pathsToRemove.
550
- pathsToDelete := make (map [string ]bool )
602
+ // map for a quick lookup for any preserve paths
603
+ preserveMap := make (map [string ]bool )
551
604
for _ , p := range pathsToPreserve {
552
- pathsToDelete [p ] = true
605
+ preserveMap [p ] = true
553
606
}
554
607
finalPathsToRemove := slices .DeleteFunc (pathsToRemove , func (path string ) bool {
555
- return pathsToDelete [path ]
608
+ return preserveMap [path ]
556
609
})
557
610
return finalPathsToRemove , nil
558
611
}
559
612
560
613
// separateFilesAndDirs takes a list of paths and categorizes them into files
561
614
// and directories. It uses os.Lstat to avoid following symlinks, treating them
562
615
// as files. Paths that do not exist are silently ignored.
563
- func separateFilesAndDirs (rootDir string , paths []string ) ([]string , []string , error ) {
564
- var files , dirs []string
616
+ func separateFilesAndDirs (paths []string ) ([]string , []string , error ) {
617
+ var filePaths , dirPaths []string
565
618
for _ , path := range paths {
566
- info , err := os .Lstat (filepath . Join ( rootDir , path ) )
619
+ info , err := os .Lstat (path )
567
620
if err != nil {
621
+ // The file or directory may have already been removed.
568
622
if errors .Is (err , os .ErrNotExist ) {
569
- // The file or directory may have already been removed.
623
+ slog . Warn ( "unable to find path" , "path" , path )
570
624
continue
571
625
}
572
626
// For any other error (permissions, I/O, etc.)
573
627
return nil , nil , fmt .Errorf ("failed to stat path %q: %w" , path , err )
574
628
575
629
}
576
630
if info .IsDir () {
577
- dirs = append (dirs , path )
631
+ dirPaths = append (dirPaths , path )
578
632
} else {
579
- files = append (files , path )
633
+ filePaths = append (filePaths , path )
580
634
}
581
635
}
582
- return files , dirs , nil
583
- }
584
-
585
- // compileRegexps takes a slice of string patterns and compiles each one into a
586
- // regular expression. It returns a slice of compiled regexps or an error if any
587
- // pattern is invalid.
588
- func compileRegexps (patterns []string ) ([]* regexp.Regexp , error ) {
589
- var regexps []* regexp.Regexp
590
- for _ , pattern := range patterns {
591
- re , err := regexp .Compile (pattern )
592
- if err != nil {
593
- return nil , fmt .Errorf ("invalid regex %q: %w" , pattern , err )
594
- }
595
- regexps = append (regexps , re )
596
- }
597
- return regexps , nil
636
+ return filePaths , dirPaths , nil
598
637
}
0 commit comments