Skip to content

Commit b1d7164

Browse files
committed
fix: preserve and remove files from sourcesRoot
1 parent a00e91b commit b1d7164

File tree

3 files changed

+328
-195
lines changed

3 files changed

+328
-195
lines changed

internal/librarian/command.go

Lines changed: 105 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -258,10 +258,9 @@ func cleanAndCopyLibrary(state *config.LibrarianState, repoDir, libraryID, outpu
258258
}
259259
}
260260

261-
preservePatterns := append(library.PreserveRegex,
262-
globalPreservePatterns...)
261+
preservePatterns := append(library.PreserveRegex, globalPreservePatterns...)
263262

264-
if err := clean(repoDir, removePatterns, preservePatterns); err != nil {
263+
if err := clean(repoDir, library.SourceRoots, removePatterns, preservePatterns); err != nil {
265264
return fmt.Errorf("failed to clean library, %s: %w", library.ID, err)
266265
}
267266

@@ -445,38 +444,67 @@ func copyFile(dst, src string) (err error) {
445444
return err
446445
}
447446

448-
// clean removes files and directories from a root directory based on remove and preserve patterns.
447+
// clean removes files and directories from source roots based on remove and preserve patterns.
448+
// Limit the possible files when cleaning to those in source roots (not rootDir) as regex patterns
449+
// for preserve and remove should *only* impact source root files.
449450
//
450451
// It first determines the paths to remove by applying the removePatterns and then excluding any paths
451452
// that match the preservePatterns. It then separates the remaining paths into files and directories and
452453
// removes them, ensuring that directories are removed last.
453454
//
454455
// This logic is ported from owlbot logic: https://github.com/googleapis/repo-automation-bots/blob/12dad68640960290910b660e4325630c9ace494b/packages/owl-bot/src/copy-code.ts#L1027
455-
func clean(rootDir string, removePatterns, preservePatterns []string) error {
456-
slog.Info("cleaning directory", "path", rootDir)
457-
finalPathsToRemove, err := deriveFinalPathsToRemove(rootDir, removePatterns, preservePatterns)
456+
func clean(rootDir string, sourceRoots, removePatterns, preservePatterns []string) error {
457+
slog.Info("cleaning directories", "source roots", sourceRoots)
458+
459+
// paths contains a list of relative paths and not absolute paths from rootDir
460+
// because regex patterns apply to source root relative path
461+
var paths []string
462+
for _, sourceRoot := range sourceRoots {
463+
sourceRootAbsPath := filepath.Join(rootDir, sourceRoot)
464+
sourceRootPaths, err := findAllDirRelPaths(rootDir, sourceRootAbsPath)
465+
if err != nil {
466+
slog.Warn("unable to find files to clean for source root", rootDir, sourceRoot)
467+
return err
468+
}
469+
if len(sourceRootPaths) == 0 {
470+
slog.Warn("unable to find any files to clean source root", "source root", sourceRoot)
471+
break
472+
}
473+
paths = append(paths, sourceRootPaths...)
474+
}
475+
476+
pathsToRemove, err := filterPathsForRemoval(paths, removePatterns, preservePatterns)
458477
if err != nil {
459478
return err
460479
}
461480

462-
filesToRemove, dirsToRemove, err := separateFilesAndDirs(rootDir, finalPathsToRemove)
481+
// prepend the repoDir to each path to ensure that os.Remove can find the file
482+
var absPaths []string
483+
for _, path := range pathsToRemove {
484+
absPaths = append(absPaths, filepath.Join(rootDir, path))
485+
}
486+
487+
filesToRemove, dirsToRemove, err := separateFilesAndDirs(absPaths)
463488
if err != nil {
464489
return err
465490
}
466491

467492
// Remove files first, then directories.
468493
for _, file := range filesToRemove {
469494
slog.Info("removing file", "path", file)
470-
if err := os.Remove(filepath.Join(rootDir, file)); err != nil {
495+
if err := os.Remove(file); err != nil {
471496
return err
472497
}
473498
}
474499

475-
sortDirsByDepth(dirsToRemove)
500+
// Sort to remove the child directories first
501+
slices.SortFunc(dirsToRemove, func(a, b string) int {
502+
return strings.Count(b, string(filepath.Separator)) - strings.Count(a, string(filepath.Separator))
503+
})
476504

477505
for _, dir := range dirsToRemove {
478506
slog.Info("removing directory", "path", dir)
479-
if err := os.Remove(filepath.Join(rootDir, dir)); err != nil {
507+
if err := os.Remove(dir); err != nil {
480508
// It's possible the directory is not empty due to preserved files.
481509
slog.Warn("failed to remove directory, it may not be empty", "dir", dir, "err", err)
482510
}
@@ -485,35 +513,49 @@ func clean(rootDir string, removePatterns, preservePatterns []string) error {
485513
return nil
486514
}
487515

488-
// sortDirsByDepth sorts directories by depth (descending) to remove children first.
489-
func sortDirsByDepth(dirs []string) {
490-
slices.SortFunc(dirs, func(a, b string) int {
491-
return strings.Count(b, string(filepath.Separator)) - strings.Count(a, string(filepath.Separator))
492-
})
493-
}
516+
// findAllDirRelPaths walks the subDir tree returns a slice of all file and directory paths
517+
// relative to the dir. This is repeated for all nested directories. subDir must be under
518+
// or the same as dir.
519+
func findAllDirRelPaths(dir, subDir string) ([]string, error) {
520+
dirRelPath, err := filepath.Rel(dir, subDir)
521+
if err != nil {
522+
return nil, fmt.Errorf("cannot establish the relationship between %s and %s: %w", dir, subDir, err)
523+
}
524+
// '..' signifies that the subDir exists outside of dir
525+
if strings.HasPrefix(dirRelPath, "..") {
526+
return nil, fmt.Errorf("subDir %s is not nested within the dir %s", subDir, dir)
527+
}
528+
// Rel() doesn't account for symlinks and the potential for symlinks to point
529+
// to a non-existent location. Use Lstat() to confirm that the directory exists
530+
if _, err := os.Lstat(subDir); err != nil {
531+
// If the subDir does not exist, there are no rel paths
532+
if os.IsNotExist(err) {
533+
return []string{}, nil
534+
}
535+
// For any other error (permissions, I/O, etc.)
536+
return nil, fmt.Errorf("failed to stat path %q: %w", subDir, err)
537+
}
494538

495-
// allPaths walks the directory tree rooted at rootDir and returns a slice of all
496-
// file and directory paths, relative to rootDir.
497-
func allPaths(rootDir string) ([]string, error) {
498-
var paths []string
499-
err := filepath.WalkDir(rootDir, func(path string, d fs.DirEntry, err error) error {
539+
paths := []string{}
540+
err = filepath.WalkDir(subDir, func(path string, d fs.DirEntry, err error) error {
500541
if err != nil {
501542
return err
502543
}
503-
relPath, err := filepath.Rel(rootDir, path)
504-
if err != nil {
505-
return err
544+
// error is ignored as we have confirmed that subDir is child or equal to rootDir
545+
relPath, _ := filepath.Rel(dir, path)
546+
// Special case when subDir is equal to dir. Drop the "." as it references itself
547+
if relPath != "." {
548+
paths = append(paths, relPath)
506549
}
507-
paths = append(paths, relPath)
508550
return nil
509551
})
510552
return paths, err
511553
}
512554

513-
// filterPaths returns a new slice containing only the paths from the input slice
555+
// filterPathsByRegex returns a new slice containing only the paths from the input slice
514556
// that match at least one of the provided regular expressions.
515-
func filterPaths(paths []string, regexps []*regexp.Regexp) []string {
516-
var filtered []string
557+
func filterPathsByRegex(paths []string, regexps []*regexp.Regexp) []string {
558+
filtered := []string{}
517559
for _, path := range paths {
518560
for _, re := range regexps {
519561
if re.MatchString(path) {
@@ -525,10 +567,26 @@ func filterPaths(paths []string, regexps []*regexp.Regexp) []string {
525567
return filtered
526568
}
527569

528-
// deriveFinalPathsToRemove determines the final set of paths to be removed. It
529-
// starts with all paths under rootDir, filters them based on removePatterns,
530-
// and then excludes any paths that match preservePatterns.
531-
func deriveFinalPathsToRemove(rootDir string, removePatterns, preservePatterns []string) ([]string, error) {
570+
// compileRegexps takes a slice of string patterns and compiles each one into a
571+
// regular expression. It returns a slice of compiled regexps or an error if any
572+
// pattern is invalid.
573+
func compileRegexps(patterns []string) ([]*regexp.Regexp, error) {
574+
var regexps []*regexp.Regexp
575+
for _, pattern := range patterns {
576+
re, err := regexp.Compile(pattern)
577+
if err != nil {
578+
return nil, fmt.Errorf("invalid regex %q: %w", pattern, err)
579+
}
580+
regexps = append(regexps, re)
581+
}
582+
return regexps, nil
583+
}
584+
585+
// filterPathsForRemoval determines the list of paths to be removed. The logic runs as follows:
586+
// 1. paths that match any removePatterns are marked for removal
587+
// 2. paths that match the preservePatterns are kept (even if they match removePatterns)
588+
// Paths that match both are kept as preserve has overrides
589+
func filterPathsForRemoval(paths, removePatterns, preservePatterns []string) ([]string, error) {
532590
removeRegexps, err := compileRegexps(removePatterns)
533591
if err != nil {
534592
return nil, err
@@ -538,61 +596,42 @@ func deriveFinalPathsToRemove(rootDir string, removePatterns, preservePatterns [
538596
return nil, err
539597
}
540598

541-
allPaths, err := allPaths(rootDir)
542-
if err != nil {
543-
return nil, err
544-
}
545-
546-
pathsToRemove := filterPaths(allPaths, removeRegexps)
547-
pathsToPreserve := filterPaths(pathsToRemove, preserveRegexps)
599+
pathsToRemove := filterPathsByRegex(paths, removeRegexps)
600+
pathsToPreserve := filterPathsByRegex(pathsToRemove, preserveRegexps)
548601

549-
// delete pathsToPreserve from pathsToRemove.
550-
pathsToDelete := make(map[string]bool)
602+
// map for a quick lookup for any preserve paths
603+
preserveMap := make(map[string]bool)
551604
for _, p := range pathsToPreserve {
552-
pathsToDelete[p] = true
605+
preserveMap[p] = true
553606
}
554607
finalPathsToRemove := slices.DeleteFunc(pathsToRemove, func(path string) bool {
555-
return pathsToDelete[path]
608+
return preserveMap[path]
556609
})
557610
return finalPathsToRemove, nil
558611
}
559612

560613
// separateFilesAndDirs takes a list of paths and categorizes them into files
561614
// and directories. It uses os.Lstat to avoid following symlinks, treating them
562615
// as files. Paths that do not exist are silently ignored.
563-
func separateFilesAndDirs(rootDir string, paths []string) ([]string, []string, error) {
564-
var files, dirs []string
616+
func separateFilesAndDirs(paths []string) ([]string, []string, error) {
617+
var filePaths, dirPaths []string
565618
for _, path := range paths {
566-
info, err := os.Lstat(filepath.Join(rootDir, path))
619+
info, err := os.Lstat(path)
567620
if err != nil {
621+
// The file or directory may have already been removed.
568622
if errors.Is(err, os.ErrNotExist) {
569-
// The file or directory may have already been removed.
623+
slog.Warn("unable to find path", "path", path)
570624
continue
571625
}
572626
// For any other error (permissions, I/O, etc.)
573627
return nil, nil, fmt.Errorf("failed to stat path %q: %w", path, err)
574628

575629
}
576630
if info.IsDir() {
577-
dirs = append(dirs, path)
631+
dirPaths = append(dirPaths, path)
578632
} else {
579-
files = append(files, path)
633+
filePaths = append(filePaths, path)
580634
}
581635
}
582-
return files, dirs, nil
583-
}
584-
585-
// compileRegexps takes a slice of string patterns and compiles each one into a
586-
// regular expression. It returns a slice of compiled regexps or an error if any
587-
// pattern is invalid.
588-
func compileRegexps(patterns []string) ([]*regexp.Regexp, error) {
589-
var regexps []*regexp.Regexp
590-
for _, pattern := range patterns {
591-
re, err := regexp.Compile(pattern)
592-
if err != nil {
593-
return nil, fmt.Errorf("invalid regex %q: %w", pattern, err)
594-
}
595-
regexps = append(regexps, re)
596-
}
597-
return regexps, nil
636+
return filePaths, dirPaths, nil
598637
}

0 commit comments

Comments
 (0)