Skip to content

Commit 63c2908

Browse files
committed
rootfs: try to scope MkdirAll to stay inside the rootfs
While we use SecureJoin to try to make all of our target paths inside the container safe, SecureJoin is not safe against an attacker than can change the path after we "resolve" it. os.MkdirAll can inadvertently follow symlinks and thus an attacker could end up tricking runc into creating empty directories on the host (note that the container doesn't get access to these directories, and the host just sees empty directories). However, this could potentially cause DoS issues by (for instance) creating a directory in a conf.d directory for a daemon that doesn't handle subdirectories properly. In addition, the handling for creating file bind-mounts did a plain open(O_CREAT) on the SecureJoin'd path, which is even more obviously unsafe (luckily we didn't use O_TRUNC, or this bug could've allowed an attacker to cause data loss...). Regardless of the symlink issue, opening an untrusted file could result in a DoS if the file is a hung tty or some other "nasty" file. We can use mknodat to safely create a regular file without opening anything anyway (O_CREAT|O_EXCL would also work but it makes the logic a bit more complicated, and we don't want to open the file for any particular reason anyway). libpathrs[1] is the long-term solution for these kinds of problems, but for now we can patch this particular issue by creating a more restricted MkdirAll that refuses to resolve symlinks and does the creation using file descriptors. This is loosely based on a more secure version that filepath-securejoin now has[2] and will be added to libpathrs soon[3]. [1]: https://github.com/openSUSE/libpathrs [2]: https://github.com/cyphar/filepath-securejoin/releases/tag/v0.3.0 [3]: cyphar/libpathrs#10 Fixes: CVE-2024-45310 Signed-off-by: Aleksa Sarai <[email protected]>
1 parent 346b818 commit 63c2908

File tree

3 files changed

+174
-10
lines changed

3 files changed

+174
-10
lines changed

libcontainer/rootfs_linux.go

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ func mountCgroupV1(m *configs.Mount, c *mountConfig) error {
313313
// inside the tmpfs, so we don't want to resolve symlinks).
314314
subsystemPath := filepath.Join(c.root, b.Destination)
315315
subsystemName := filepath.Base(b.Destination)
316-
if err := os.MkdirAll(subsystemPath, 0o755); err != nil {
316+
if err := utils.MkdirAllInRoot(c.root, subsystemPath, 0o755); err != nil {
317317
return err
318318
}
319319
if err := utils.WithProcfd(c.root, b.Destination, func(dstFd string) error {
@@ -505,15 +505,26 @@ func createMountpoint(rootfs string, m mountEntry) (string, error) {
505505
return "", fmt.Errorf("%w: file bind mount over rootfs", errRootfsToFile)
506506
}
507507
// Make the parent directory.
508-
if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
508+
destDir, destBase := filepath.Split(dest)
509+
destDirFd, err := utils.MkdirAllInRootOpen(rootfs, destDir, 0o755)
510+
if err != nil {
509511
return "", fmt.Errorf("make parent dir of file bind-mount: %w", err)
510512
}
511-
// Make the target file.
512-
f, err := os.OpenFile(dest, os.O_CREATE, 0o755)
513-
if err != nil {
514-
return "", fmt.Errorf("create target of file bind-mount: %w", err)
513+
defer destDirFd.Close()
514+
// Make the target file. We want to avoid opening any file that is
515+
// already there because it could be a "bad" file like an invalid
516+
// device or hung tty that might cause a DoS, so we use mknodat.
517+
// destBase does not contain any "/" components, and mknodat does
518+
// not follow trailing symlinks, so we can safely just call mknodat
519+
// here.
520+
if err := unix.Mknodat(int(destDirFd.Fd()), destBase, unix.S_IFREG|0o644, 0); err != nil {
521+
// If we get EEXIST, there was already an inode there and
522+
// we can consider that a success.
523+
if !errors.Is(err, unix.EEXIST) {
524+
err = &os.PathError{Op: "mknod regular file", Path: dest, Err: err}
525+
return "", fmt.Errorf("create target of file bind-mount: %w", err)
526+
}
515527
}
516-
_ = f.Close()
517528
// Nothing left to do.
518529
return dest, nil
519530
}
@@ -532,7 +543,7 @@ func createMountpoint(rootfs string, m mountEntry) (string, error) {
532543
}
533544
}
534545

535-
if err := os.MkdirAll(dest, 0o755); err != nil {
546+
if err := utils.MkdirAllInRoot(rootfs, dest, 0o755); err != nil {
536547
return "", err
537548
}
538549
return dest, nil
@@ -565,7 +576,7 @@ func mountToRootfs(c *mountConfig, m mountEntry) error {
565576
} else if !fi.IsDir() {
566577
return fmt.Errorf("filesystem %q must be mounted on ordinary directory", m.Device)
567578
}
568-
if err := os.MkdirAll(dest, 0o755); err != nil {
579+
if err := utils.MkdirAllInRoot(rootfs, dest, 0o755); err != nil {
569580
return err
570581
}
571582
// Selinux kernels do not support labeling of /proc or /sys.
@@ -928,7 +939,7 @@ func createDeviceNode(rootfs string, node *devices.Device, bind bool) error {
928939
if dest == rootfs {
929940
return fmt.Errorf("%w: mknod over rootfs", errRootfsToFile)
930941
}
931-
if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
942+
if err := utils.MkdirAllInRoot(rootfs, filepath.Dir(dest), 0o755); err != nil {
932943
return err
933944
}
934945
if bind {

libcontainer/system/linux.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ import (
66
"fmt"
77
"io"
88
"os"
9+
"runtime"
910
"strconv"
11+
"strings"
1012
"syscall"
1113
"unsafe"
1214

@@ -214,3 +216,42 @@ func SetLinuxPersonality(personality int) error {
214216
}
215217
return nil
216218
}
219+
220+
func prepareAt(dir *os.File, path string) (int, string) {
221+
if dir == nil {
222+
return unix.AT_FDCWD, path
223+
}
224+
225+
// Rather than just filepath.Join-ing path here, do it manually so the
226+
// error and handle correctly indicate cases like path=".." as being
227+
// relative to the correct directory. The handle.Name() might end up being
228+
// wrong but because this is (currently) only used in MkdirAllInRoot, that
229+
// isn't a problem.
230+
dirName := dir.Name()
231+
if !strings.HasSuffix(dirName, "/") {
232+
dirName += "/"
233+
}
234+
fullPath := dirName + path
235+
236+
return int(dir.Fd()), fullPath
237+
}
238+
239+
func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) {
240+
dirFd, fullPath := prepareAt(dir, path)
241+
fd, err := unix.Openat(dirFd, path, flags, mode)
242+
if err != nil {
243+
return nil, &os.PathError{Op: "openat", Path: fullPath, Err: err}
244+
}
245+
runtime.KeepAlive(dir)
246+
return os.NewFile(uintptr(fd), fullPath), nil
247+
}
248+
249+
func Mkdirat(dir *os.File, path string, mode uint32) error {
250+
dirFd, fullPath := prepareAt(dir, path)
251+
err := unix.Mkdirat(dirFd, path, mode)
252+
if err != nil {
253+
err = &os.PathError{Op: "mkdirat", Path: fullPath, Err: err}
254+
}
255+
runtime.KeepAlive(dir)
256+
return err
257+
}

libcontainer/utils/utils_unix.go

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
package utils
44

55
import (
6+
"errors"
67
"fmt"
78
"math"
89
"os"
@@ -13,6 +14,8 @@ import (
1314
"sync"
1415
_ "unsafe" // for go:linkname
1516

17+
"github.com/opencontainers/runc/libcontainer/system"
18+
1619
securejoin "github.com/cyphar/filepath-securejoin"
1720
"github.com/sirupsen/logrus"
1821
"golang.org/x/sys/unix"
@@ -275,3 +278,112 @@ func IsLexicallyInRoot(root, path string) bool {
275278
}
276279
return strings.HasPrefix(path, root)
277280
}
281+
282+
// MkdirAllInRootOpen attempts to make
283+
//
284+
// path, _ := securejoin.SecureJoin(root, unsafePath)
285+
// os.MkdirAll(path, mode)
286+
// os.Open(path)
287+
//
288+
// safer against attacks where components in the path are changed between
289+
// SecureJoin returning and MkdirAll (or Open) being called. In particular, we
290+
// try to detect any symlink components in the path while we are doing the
291+
// MkdirAll.
292+
//
293+
// NOTE: Unlike os.MkdirAll, mode is not Go's os.FileMode, it is the unix mode
294+
// (the suid/sgid/sticky bits are not the same as for os.FileMode).
295+
//
296+
// NOTE: If unsafePath is a subpath of root, we assume that you have already
297+
// called SecureJoin and so we use the provided path verbatim without resolving
298+
// any symlinks (this is done in a way that avoids symlink-exchange races).
299+
// This means that the path also must not contain ".." elements, otherwise an
300+
// error will occur.
301+
//
302+
// This is a somewhat less safe alternative to
303+
// <https://github.com/cyphar/filepath-securejoin/pull/13>, but it should
304+
// detect attempts to trick us into creating directories outside of the root.
305+
// We should migrate to securejoin.MkdirAll once it is merged.
306+
func MkdirAllInRootOpen(root, unsafePath string, mode uint32) (_ *os.File, Err error) {
307+
// If the path is already "within" the root, use it verbatim.
308+
fullPath := unsafePath
309+
if !IsLexicallyInRoot(root, unsafePath) {
310+
var err error
311+
fullPath, err = securejoin.SecureJoin(root, unsafePath)
312+
if err != nil {
313+
return nil, err
314+
}
315+
}
316+
subPath, err := filepath.Rel(root, fullPath)
317+
if err != nil {
318+
return nil, err
319+
}
320+
321+
// Check for any silly mode bits.
322+
if mode&^0o7777 != 0 {
323+
return nil, fmt.Errorf("tried to include non-mode bits in MkdirAll mode: 0o%.3o", mode)
324+
}
325+
326+
currentDir, err := os.OpenFile(root, unix.O_DIRECTORY|unix.O_CLOEXEC, 0)
327+
if err != nil {
328+
return nil, fmt.Errorf("open root handle: %w", err)
329+
}
330+
defer func() {
331+
if Err != nil {
332+
currentDir.Close()
333+
}
334+
}()
335+
336+
for _, part := range strings.Split(subPath, string(filepath.Separator)) {
337+
switch part {
338+
case "", ".":
339+
// Skip over no-op components.
340+
continue
341+
case "..":
342+
return nil, fmt.Errorf("possible breakout detected: found %q component in SecureJoin subpath %s", part, subPath)
343+
}
344+
345+
nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
346+
switch {
347+
case err == nil:
348+
// Update the currentDir.
349+
_ = currentDir.Close()
350+
currentDir = nextDir
351+
352+
case errors.Is(err, unix.ENOTDIR):
353+
// This might be a symlink or some other random file. Either way,
354+
// error out.
355+
return nil, fmt.Errorf("cannot mkdir in %s/%s: %w", currentDir.Name(), part, unix.ENOTDIR)
356+
357+
case errors.Is(err, os.ErrNotExist):
358+
// Luckily, mkdirat will not follow trailing symlinks, so this is
359+
// safe to do as-is.
360+
if err := system.Mkdirat(currentDir, part, mode); err != nil {
361+
return nil, err
362+
}
363+
// Open the new directory. There is a race here where an attacker
364+
// could swap the directory with a different directory, but
365+
// MkdirAll's fuzzy semantics mean we don't care about that.
366+
nextDir, err := system.Openat(currentDir, part, unix.O_DIRECTORY|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
367+
if err != nil {
368+
return nil, fmt.Errorf("open newly created directory: %w", err)
369+
}
370+
// Update the currentDir.
371+
_ = currentDir.Close()
372+
currentDir = nextDir
373+
374+
default:
375+
return nil, err
376+
}
377+
}
378+
return currentDir, nil
379+
}
380+
381+
// MkdirAllInRoot is a wrapper around MkdirAllInRootOpen which closes the
382+
// returned handle, for callers that don't need to use it.
383+
func MkdirAllInRoot(root, unsafePath string, mode uint32) error {
384+
f, err := MkdirAllInRootOpen(root, unsafePath, mode)
385+
if err == nil {
386+
_ = f.Close()
387+
}
388+
return err
389+
}

0 commit comments

Comments
 (0)