@@ -832,6 +832,25 @@ void send_mountsources(int sockfd, pid_t child, char *mountsources, size_t mount
832832 bail ("failed to close container mount namespace fd %d" , container_mntns_fd );
833833}
834834
835+ void try_unshare (int flags , const char * msg )
836+ {
837+ write_log (DEBUG , "unshare %s" , msg );
838+ /*
839+ * Kernels prior to v4.3 may return EINVAL on unshare when another process
840+ * reads runc's /proc/$PID/status or /proc/$PID/maps. To work around this,
841+ * retry on EINVAL a few times.
842+ */
843+ int retries = 5 ;
844+ for (; retries > 0 ; retries -- ) {
845+ if (unshare (flags ) == 0 ) {
846+ return ;
847+ }
848+ if (errno != EINVAL )
849+ break ;
850+ }
851+ bail ("failed to unshare %s" , msg );
852+ }
853+
835854void nsexec (void )
836855{
837856 int pipenum ;
@@ -1170,9 +1189,7 @@ void nsexec(void)
11701189 * problem.
11711190 */
11721191 if (config .cloneflags & CLONE_NEWUSER ) {
1173- write_log (DEBUG , "unshare user namespace" );
1174- if (unshare (CLONE_NEWUSER ) < 0 )
1175- bail ("failed to unshare user namespace" );
1192+ try_unshare (CLONE_NEWUSER , "user namespace" );
11761193 config .cloneflags &= ~CLONE_NEWUSER ;
11771194
11781195 /*
@@ -1224,9 +1241,7 @@ void nsexec(void)
12241241 * some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID)
12251242 * was broken, so we'll just do it the long way anyway.
12261243 */
1227- write_log (DEBUG , "unshare remaining namespace (except cgroupns)" );
1228- if (unshare (config .cloneflags & ~CLONE_NEWCGROUP ) < 0 )
1229- bail ("failed to unshare remaining namespaces (except cgroupns)" );
1244+ try_unshare (config .cloneflags & ~CLONE_NEWCGROUP , "remaining namespaces (except cgroupns)" );
12301245
12311246 /* Ask our parent to send the mount sources fds. */
12321247 if (config .mountsources ) {
@@ -1344,8 +1359,7 @@ void nsexec(void)
13441359 }
13451360
13461361 if (config .cloneflags & CLONE_NEWCGROUP ) {
1347- if (unshare (CLONE_NEWCGROUP ) < 0 )
1348- bail ("failed to unshare cgroup namespace" );
1362+ try_unshare (CLONE_NEWCGROUP , "cgroup namespace" );
13491363 }
13501364
13511365 write_log (DEBUG , "signal completion to stage-0" );
0 commit comments