--
John O'Meara
(typed on a cell phone; please excuse autocorrect errors)
On Sat, Jul 15, 2017, 2:48 PM Jesse Young <jlyo_at_jlyo.org> wrote:
> This patch modifies s6-supervise to use the Linux specific clone()
> system call to enable the child process to become the pid 1 of a new
> pid namespace. To enable it, compile with -DWANT_CLONE_NEWPID and make
> the ./clone-newpid file readable to s6-supervise in the desired service
> directories.
>
> I ask that this be included in s6-supervise.c because doing
> unshare(CLONE_NEWPID) in the child process doesn't change the process's
> pid to 1. Rather, it runs the next spawned child as pid 1. After
> spawning that first process, the parent is prevented from spawning any
> future children; subsequent attempts will fail with ENOMEM. Changing
> s6-supervise to use clone() avoids these limitations as well as avoiding
> extending the supervision chain, which would make exit/signal proxying
> necessary.
>
> To see correct ps output, /proc needs to be remounted. To avoid
> conflicts with the parent pid namespace's /proc, this is done in a new
> mount namespace.
>
> For example:
> #!/bin/execlineb -P
> unshare -m --
> foreground { umount /proc }
> if -- { mount -t proc proc /proc }
> exec ...
>
> The functions added in this patch could be migrated into skalibs or
> libs6, but I wanted to start with this as a PoC without making API
> changes.
>
> Jesse
>
> ---
> src/supervision/s6-supervise.c | 87 +++++++++++++++++++++++++++++++-----------
> 1 file changed, 65 insertions(+), 22 deletions(-)
>
> diff --git a/src/supervision/s6-supervise.c b/src/supervision/s6-supervise.c
> index 2e8fa38..7605a82 100644
> --- a/src/supervision/s6-supervise.c
> +++ b/src/supervision/s6-supervise.c
> @@ -9,6 +9,9 @@
> #include <errno.h>
> #include <fcntl.h>
> #include <signal.h>
> +#ifdef WANT_CLONE_NEWPID
> +# include <sched.h>
> +#endif
> #include <skalibs/allreadwrite.h>
> #include <skalibs/bytestr.h>
> #include <skalibs/uint.h>
> @@ -203,6 +206,67 @@ static int maybesetsid (void)
> return 1 ;
> }
>
> +static void exec_run(int p[2], int notifyp[2], int fd) gccattr_noreturn ;
> +static void exec_run(int p[2], int notifyp[2], int fd)
> +{
> + char const *cargv[2] = { "run", 0 } ;
> + PROG = "s6-supervise (child)" ;
> + selfpipe_finish() ;
> + if (notifyp[0] >= 0) close(notifyp[0]) ;
> + close(p[0]) ;
> + if (notifyp[1] >= 0 && fd_move(fd, notifyp[1]) < 0)
> + {
> + failcoe(p[1]) ;
> + strerr_diefu1sys(127, "move notification descriptor") ;
> + }
> + if (!maybesetsid())
> + {
> + failcoe(p[1]) ;
> + strerr_diefu1sys(127, "access ./nosetsid") ;
> + }
> + execve("./run", (char *const *)cargv, (char *const *)environ) ;
> + failcoe(p[1]) ;
> + strerr_dieexec(127, "run") ;
> +}
> +
> +static pid_t spawn_run_fork(int p[2], int notifyp[2], int fd)
> +{
> + pid_t pid = fork() ;
> + if (!pid) exec_run(p, notifyp, fd) ;
> + return pid ;
> +}
> +
> +#ifdef WANT_CLONE_NEWPID
> +typedef struct
> +{
> + int p[2] ;
> + int notifyp[2] ;
> + int fd ;
> +} exec_run_t ;
> +
> +static int exec_run_shim(void *ctx) gccattr_noreturn ;
> +static int exec_run_shim(void *ctx)
> +{
> + exec_run_t *er = (exec_run_t *) ctx ;
> + exec_run(er->p, er->notifyp, er->fd) ;
> +}
> +
> +static pid_t spawn_run(int p[2], int notifyp[2], int fd)
> +{
> + exec_run_t arg = { { p[0], p[1] }, { notifyp[0], notifyp[1] }, fd } ;
> + char child_stack[SIGSTKSZ] ;
> + if (access("clone-newpid", F_OK) < 0 && errno == ENOENT)
> + return spawn_run_fork(p, notifyp, fd) ;
> + return (pid_t) clone(&exec_run_shim, child_stack + sizeof(child_stack),
> + CLONE_NEWPID | SIGCHLD, &arg) ;
> +}
> +#else /* if !defined(WANT_CLONE_NEWPID) */
> +static pid_t spawn_run(int p[2], int notifyp[2], int fd)
> +{
> + return spawn_run_fork(p, notifyp, fd) ;
> +}
> +#endif /* defined(WANT_CLONE_NEWPID) */
> +
> static void trystart (void)
> {
> int p[2] ;
> @@ -222,7 +286,7 @@ static void trystart (void)
> fd_close(p[1]) ; fd_close(p[0]) ;
> return ;
> }
> - pid = fork() ;
> + pid = spawn_run(p, notifyp, (int)fd) ;
> if (pid < 0)
> {
> settimeout(60) ;
> @@ -232,27 +296,6 @@ static void trystart (void)
> fd_close(p[1]) ; fd_close(p[0]) ;
> return ;
> }
> - else if (!pid)
> - {
> - char const *cargv[2] = { "run", 0 } ;
> - PROG = "s6-supervise (child)" ;
> - selfpipe_finish() ;
> - if (notifyp[0] >= 0) close(notifyp[0]) ;
> - close(p[0]) ;
> - if (notifyp[1] >= 0 && fd_move((int)fd, notifyp[1]) < 0)
> - {
> - failcoe(p[1]) ;
> - strerr_diefu1sys(127, "move notification descriptor") ;
> - }
> - if (!maybesetsid())
> - {
> - failcoe(p[1]) ;
> - strerr_diefu1sys(127, "access ./nosetsid") ;
> - }
> - execve("./run", (char *const *)cargv, (char *const *)environ) ;
> - failcoe(p[1]) ;
> - strerr_dieexec(127, "run") ;
> - }
> if (notifyp[1] >= 0) fd_close(notifyp[1]) ;
> fd_close(p[1]) ;
> {
> --
> 2.13.1
>
Received on Sat Jul 15 2017 - 20:24:25 UTC
This archive was generated by hypermail 2.3.0 : Sun May 09 2021 - 19:38:49 UTC