clinuxwaitpid

waitpid(WNOHANG) returns 0 even though child process should have terminated


I have a function which uses fork(), dup2() and execvpe() to execute some external application, and provide 3 pipes to its standard file descriptors (stdin, stdout and stderr). After the function returns, the parent will use those pipes to write() the contents of a provided buffer to stdin as well as read() stdout and stderr into two other buffers. The parent does that in a loop (using epoll_wait()) until all three pipes return EOF, meaning that those file descriptors have been closed by the child. This all works just fine, and the contents read from the stdout and stderr pipes are exactly what i would expect, given the input provided to the stdin pipe.

However, as the title suggests, when the parent then tries to check the exit status of that child using waitpid() with the WNOHANG flag, it returns 0. Apparently the child is still alive, even though those file descriptors have already been closed.

Here's the relevant code, which is unfortunately rather long due to error handling:

typedef struct
{
    int fd;
    char* name;
    char* buf;
    size_t buf_size;
    size_t buf_idx;
    bool eof;
    ssize_t (*func)(int, void*, size_t);
} pipe_info_t;

static pid_t execute(char* argv[], char* envp[], int* in, int* out, int* err);

int run_process(char* argv[], char* envp[],
                char* in_buf,
                char* out_buf, size_t* out_size,
                char* err_buf, size_t* err_size)
{
    int evt_cnt;
    int result = -1;
    int efd_pipe = -1;

    if (!argv || !in_buf
    ||  !out_buf || !out_size
    ||  !err_buf || !err_size)
    {
        fprintf(stderr, "%s() Invalid argument", __func__);
        goto ERR_ARG;
    }

    pipe_info_t in =
    {
        .name = "stdin",
        .buf = in_buf,
        .buf_size = strlen(in_buf),
        .buf_idx = 0,
        .eof = false,
        .func = (ssize_t (*)(int, void*, size_t))write
    };

    pipe_info_t out =
    {
        .name = "stdout",
        .buf = out_buf,
        .buf_size = *out_size,
        .buf_idx = 0,
        .eof = false,
        .func = read
    };

    pipe_info_t err =
    {
        .name = "stderr",
        .buf = err_buf,
        .buf_size = *err_size,
        .buf_idx = 0,
        .eof = false,
        .func = read
    };

    *out_size = 0;
    *err_size = 0;

    efd_pipe = epoll_create1(0);
    if (efd_pipe == -1)
    {
        fprintf(stderr, "%s() epoll_create1(): %s", __func__, strerror(errno));
        goto ERR_EPOLL_CREATE;
    }

    pid_t pid = execute(argv, envp, &in.fd, &out.fd, &err.fd);
    if (pid == -1)
    {
        fprintf(stderr, "%s() Failed to create child process", __func__);
        goto ERR_EXEC;
    }

    struct epoll_event in_evt = {.data.ptr = &in, .events = EPOLLOUT};
    struct epoll_event out_evt = {.data.ptr = &out, .events = EPOLLIN};
    struct epoll_event err_evt = {.data.ptr = &err, .events = EPOLLIN};
    struct epoll_event events[8];

    if (epoll_ctl(efd_pipe, EPOLL_CTL_ADD, in.fd, &in_evt)
    ||  epoll_ctl(efd_pipe, EPOLL_CTL_ADD, out.fd, &out_evt)
    ||  epoll_ctl(efd_pipe, EPOLL_CTL_ADD, err.fd, &err_evt))
    {
        fprintf(stderr, "%s() epoll_ctl(): %s", __func__, strerror(errno));
        goto ERR_EPOLL_CTL;
    }

    while (!(in.eof && out.eof && err.eof))
    {
        int n;
        evt_cnt = epoll_wait(efd_pipe, events, sizeof(events)/sizeof(events[0]), 5000);

        if (evt_cnt == -1)
        {
            fprintf(stderr, "%s() epoll_wait(): %s", __func__, strerror(errno));
            goto WAIT_CHILD;
        }

        if (evt_cnt == 0)
        {
            fprintf(stderr, "%s() epoll_wait(): timeout", __func__);
            goto WAIT_CHILD;
        }

        for (n=0; n<evt_cnt; ++n)
        {
            int size = 0;
            pipe_info_t* pipe = events[n].data.ptr;
            if (pipe->eof)
            {
                continue;
            }

            if (events[n].events & EPOLLERR)
            {
                fprintf(stderr, "%s() epoll_wait() %s error 0x%04X", __func__, pipe->name, events[n].events);
                goto WAIT_CHILD;
            }

            size = pipe->func(pipe->fd,
                              &pipe->buf[pipe->buf_idx],
                              pipe->buf_size - pipe->buf_idx);

            if (size == -1)
            {
                fprintf(stderr, "%s() %s %s", __func__, pipe->name, strerror(errno));
                goto WAIT_CHILD;
            }
            else if (!size)
            {
                pipe->eof = true;
            }

            pipe->buf_idx += size;
        }
    }

WAIT_CHILD:
    switch (waitpid(pid, &result, WNOHANG))
    {
        case -1:
            fprintf(stderr, "%s() waitpid(): %s", __func__, strerror(errno));
            result = -1;
        break;

        case 0:
            fprintf(stderr, "%s() Child process still alive", __func__);
            kill(pid, SIGKILL);
            waitpid(pid, &result, 0);
            result = -1;
        break;

        default:
            result = WEXITSTATUS(result);
        break;
    }

    *out_size = out.buf_idx;
    *err_size = err.buf_idx;

ERR_EPOLL_CTL:
    close(in.fd);
    close(out.fd);
    close(err.fd);
ERR_EXEC:
    close(efd_pipe);
ERR_EPOLL_CREATE:
ERR_ARG:
    return result;
}

static pid_t execute(char* argv[], char* envp[], int* in, int* out, int* err)
{
    pid_t pid = -1;

    int in_pipe[2];
    int out_pipe[2];
    int err_pipe[2];

    char path[strlen(argv[0])+1];
    memcpy(path, argv[0], sizeof(path));

    char* cmd = basename(path);

    if (pipe(in_pipe))
    {
        fprintf(stderr, "%s() pipe(stdin): %s", __func__, strerror(errno));
        goto ERR_STDIN;
    }

    if (pipe(out_pipe))
    {
        fprintf(stderr, "%s() pipe(stdout): %s", __func__, strerror(errno));
        goto ERR_STDOUT;
    }

    if (pipe(err_pipe))
    {
        fprintf(stderr, "%s() pipe(stderr): %s", __func__, strerror(errno));
        goto ERR_STDERR;
    }

    pid = fork();

    if (pid > 0)
    {
        close(in_pipe[0]);
        close(out_pipe[1]);
        close(err_pipe[1]);

        *in = in_pipe[1];
        *out = out_pipe[0];
        *err = err_pipe[0];
    }
    else if (pid == 0)
    {
        char err_str[1024];
        size_t err_size = 0;
        int err_fd = err_pipe[1];

        if (close(in_pipe[1])
        ||  close(out_pipe[0])
        ||  close(err_pipe[0]))
        {
            err_size = snprintf(err_str, sizeof(err_str), "%s(child) close(): %s\n", __func__, strerror(errno));
            goto ERR_CHILD;
        }

        if ((dup2(in_pipe[0], STDIN_FILENO) == -1)
        ||  (dup2(out_pipe[1], STDOUT_FILENO) == -1)
        ||  (dup2(err_pipe[1], STDERR_FILENO) == -1))
        {
            err_size = snprintf(err_str, sizeof(err_str), "%s(child) dup2(): %s\n", __func__, strerror(errno));
            goto ERR_CHILD;
        }

        err_fd = STDERR_FILENO;

        if (close(in_pipe[0])
        ||  close(out_pipe[1])
        ||  close(err_pipe[1]))
        {
            err_size = snprintf(err_str, sizeof(err_str), "%s(child) close(): %s\n", __func__, strerror(errno));
            goto ERR_CHILD;
        }

        if (execvpe(cmd, argv, envp?envp:__environ))
        {
            err_size = snprintf(err_str, sizeof(err_str), "%s(child) execvpe(): %s\n", __func__, strerror(errno));
        }

ERR_CHILD:
        write(err_fd, err_str, err_size);
        _exit(1);
    }
    else
    {
        fprintf(stderr, "%s() fork(): %s", __func__, strerror(errno));
        goto ERR_FORK;
    }

    return pid;

ERR_FORK:
    close(err_pipe[0]);
    close(err_pipe[1]);

ERR_STDERR:
    close(out_pipe[0]);
    close(out_pipe[1]);

ERR_STDOUT:
    close(in_pipe[0]);
    close(in_pipe[1]);

ERR_STDIN:
    return -1;
}

So my question is: should a child process be a zombie after it has closed all three standard file descriptors? Always assuming of course that they're closed automatically during process termination (i think it's safe to assume that no one would bother closing them manually)

In case you're wondering why i don't just use waitpid() as part of the loop condition: that's because that loop is waiting for events on the file descriptors using epoll_wait(). So if there are no further events (because the descriptors have been closed), i would have to wait for the timeout to expire.


Solution

  • The process may need some time after closing all file descriptors until its state changes to terminated, so you cannot expect it to be terminated immediately after detecting EOF on all pipes.

    Only if epoll_wait is interrupted by SIGCHLD you can be sure that waitpid will return the state of the terminated child process immediately.

    I don't think there is any guarantee about the time it takes for an OS to change the state of a terminated process, although normally this should be a "short time".

    Also, if you kill the process it may again take some time until it terminates.

    When you know that your program successfully created a child process you should actually wait for the end of the child process.

    If your parent process will terminate after the end of the child process you can even omit the waitpid and let the init process (or the responsible reaper) do this.

    BTW:

    The return code -1 from epoll_wait does not necessarily mean that it was interrupted by a signal. Even with errno==EINTR you cannot be sure that it was a SIGCHLD, so you should implement more checks, maybe in combination with a signal handler that sets a flag.

    A timeout on epoll_wait does not necessarily mean that your child process has terminated, it might only be slow for any reason, so you might have to kill the child process before waiting for it.

    Edit:

    I don't recommend sending SIGKILL unconditionally because this may prevent the child process from doing its clean-up. (Do you know that closing the file descriptors is the last step the child process has to do or that there is nothing to clean up?) SIGKILL should only be used as the last option if you cannot cleanly terminate the process.