c process pipe waitpid

Creating a new child process when old one is terminated in C


I've included code that creates a series of child processes to divide the work for a task. Each child has a random chance of terminating (this is handled by the word_count function, which calls abort()), and when that happens the parent should create a new child process to replace it. However, the program blocks on the read. I know this code is messy, but I want to understand the problem before cleaning it up.

    // Setup: one pipe and one pid slot per child. Each child sends its count_t result
    // back to the parent through its own pipe.
    int pipes[nChildProc][2]; //pipe fd[0] is read end, fd[1] is write end
    // Per-child share of fsize. NOTE(review): fsize's declaration is not shown; if it is
    // an integer type this is integer division and any remainder is not handed out here.
    long child_f_size = fsize / nChildProc;
    pid_t pids[nChildProc];

    //start dividing the work among child processes
    for(int i = 0; i < nChildProc; ++i) {
        //srand(time(NULL));
        //int crash = ((rand() / RAND_MAX + 1.0) < crashRate) ? 1 : 0;
        if(pipe(pipes[i]) != 0) {
            printf("Failed to create pipe.\n");
            exit(1);
        }

        pid_t pid = fork();
        // NOTE(review): child_fp is never assigned before it is passed to readFromFile below.
        FILE *child_fp;
        pids[i] = pid; // remembered so the parent can later wait on this particular child

        if(pid < 0) {
            printf("Failed to create child process.\n");
            exit(1);
        }
        else if(pid == 0) { //child process
            // NOTE(review): "char* name, int i, int nChildProc" are declarations, not
            // argument expressions, so this call does not compile as written; it looks
            // like part of the prototype was pasted in (presumably name, i, nChildProc
            // were meant).
            count_t temp_count = readFromFile(child_fp, fsize, child_f_size, char* name, int i, int nChildProc);

            //IPC with the main process
            if(write(pipes[i][1], &temp_count, sizeof(temp_count)) == -1)
                printf("failed to write to pipe.\n");

            close(pipes[i][1]);
            close(pipes[i][0]);
            exit(0); //deallocate process' memory space
        }
        // The parent reaches this point with both ends of pipes[i] still open; nothing in
        // this loop closes them in the parent.
        // NOTE(review): while the parent holds the write end, a read on pipes[i][0] cannot
        // see end-of-file, even if the child terminates without writing anything.
    }

    //wait for the children to finish, forking a replacement into slot i when needed
    int ret, status, i = 0;
    // NOTE(review): wait(NULL) in the condition itself reaps one child (whichever changed
    // state) and discards its status; the waitpid() below is then a second wait in the
    // same iteration, possibly for a child that has already been reaped.
    while(wait(NULL) != -1) { // while there are children to wait on
        ret = waitpid(pids[i], &status, WUNTRACED);

        // waitpid failed (e.g. pids[i] was already reaped by the wait() above).
        // Note that i is not advanced on this path.
        if(ret == -1) {
            continue;
        }

        // NOTE(review): ret is waitpid's return value (the child's pid on success), not
        // the child's termination status, so this branch runs for every child that was
        // waited on successfully. status is never examined.
        if(ret != 0) {// didn't exit normally
            // NOTE(review): this overwrites pipes[i] without closing the old descriptors.
            if(pipe(pipes[i]) != 0) {
                printf("Failed to create pipe.\n");
                exit(1);
            }

            // Same steps as the initial spawn loop, repeated for the replacement child.
            pid_t pid = fork();
            // NOTE(review): child_fp is never assigned before it is passed to readFromFile.
            FILE *child_fp;
            pids[i] = pid; // slot i now tracks the replacement

            if(pid < 0) {
                printf("Failed to create child process.\n");
                exit(1);
            }
            else if(pid == 0) { //child process
                // NOTE(review): the last three arguments are declarations rather than
                // expressions, so this call does not compile as written.
                count_t temp_count = readFromFile(child_fp, fsize, child_f_size, char* name, int i, int nChildProc);

                //IPC with the main process
                if(write(pipes[i][1], &temp_count, sizeof(temp_count)) == -1)
                    printf("failed to write to pipe.\n");

                close(pipes[i][1]);
                close(pipes[i][0]);
                exit(0); //deallocate process' memory space
            }
        }

        i = (i + 1) % nChildProc;//loop back to detect more processes that were terminated
    }

    // Collect one count_t from each child's pipe and add it into count (declared outside
    // this excerpt).
    long bytes;
    count_t temp;
    temp.linecount = 0;
    temp.wordcount = 0;
    temp.charcount = 0;

    //add up all the values from children to count
    printf("time to read.\n");
    for(unsigned int j = 0; j < nChildProc; ++j) {
        // NOTE(review): the parent's own copy of the write end, pipes[j][1], is still open
        // here (it is only closed after the read, below). If nothing was written to this
        // pipe, read() therefore waits for data instead of returning 0 for end-of-file.
        if((bytes = read(pipes[j][0], &temp, sizeof(temp))) < 0) {//blocked here
            // NOTE(review): j is unsigned int but is printed with %d.
            printf("Failed to read from pipe {%d}.\n", j);
            exit(1);
        }

        // A return of 0 (end-of-file) contributes nothing. Any nonzero byte count is
        // treated as a complete count_t; a short read is not detected.
        if(bytes != 0) {
            count.linecount += temp.linecount;
            count.wordcount += temp.wordcount;
            count.charcount += temp.charcount;
        }

        close(pipes[j][1]);
        close(pipes[j][0]);
    }

Solution

  • A couple of issues jump out:

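    1. In `if(ret != 0) {// didn't exit normally`, you've confused ret (which is the pid returned by waitpid) with status (which holds the child's termination status). Test status instead, e.g. with WIFEXITED(status) / WIFSIGNALED(status).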

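    2. You can't wait on the same process twice, since a successful wait allows the system to release the resources associated with the process. The code below does exactly that: wait(NULL) in the loop condition reaps a child, and waitpid may then be called for that same child. You have several options for rewriting it: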

            while(wait(NULL) != -1) { // while there are children to wait on
                ret = waitpid(pids[i], &status, WUNTRACED);
    

    One easy way is to use wait and then look up which index of the pids array the returned pid belongs to.

        // Reap children one at a time. wait() returns the pid of the child that
        // terminated and fills in status, or returns -1 once there are no
        // children left to wait on.
        pid_t pid;
        while((pid = wait(&status)) != -1) {
            // Find which slot of pids[] the reaped child occupied. i is the
            // variable declared before the loop (int ret, status, i = 0;), not a
            // for-scoped one, so its value is still visible after the search.
            for(i = 0; i < nChildProc; ++i) {
                if (pid == pids[i]) break;
            }
            if (i >= nChildProc) {
                unexpected_pid_do_something_smart();
                continue; // never index pids[]/pipes[] with an out-of-range i
            }
            // Leave the rest of the loop the same (but test status, not ret,
            // to decide whether the child needs replacing)
    

    Note: I didn't compile or test the above code.