I've included code that creates a series of child processes to divide the work for a task. There's a random chance for it to terminate (handled by the word_count function from which it calls abort()) and on this event, it should create a new child process to replace it. However, the program is being blocked on the read. I know this code is messy, but I want understand the problem before cleaning it up.
int pipes[nChildProc][2]; //pipe fd[0] is read end, fd[1] is write end
long child_f_size = fsize / nChildProc;
pid_t pids[nChildProc];
//start dividing the work among child processes
for(int i = 0; i < nChildProc; ++i) {
//srand(time(NULL));
//int crash = ((rand() / RAND_MAX + 1.0) < crashRate) ? 1 : 0;
if(pipe(pipes[i]) != 0) {
printf("Failed to create pipe.\n");
exit(1);
}
pid_t pid = fork();
FILE *child_fp;
pids[i] = pid;
if(pid < 0) {
printf("Failed to create child process.\n");
exit(1);
}
else if(pid == 0) { //child process
count_t temp_count = readFromFile(child_fp, fsize, child_f_size, char* name, int i, int nChildProc);
//IPC with the main process
if(write(pipes[i][1], &temp_count, sizeof(temp_count)) == -1)
printf("failed to write to pipe.\n");
close(pipes[i][1]);
close(pipes[i][0]);
exit(0); //deallocate process' memory space
}
}
//wait for a children to finish
int ret, status, i = 0;
while(wait(NULL) != -1) { // while there are children to wait on
ret = waitpid(pids[i], &status, WUNTRACED);
if(ret == -1) {
continue;
}
if(ret != 0) {// didn't exit normally
if(pipe(pipes[i]) != 0) {
printf("Failed to create pipe.\n");
exit(1);
}
pid_t pid = fork();
FILE *child_fp;
pids[i] = pid;
if(pid < 0) {
printf("Failed to create child process.\n");
exit(1);
}
else if(pid == 0) { //child process
count_t temp_count = readFromFile(child_fp, fsize, child_f_size, char* name, int i, int nChildProc);
//IPC with the main process
if(write(pipes[i][1], &temp_count, sizeof(temp_count)) == -1)
printf("failed to write to pipe.\n");
close(pipes[i][1]);
close(pipes[i][0]);
exit(0); //deallocate process' memory space
}
}
i = (i + 1) % nChildProc;//loop back to detect more processes that were terminated
}
long bytes;
count_t temp;
temp.linecount = 0;
temp.wordcount = 0;
temp.charcount = 0;
//add up all the values from children to count
printf("time to read.\n");
for(unsigned int j = 0; j < nChildProc; ++j) {
if((bytes = read(pipes[j][0], &temp, sizeof(temp))) < 0) {//blocked here
printf("Failed to read from pipe {%d}.\n", j);
exit(1);
}
if(bytes != 0) {
count.linecount += temp.linecount;
count.wordcount += temp.wordcount;
count.charcount += temp.charcount;
}
close(pipes[j][1]);
close(pipes[j][0]);
}
A couple of issues jump out:
if(ret != 0) {// didn't exit normally
you've confused ret
(which is the pid) for status
(which is the exit code of the child)
You can't call wait on a process twice, since calling wait allows the system to release the resources associated with the process. You have several options on how to rewrite this code:
while(wait(NULL) != -1) { // while there are children to wait on
ret = waitpid(pids[i], &status, WUNTRACED);
One easy way is to use wait
then lookup in the array which index it belongs to.
while((pid = wait(&status)) {
if (pid == -1) { // no children to wait on
break;
}
for(int i = 0; i < nChildProc; ++i) {
if (pid == pids[i]) break;
}
if (i >= nChildProc) {
unexpected_pid_do_something_smart();
}
// Leave the rest of the loop the same
Note: I didn't compile or test the above code.