I want my loop to detect either a child process finishing or a message sent to the queue by a child. It works if a child sends a message and terminates immediately... but what if I want to send a message and then do other things? Below is my code that works, and the variant I'm working on...
master.c (not working): the loop catches a child's pid even though the child has not finished
#include "config.h"
/*
 * master (first variant): create the message queue, fork SO_USERS_NUM
 * children that each exec CHILD_NAME, then reap every child and print its pid.
 *
 * Returns 0 once wait() reports that no children are left, or EXIT_FAILURE
 * when the shared counter or the message queue cannot be set up.
 */
int main(int argc, char const *argv[]) {
    char *args[3] = {CHILD_NAME};

    (void)argc;
    (void)argv;

    /* One int in shared memory, used by the children as a running user id. */
    int sh_id = shmget(IPC_PRIVATE, sizeof(int), 0644 | IPC_CREAT);
    if (sh_id == -1) {
        perror("shmget");
        return EXIT_FAILURE;
    }
    int *sh_value = shmat(sh_id, NULL, 0);
    /* Mark the segment for removal; it goes away after the last detach. */
    shmctl(sh_id, IPC_RMID, NULL);
    if (sh_value == (void *)-1) {
        perror("shmat");
        return EXIT_FAILURE;
    }
    *sh_value = 0;
#ifndef SO_USERS_NUM
    printf("Error in main: SO_USERS_NUM not defined");
#endif
    /* Create the queue up front so it exists before any child uses it. */
    if (msgget(KEY_QUEUE, IPC_CREAT | 0600) == -1) {
        perror("msgget");
        return EXIT_FAILURE;
    }

    /*
     * Create the users: fork SO_USERS_NUM times; each child execs CHILD_NAME.
     * args carries the data the user program needs (its id as args[1]).
     */
    for (int i = 0; i < SO_USERS_NUM; i++) {
        switch (fork()) {
        case 0: {
#ifdef CHILD_NAME
            /*
             * This used to be an uninitialized `char *`; formatting through
             * it is undefined behavior and most likely crashed the child,
             * which is why wait() returned before the user program "finished".
             */
            char test[32];

            /* NOTE: unsynchronized read-modify-write shared by all children. */
            *sh_value = *sh_value + 1;
            snprintf(test, sizeof test, "%d", *sh_value);
            args[1] = test;
            args[2] = NULL;
            execve(CHILD_NAME, args, NULL);
            /* Only reached when execve failed. */
            perror("execve");
#endif
            exit(EXIT_FAILURE);
        }
        case -1:
            printf("Error in main: forkkodio");
            break;
        default:
            break;
        }
    }

    /* Reap children until wait() fails (no children left). */
    pid_t k_pid;
    while ((k_pid = wait(NULL)) > 0) {
        printf("\nfiglio: %d", (int)k_pid);
    }
    return 0;
}
master.c (working): waits correctly forever when I put the infinite loop in the user
#include "config.h"
/*
 * master (second variant): create the message queue, fork SO_USERS_NUM
 * children that each exec CHILD_NAME, then loop forever alternating a
 * blocking wait() with a blocking msgrcv() for messages of type 3.
 *
 * The receive loop never exits; EXIT_FAILURE is returned only when the shared
 * counter or the message queue cannot be set up.
 */
int main(int argc, char const *argv[])
{
    char *args[3] = {CHILD_NAME};
    int q_id, num_bytes;
    struct msgUser mt_msg;

    (void)argc;
    (void)argv;

    /* One int in shared memory, used by the children as a running user id. */
    int sh_id = shmget(IPC_PRIVATE, sizeof(int), 0644 | IPC_CREAT);
    if (sh_id == -1) {
        perror("shmget");
        return EXIT_FAILURE;
    }
    int *sh_value = shmat(sh_id, NULL, 0);
    /* Mark the segment for removal; it goes away after the last detach. */
    shmctl(sh_id, IPC_RMID, NULL);
    if (sh_value == (void *)-1) {
        perror("shmat");
        return EXIT_FAILURE;
    }
    *sh_value = 0;
#ifndef SO_USERS_NUM
    printf("Error in main: SO_USERS_NUM not defined");
#endif
    q_id = msgget(KEY_QUEUE, IPC_CREAT | 0600);
    if (q_id == -1) {
        perror("msgget");
        return EXIT_FAILURE;
    }

    /*
     * Create the users: fork SO_USERS_NUM times; each child execs CHILD_NAME.
     * args carries the data the user program needs (its id as args[1]).
     */
    for (int i = 0; i < SO_USERS_NUM; i++) {
        switch (fork()) {
        case 0: {
#ifdef CHILD_NAME
            /*
             * This used to be an uninitialized `char *`; formatting through
             * it is undefined behavior, so give sprintf real storage.
             */
            char test[32];

            /* NOTE: unsynchronized read-modify-write shared by all children. */
            *sh_value = *sh_value + 1;
            snprintf(test, sizeof test, "%d", *sh_value);
            args[1] = test;
            args[2] = NULL;
            execve(CHILD_NAME, args, NULL);
            /* Only reached when execve failed. */
            perror("execve");
#endif
            exit(EXIT_FAILURE);
        }
        case -1:
            printf("Error in main: forkkodio");
            break;
        default:
            break;
        }
    }

    while (1) {
        for (int i = 0; i < SO_USERS_NUM; i++) {
            /*
             * NOTE(review): getppid() prints the pid of this process's own
             * parent although the text says "I am the parent"; getpid() may
             * have been intended -- confirm.
             */
            printf("\nHo finito di aspettare:%d sono il padre: %d\n",
                   (int)wait(NULL), (int)getppid());

            /*
             * Receive one type-3 message. The size is the payload that
             * follows mtype, not 120: a larger size lets the kernel write
             * past the end of mt_msg. MSG_NOERROR truncates oversized
             * messages (e.g. from a sender still using 120) instead of
             * failing with E2BIG.
             */
            num_bytes = (int)msgrcv(q_id, &mt_msg,
                                    sizeof(mt_msg) - sizeof(mt_msg.mtype),
                                    3, MSG_NOERROR);
            if (num_bytes >= 0) {
                /* received a good message (possibly of zero length) */
                printf("\nmessaggio ricevuto: %d \n", mt_msg.numero);
            }
        }
    }
    return 0;
}
user.c :
#include "user_manager.h"
#include "config.h"
int main(int argc, char const *argv[])
{
int q_id;
struct msgUser my_msg;
printf("\n user id: %s",argv[1]);
q_id = msgget(KEY_QUEUE, IPC_CREAT | 0600);
my_msg.mtype = 3;
my_msg.numero = 33;
msgsnd(q_id, &my_msg, 120, 0);
while (1)
{
/* code */
}
return 0;
}
config.h:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <sys/msg.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/sem.h>
#include <sys/wait.h>
/*
 * Message exchanged on the System V queue between the user processes and the
 * master. Everything after mtype is the payload.
 */
struct msgUser {
long mtype; /* message type, must be > 0 */
int numero; /* payload value; user.c sends 33 */
};
/*
 * KEY_QUEUE: key passed to msgget() by both the master and the users.
 * Per the original note, child nodes are meant to increase this value to
 * derive the keys of their own queues (not shown in this file).
 */
#define KEY_QUEUE 0x200800
/*
 * CHILD_NAME: path of the user program the master launches with execve().
 */
#define CHILD_NAME "user"
/*
 * SO_USERS_NUM: number of user processes the master forks (originally
 * described as the maximum number of users).
 */
#define SO_USERS_NUM 3
You can do both wait and msgrcv in the same loop, but you'll have to keep a count of the number of children terminated. And, you'll have to make the calls non-blocking:
/*
 * Poll the queue and the children without blocking until SO_USERS_NUM
 * children have been reaped. Uses q_id, mt_msg and num_bytes from the
 * enclosing function.
 */
int pid_done = 0;
while (pid_done < SO_USERS_NUM) {
    /* payload size: the struct minus its leading mtype field */
    num_bytes = sizeof(mt_msg) - sizeof(mt_msg.mtype);

    /* try to receive one type-3 message; fails at once if none is queued */
    num_bytes = msgrcv(q_id, &mt_msg, num_bytes, 3, IPC_NOWAIT);
    if (num_bytes >= 0) {
        printf("received message");
        /* received a good message (possibly of zero length) */
        if ((size_t) num_bytes >= sizeof(mt_msg.numero))
            printf(" numero: %d", mt_msg.numero);
        printf("\n");
    }

    /* has a child terminated? (0 = children exist, none has exited yet) */
    pid_t pidnow = waitpid(-1, NULL, WNOHANG);
    if (pidnow > 0) {
        printf("reaped: pid %d\n", (int) pidnow);
        ++pid_done;
    }
    else if ((pidnow < 0) && (errno == ECHILD) && (num_bytes < 0)) {
        /* no children left at all (e.g. a fork failed) and nothing queued:
           the count can never reach SO_USERS_NUM, so stop instead of
           spinning forever */
        break;
    }

    // sleep a bit if no change in state occurred to prevent us from
    // "hammering" the system. With WNOHANG, "nothing happened" is reported
    // as 0, so the test must be <= 0 (the old < 0 never slept while children
    // were still running)
    if ((num_bytes < 0) && (pidnow <= 0))
        usleep(100);
}
Note: As I mentioned in my top comments, using 120 isn't just bad form; it is UB (undefined behavior). With it, you'd access data way past the end of the struct. So, the fix is required.
The above is the simplest way. But, if we establish a signal handler for SIGCHLD, we can use blocking calls:
// # of SIGCHLD deliveries seen by the handler. Several child exits can be
// merged into one delivery, so this is a lower bound, not an exact count.
// Must be an atomic type to be used with atomic_fetch_add().
atomic_int chld_sig = 0;
int chld_reap = 0; // # of children reaped (parent loop)

// sigchld -- signal handler for SIGCHLD
//
// Its real job is to make the blocking msgrcv() in parent_loop() return
// (with EINTR) when a child terminates; the counter is informational.
void
sigchld(int signo)
{
    (void) signo;

    // NOTE: we can _not_ do printf inside a signal handler
    // increment number of deliveries seen
    atomic_fetch_add(&chld_sig, 1);
}

// parent_loop -- receive messages (blocking) and reap children
//
// Runs until SO_USERS_NUM children have been reaped, or until waitpid()
// reports that no children exist. Uses q_id, mt_msg and num_bytes from the
// surrounding program.
//
// Known limitation: a SIGCHLD that arrives after the reaping pass below but
// before msgrcv() blocks does not interrupt that msgrcv(); if no further
// message or signal arrives, the loop stays blocked.
void
parent_loop(void)
{
    // using sigaction/sigprocmask, enable signal handler for SIGCHLD ...

    while (chld_reap < SO_USERS_NUM) {
        /* now receiving the message (blocks until a message or a signal) */
        num_bytes = sizeof(mt_msg) - sizeof(mt_msg.mtype);
        num_bytes = msgrcv(q_id, &mt_msg, num_bytes, 3, 0);

        // a negative result is most likely EINTR from SIGCHLD
        if (num_bytes >= 0) {
            printf("received message");
            /* received a good message (possibly of zero length) */
            if ((size_t) num_bytes >= sizeof(mt_msg.numero))
                printf(" numero: %d", mt_msg.numero);
            printf("\n");
        }

        // reap every child that has already terminated. This does not rely
        // on the signal count: coalesced signals would leave children
        // unreaped, and a blocking waitpid() here could stall message
        // processing.
        while (chld_reap < SO_USERS_NUM) {
            pid_t pidnow = waitpid(-1, NULL, WNOHANG);

            // no children exist at all: nothing more can ever be reaped
            if ((pidnow < 0) && (errno == ECHILD))
                return;

            // 0: children still running; < 0: other error -- try again later
            if (pidnow <= 0)
                break;

            // show the pid of the reaped child
            printf("reaped: pid %d\n", (int) pidnow);
            ++chld_reap;
        }
    }
}
UPDATE:
I tried the first implementation: it waits for the children, but it doesn't print anything, as if the children didn't write their messages to the queue.
But if I start a child on its own, it adds a message to the queue; if I then start the master, it reads that previous message, but no new messages are generated, and the children terminate with their pids printed. – Matteo Pagliarello 8 hours ago
Because the queue persists across executions of master, if it fails to process all messages on the first invocation, the subsequent invocation will start with "stale" messages.
Thus, it will become further "desynchronized".
You didn't provide the full user code, but I suspect it sent several messages.
There was a bug in the loop I provided:
Here is the full and complete refactored code. I've combined all files into a single .c
file:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <time.h>
#include <sys/types.h>
#include <sys/msg.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/sem.h>
#include <sys/wait.h>
/*
 * Message sent by each user process to the master over the System V queue.
 * Everything after mtype is the payload (MSGUSER_SIZE bytes).
 */
struct msgUser {
long mtype; /* message type, must be > 0 */
int xid; // sequential user id (parsed from argv[1] by douser)
pid_t pid; // real process id of the sender (getpid())
int numero; // sequence number of this message within its sender
};
/* Payload size handed to msgsnd()/msgrcv(): the struct without mtype. */
#define MSGUSER_SIZE (sizeof(struct msgUser) - sizeof(long))
/*
 * KEY_QUEUE: key passed to msgget() by both the master and the users.
 * Per the original note, child nodes are meant to increase this value to
 * derive the keys of their own queues (not done in this file).
 */
#define KEY_QUEUE 0x200800
/*
 * CHILD_NAME: name of the user program launched via exec. In this combined
 * file domaster only uses it as the initial args[0], which it then replaces
 * with argv[0].
 */
#define CHILD_NAME "./user"
/*
 * SO_USERS_NUM: number of user processes the master forks (originally
 * described as the maximum number of users). May be overridden at build time.
 */
#ifndef SO_USERS_NUM
#define SO_USERS_NUM 3
#endif
// upper bound on the random number of messages each user sends
#ifndef MAXMSG
#define MAXMSG 50
#endif
// set this to 1 to force old [broken] behavior in wait_loop (stop as soon as
// all children are reaped, even if messages are still queued)
#ifndef FORCE_STALE
#define FORCE_STALE 0
#endif
// queue id: set by domaster, read by wait_loop
int q_id;
// pid of each launched child, indexed by its sequential id
pid_t pidlist[SO_USERS_NUM];
// wait_loop -- collect messages and reap children without blocking
//
// Polls the global queue q_id for type-3 messages and polls for terminated
// children. Returns when all children are accounted for (SO_USERS_NUM reaped,
// or waitpid() reports that none exist) and no message is pending.
void
wait_loop(void)
{
    int chld_done = 0;
    int no_children = 0;        // set once waitpid() reports ECHILD
    struct msgUser mt_msg;

    while (1) {
        // NOTE: we can get messages from a given child even after the child
        // has terminated
        int num_bytes = (int) msgrcv(q_id, &mt_msg, MSGUSER_SIZE, 3,
            IPC_NOWAIT);

        /* received a good message (possibly of zero length) */
        if (num_bytes >= 0) {
            printf("wait_loop: received message (%d)", num_bytes);

            // only print the fields when the whole payload arrived (the old
            // test covered numero alone although three fields are printed)
            if ((size_t) num_bytes >= MSGUSER_SIZE) {
                printf(" xid:%d", mt_msg.xid);
                printf(" numero:%d", mt_msg.numero);
                printf(" pid:%d", (int) mt_msg.pid);
            }

            printf("\n");
        }

        // stop loop if:
        // (1) all users reaped (or no child exists any more)
        // (2) no pending messages (from any child)
        if ((chld_done >= SO_USERS_NUM) || no_children) {
            if (FORCE_STALE || (num_bytes < 0))
                break;
        }

        // has a child just terminated?
        pid_t pidnow = waitpid(-1, NULL, WNOHANG);

        // yes, remember the count
        if (pidnow > 0) {
            ++chld_done;

            // get the sequential id number for this pid
            int xid = -1;
            for (int idx = 0; idx < SO_USERS_NUM; ++idx) {
                if (pidnow == pidlist[idx]) {
                    xid = idx;
                    break;
                }
            }

            printf("reaped: xid:%d done:%d pid:%d\n",
                xid, chld_done, (int) pidnow);
        }

        // no children left although fewer than SO_USERS_NUM were reaped
        // (e.g. a fork failed): without this the loop would never end
        else if ((pidnow < 0) && (errno == ECHILD))
            no_children = 1;

        // sleep a bit if no change in state occurred to prevent us from
        // "hammering" the system. With WNOHANG, "no child changed state" is
        // reported as 0, so test <= 0 (the old < 0 never slept while
        // children were still running)
        if ((num_bytes < 0) && (pidnow <= 0))
            usleep(100);
    }
}
// douser -- do user/child
int
douser(int argc, char **argv)
{
int q_id;
struct msgUser my_msg;
q_id = msgget(KEY_QUEUE, IPC_CREAT | 0600);
my_msg.mtype = 3;
my_msg.xid = atoi(argv[1]);
my_msg.pid = getpid();
my_msg.numero = 0;
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC,&ts);
srand(ts.tv_nsec);
int count = (rand() % MAXMSG) + 1;
printf("user:%d pid:%d (%d messages to send)\n",
my_msg.xid, my_msg.pid, count);
fflush(stdout);
time_t osec = ts.tv_sec;
while (1) {
clock_gettime(CLOCK_MONOTONIC,&ts);
if ((ts.tv_sec - osec) >= 1)
break;
usleep(1000);
}
for (int mno = 0; mno < count; ++mno) {
my_msg.numero = mno;
msgsnd(q_id, &my_msg, MSGUSER_SIZE, 0);
}
return 0;
}
// domaster -- do master/parent
//
// Opens the queue, discards stale messages left by an earlier run, forks
// SO_USERS_NUM children that re-exec this program (argv[0]) with their
// sequential id as the only argument, collects messages and terminations via
// wait_loop(), and finally removes the queue.
//
// Returns 0 on success, EXIT_FAILURE if the queue cannot be opened.
int
domaster(int argc, char **argv)
{
    (void) argc;

    q_id = msgget(KEY_QUEUE, IPC_CREAT | 0600);
    if (q_id == -1) {
        perror("domaster: msgget");
        return EXIT_FAILURE;
    }

    // drain all "stale" messages
    // NOTE: if we're working correctly, this should never happen
    // MSG_NOERROR truncates messages larger than our buffer; without it an
    // oversized stale message makes msgrcv fail with E2BIG, which would end
    // the drain early and leave that message in the queue
    int drain = 0;
    while (1) {
        struct msgUser mt_msg;

        if (msgrcv(q_id, &mt_msg, MSGUSER_SIZE, 0,
            IPC_NOWAIT | MSG_NOERROR) < 0)
            break;
        ++drain;
    }
    if (drain > 0)
        printf("master: DRAIN %d\n", drain);

    // create the users: one fork per user, each child re-executes this
    // program. The sequential id is the loop index, known before the fork,
    // so it does _not_ need to live in shared memory
    for (int i = 0; i < SO_USERS_NUM; i++) {
        char test[100];

        pid_t pid = fork();

        switch (pid) {
        case 0: {
            snprintf(test, sizeof test, "%d", i);
            char *args[] = { argv[0], test, NULL };

            execvp(args[0], args);

            // only reached when exec failed; _exit so that the stdio
            // buffers inherited from the parent are not flushed twice
            perror("domaster: execvp");
            _exit(EXIT_FAILURE);
        }

        case -1:
            // NOTE: fewer than SO_USERS_NUM children exist after this;
            // wait_loop must cope with that
            printf("Error in main: forkkodio");
            break;

        default:
            printf("master: launch %d (pid %d)\n", i, (int) pid);
            pidlist[i] = pid;
            break;
        }
    }

    wait_loop();

    // the queue outlives the process unless removed; delete it so the next
    // run does not start with leftovers
    if (msgctl(q_id, IPC_RMID, NULL) == -1)
        perror("domaster: msgctl");

    return 0;
}
// main -- entry point shared by master and users
//
// Any command-line argument selects the user role; with no arguments the
// process acts as the master. The role's return value is the exit status.
int
main(int argc, char **argv)
{
    // line-buffer stdout so output from the different processes interleaves
    // a line at a time
    setlinebuf(stdout);

    return (argc > 1) ? douser(argc, argv) : domaster(argc, argv);
}
Here is the program output:
master: launch 0 (pid 3321553)
master: launch 1 (pid 3321554)
master: launch 2 (pid 3321555)
user:0 pid:3321553 (40 messages to send)
user:1 pid:3321554 (44 messages to send)
user:2 pid:3321555 (11 messages to send)
wait_loop: received message (16) xid:0 numero:0 pid:3321553
wait_loop: received message (16) xid:2 numero:0 pid:3321555
wait_loop: received message (16) xid:0 numero:1 pid:3321553
wait_loop: received message (16) xid:2 numero:1 pid:3321555
wait_loop: received message (16) xid:0 numero:2 pid:3321553
wait_loop: received message (16) xid:2 numero:2 pid:3321555
wait_loop: received message (16) xid:0 numero:3 pid:3321553
wait_loop: received message (16) xid:2 numero:3 pid:3321555
wait_loop: received message (16) xid:0 numero:4 pid:3321553
wait_loop: received message (16) xid:2 numero:4 pid:3321555
wait_loop: received message (16) xid:0 numero:5 pid:3321553
wait_loop: received message (16) xid:2 numero:5 pid:3321555
wait_loop: received message (16) xid:0 numero:6 pid:3321553
wait_loop: received message (16) xid:1 numero:0 pid:3321554
wait_loop: received message (16) xid:2 numero:6 pid:3321555
wait_loop: received message (16) xid:0 numero:7 pid:3321553
wait_loop: received message (16) xid:1 numero:1 pid:3321554
wait_loop: received message (16) xid:2 numero:7 pid:3321555
wait_loop: received message (16) xid:0 numero:8 pid:3321553
reaped: xid:2 done:1 pid:3321555
wait_loop: received message (16) xid:1 numero:2 pid:3321554
wait_loop: received message (16) xid:2 numero:8 pid:3321555
wait_loop: received message (16) xid:0 numero:9 pid:3321553
wait_loop: received message (16) xid:1 numero:3 pid:3321554
wait_loop: received message (16) xid:2 numero:9 pid:3321555
wait_loop: received message (16) xid:0 numero:10 pid:3321553
wait_loop: received message (16) xid:1 numero:4 pid:3321554
wait_loop: received message (16) xid:2 numero:10 pid:3321555
wait_loop: received message (16) xid:0 numero:11 pid:3321553
wait_loop: received message (16) xid:1 numero:5 pid:3321554
wait_loop: received message (16) xid:0 numero:12 pid:3321553
wait_loop: received message (16) xid:1 numero:6 pid:3321554
wait_loop: received message (16) xid:0 numero:13 pid:3321553
wait_loop: received message (16) xid:1 numero:7 pid:3321554
wait_loop: received message (16) xid:0 numero:14 pid:3321553
wait_loop: received message (16) xid:1 numero:8 pid:3321554
wait_loop: received message (16) xid:0 numero:15 pid:3321553
wait_loop: received message (16) xid:1 numero:9 pid:3321554
reaped: xid:0 done:2 pid:3321553
wait_loop: received message (16) xid:0 numero:16 pid:3321553
wait_loop: received message (16) xid:1 numero:10 pid:3321554
wait_loop: received message (16) xid:0 numero:17 pid:3321553
wait_loop: received message (16) xid:1 numero:11 pid:3321554
wait_loop: received message (16) xid:0 numero:18 pid:3321553
wait_loop: received message (16) xid:1 numero:12 pid:3321554
wait_loop: received message (16) xid:0 numero:19 pid:3321553
reaped: xid:1 done:3 pid:3321554
wait_loop: received message (16) xid:1 numero:13 pid:3321554
wait_loop: received message (16) xid:0 numero:20 pid:3321553
wait_loop: received message (16) xid:1 numero:14 pid:3321554
wait_loop: received message (16) xid:0 numero:21 pid:3321553
wait_loop: received message (16) xid:1 numero:15 pid:3321554
wait_loop: received message (16) xid:0 numero:22 pid:3321553
wait_loop: received message (16) xid:1 numero:16 pid:3321554
wait_loop: received message (16) xid:0 numero:23 pid:3321553
wait_loop: received message (16) xid:1 numero:17 pid:3321554
wait_loop: received message (16) xid:0 numero:24 pid:3321553
wait_loop: received message (16) xid:1 numero:18 pid:3321554
wait_loop: received message (16) xid:0 numero:25 pid:3321553
wait_loop: received message (16) xid:1 numero:19 pid:3321554
wait_loop: received message (16) xid:0 numero:26 pid:3321553
wait_loop: received message (16) xid:1 numero:20 pid:3321554
wait_loop: received message (16) xid:0 numero:27 pid:3321553
wait_loop: received message (16) xid:1 numero:21 pid:3321554
wait_loop: received message (16) xid:0 numero:28 pid:3321553
wait_loop: received message (16) xid:1 numero:22 pid:3321554
wait_loop: received message (16) xid:0 numero:29 pid:3321553
wait_loop: received message (16) xid:1 numero:23 pid:3321554
wait_loop: received message (16) xid:0 numero:30 pid:3321553
wait_loop: received message (16) xid:1 numero:24 pid:3321554
wait_loop: received message (16) xid:0 numero:31 pid:3321553
wait_loop: received message (16) xid:1 numero:25 pid:3321554
wait_loop: received message (16) xid:0 numero:32 pid:3321553
wait_loop: received message (16) xid:1 numero:26 pid:3321554
wait_loop: received message (16) xid:0 numero:33 pid:3321553
wait_loop: received message (16) xid:1 numero:27 pid:3321554
wait_loop: received message (16) xid:0 numero:34 pid:3321553
wait_loop: received message (16) xid:1 numero:28 pid:3321554
wait_loop: received message (16) xid:0 numero:35 pid:3321553
wait_loop: received message (16) xid:1 numero:29 pid:3321554
wait_loop: received message (16) xid:0 numero:36 pid:3321553
wait_loop: received message (16) xid:1 numero:30 pid:3321554
wait_loop: received message (16) xid:0 numero:37 pid:3321553
wait_loop: received message (16) xid:1 numero:31 pid:3321554
wait_loop: received message (16) xid:0 numero:38 pid:3321553
wait_loop: received message (16) xid:1 numero:32 pid:3321554
wait_loop: received message (16) xid:0 numero:39 pid:3321553
wait_loop: received message (16) xid:1 numero:33 pid:3321554
wait_loop: received message (16) xid:1 numero:34 pid:3321554
wait_loop: received message (16) xid:1 numero:35 pid:3321554
wait_loop: received message (16) xid:1 numero:36 pid:3321554
wait_loop: received message (16) xid:1 numero:37 pid:3321554
wait_loop: received message (16) xid:1 numero:38 pid:3321554
wait_loop: received message (16) xid:1 numero:39 pid:3321554
wait_loop: received message (16) xid:1 numero:40 pid:3321554
wait_loop: received message (16) xid:1 numero:41 pid:3321554
wait_loop: received message (16) xid:1 numero:42 pid:3321554
wait_loop: received message (16) xid:1 numero:43 pid:3321554