I need to use robust mutexes for IPC in case one of my processes crashes while holding a locked mutex. The definition of a robust mutex is clear from, e.g., man pthread_mutexattr_setrobust,
so I will not repeat it here.
I have created a demo of 2 processes simulating the 1st process crash and 2nd process recovery:
1st process lock_and_crash.c
// lock_and_crash.c
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <pthread.h>
#include <string.h>
#include <unistd.h>
/* Layout of the shared memory segment used by this demo. */
typedef struct {
    pthread_mutex_t mutex; /* initialised in main() as process-shared and robust */
    int data;              /* payload stored next to the mutex */
} shared_data_t;
/*
 * Process 1 of the demo: creates the shared memory segment, initialises a
 * process-shared robust mutex in it, locks the mutex and then terminates
 * without unlocking, simulating a crash while holding the lock.
 *
 * Returns 1 on any setup failure; otherwise terminates via _exit(1).
 */
int main(void) {
    const char *shm_name = "/robust_mutex_shm";

    // Open or create shared memory
    int shm_fd = shm_open(shm_name, O_CREAT | O_RDWR, 0600);
    if (shm_fd == -1) {
        perror("shm_open");
        return 1;
    }

    // Resize shared memory to hold shared_data_t
    if (ftruncate(shm_fd, sizeof(shared_data_t)) == -1) {
        perror("ftruncate");
        close(shm_fd);
        return 1;
    }

    // Map shared memory
    shared_data_t *shared = mmap(NULL, sizeof(shared_data_t),
                                 PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
    if (shared == MAP_FAILED) {
        perror("mmap");
        close(shm_fd);
        return 1;
    }
    // The mapping stays valid after the descriptor is closed
    close(shm_fd);

    // NOTE: this demo unconditionally zeroes and re-initialises the segment
    // on every run; it does not detect an already-initialised mutex.
    memset(shared, 0, sizeof(shared_data_t));

    // pthread_* functions return the error number directly and do not set
    // errno, so failures are reported with strerror(rc) rather than perror().
    pthread_mutexattr_t attr;
    int rc = pthread_mutexattr_init(&attr);
    if (rc != 0) {
        fprintf(stderr, "pthread_mutexattr_init: %s\n", strerror(rc));
        return 1;
    }
    rc = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
    if (rc == 0) {
        rc = pthread_mutexattr_setrobust(&attr, PTHREAD_MUTEX_ROBUST);
    }
    if (rc == 0) {
        rc = pthread_mutex_init(&shared->mutex, &attr);
    }
    pthread_mutexattr_destroy(&attr);
    if (rc != 0) {
        fprintf(stderr, "robust mutex initialisation failed: %s\n", strerror(rc));
        return 1;
    }

    printf("Process 1: locking mutex...\n");
    rc = pthread_mutex_lock(&shared->mutex);
    if (rc != 0) {
        fprintf(stderr, "pthread_mutex_lock: %s\n", strerror(rc));
        return 1;
    }

    printf("Process 1: mutex locked, simulating crash (exit without unlock)...\n");
    // _exit() does not flush stdio buffers; flush explicitly so the messages
    // are not lost when stdout is redirected to a pipe or file.
    fflush(stdout);

    // Crash by exiting without unlocking. The segment is deliberately left
    // mapped: robust-mutex recovery requires the mutex to still be mapped in
    // this process at exit time.
    _exit(1);
}
2nd process recover_mutex.c
// recover_mutex.c
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <pthread.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
/* Layout of the shared memory segment mapped by this program. */
typedef struct {
    pthread_mutex_t mutex; /* mutex living in shared memory; locked by main() */
    int data;              /* payload rewritten by main() on EOWNERDEAD recovery */
} shared_data_t;
/*
 * Process 2 of the demo: opens the existing shared memory segment and locks
 * the mutex stored in it. If the lock reports EOWNERDEAD, the shared data is
 * repaired, the mutex is marked consistent and then unlocked.
 *
 * Returns 0 on success (normal lock or successful recovery), 1 on failure.
 * On success the shared memory object is unlinked.
 */
int main(void) {
    const char *shm_name = "/robust_mutex_shm";

    int shm_fd = shm_open(shm_name, O_RDWR, 0600);
    if (shm_fd == -1) {
        perror("shm_open");
        return 1;
    }

    shared_data_t *shared = mmap(NULL, sizeof(shared_data_t),
                                 PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
    if (shared == MAP_FAILED) {
        perror("mmap");
        close(shm_fd);
        return 1;
    }
    // The mapping stays valid after the descriptor is closed
    close(shm_fd);

    printf("Process 2: trying to lock mutex...\n");
    // pthread_* functions return the error number directly (errno is not set)
    int ret = pthread_mutex_lock(&shared->mutex);
    if (ret == EOWNERDEAD) {
        // The lock IS held by us at this point, but the protected state may
        // be inconsistent and must be repaired before normal use resumes.
        printf("Process 2: EOWNERDEAD received, recovering...\n");

        // Optionally recover shared data
        shared->data = 12345;

        // Mark mutex consistent; unlocking without doing so would leave the
        // mutex permanently unusable.
        ret = pthread_mutex_consistent(&shared->mutex);
        if (ret != 0) {
            fprintf(stderr, "pthread_mutex_consistent failed with %d (%s)\n", ret, strerror(ret));
            return 1;
        }
        ret = pthread_mutex_unlock(&shared->mutex);
        if (ret != 0) {
            fprintf(stderr, "pthread_mutex_unlock failed with %d (%s)\n", ret, strerror(ret));
            return 1;
        }
        printf("Process 2: recovery complete and mutex unlocked.\n");
    } else if (ret != 0) {
        fprintf(stderr, "pthread_mutex_lock failed with %d (%s)\n", ret, strerror(ret));
        return 1;
    } else {
        printf("Process 2: locked mutex normally.\n");
        ret = pthread_mutex_unlock(&shared->mutex);
        if (ret != 0) {
            fprintf(stderr, "pthread_mutex_unlock failed with %d (%s)\n", ret, strerror(ret));
            return 1;
        }
    }

    // Cleanup is best effort: report problems but do not fail the demo
    if (munmap(shared, sizeof(shared_data_t)) == -1) {
        perror("munmap");
    }
    if (shm_unlink(shm_name) == -1) {
        perror("shm_unlink");
    }
    return 0;
}
You can build and run them with e.g.
gcc lock_and_crash.c -o lock_and_crash -pthread
gcc recover_mutex.c -o recover_mutex -pthread
./lock_and_crash
./recover_mutex
However, when I add a single munmap before the exit at the end of the 1st process, lock_and_crash.c:
...
printf("Process 1: mutex locked, simulating crash (exit without unlock)...\n");
munmap(shared, sizeof(shared_data_t));
// Crash by exiting without unlocking
_exit(1);
the 2nd process hangs: its call to pthread_mutex_lock(&shared->mutex)
never returns. Why? Is this documented somewhere?
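The robust futexes held by a thread are kept in a per-thread linked list in user space. The head of this list is registered with the kernel. When the thread exits without releasing them (e.g. in a crash), the kernel walks this list and marks each futex referenced in it as owner-dead, waking a waiter if there is one. The list entries live inside the mutex objects themselves, i.e. inside the shared memory segment, so this update of course cannot take place when the segment has been unmapped prior to process exit.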
Details can be found in the Kernel Documentation for robust futexes. Note that there is an older and a newer method, depending on the kernel version. However, both require the mutex to be mapped in userspace memory at process exit time.