linuxsemaphorefutex

How could futex_wake return 0


I implemented semaphore using futex. The following program often fails at the assertion in sem_post(). While the return value is supposed to be 1, it sometimes returns 0. How can this happen?

When I use POSIX semaphore the program always finishes successfully.

I'm using Linux 2.6.32-642.6.1.el6.x86_64

#include <cstdio>
#include <cstdlib>
#include <cassert>
#include <ctime>
#include <linux/futex.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
#include <errno.h>

using namespace std;

#if 0
 #include <semaphore.h>
#else
typedef volatile int sem_t;

void sem_init(sem_t* sem, int shared, int value)
{
    *sem = value;
}

void sem_post(sem_t* sem)
{
    while (1)
    {
        int value = *sem;
        if (__sync_bool_compare_and_swap(sem, value, value >= 0 ? value+1 : 1))
        {
            if (value < 0)      // had contender
            {
                int r = syscall(SYS_futex, sem, FUTEX_WAKE, 1, NULL, 0, 0);
                if (r != 1)
                    fprintf(stderr, "post r=%d err=%d sem=%d %d\n", r,errno,value,*sem);
                assert(r == 1);
            }
            return;
        }
    }
}

int sem_wait(sem_t* sem)
{
    while (1)
    {
        int value = *sem;
        if (value > 0   // positive means no contender
            && __sync_bool_compare_and_swap(sem, value, value-1))
            return 0;
        if (value <= 0
            && __sync_bool_compare_and_swap(sem, value, -1))
        {
            int r= syscall(SYS_futex, sem, FUTEX_WAIT, -1, NULL, 0, 0);
            if (!r) {
                assert(__sync_fetch_and_sub(sem, 1) > 0);
                return 0;
            }
            printf("wait r=%d errno=%d sem=%d %d\n", r,errno, value,*sem);
        }
    }
}

void sem_getvalue(sem_t* sem, int* value)
{
    *value = *sem;
}

#endif

// return current time in ns
unsigned long GetTime()
{
    struct timespec ts;
    clock_gettime(CLOCK_REALTIME, &ts);
    return ts.tv_sec*1000000000ul + ts.tv_nsec;
}

void Send(sem_t* sem, unsigned count)
{
    while (count--)
        sem_post(sem);
}

void Receive(sem_t* sem, unsigned count)
{
    while (count--)
        sem_wait(sem);
}


int main()
{
    sem_t* sem = reinterpret_cast<sem_t*>(mmap(NULL, sizeof(sem_t), PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_SHARED, -1, 0));
    assert(sem != MAP_FAILED);
    sem_init(sem, 1, 0);
    unsigned count = 10485760;

    int pid = fork();
    assert(pid != -1);
    if (!pid)   // child
    {
        Send(sem, count);
        _exit(EXIT_SUCCESS);
    }
    else    // parent
    {
        unsigned long t0 = GetTime();
        Receive(sem, count);
        printf("t=%g ms\n", (GetTime()-t0)*1e-6);
        wait(NULL);
        int v;
        sem_getvalue(sem, &v);
        assert(v == 0);
    }
}

Solution

  • The call to syscall(SYS_futex, sem, FUTEX_WAKE, 1, NULL, 0, 0) will return 0 when there is no thread waiting on sem. In your code this is possible because you call that futex line in sem_post when *sem is negative which can be the case without that any thread is actually sleeping:

    If *sem is zero when calling sem_wait you continue to execute __sync_bool_compare_and_swap(sem, value, -1) which sets *sem to -1. At that point this thread is not yet sleeping however. So, when another thread calls sem_post at that point (before the thread that is calling sem_wait enters the futex syscall) your assert failure will happen.