I'm running the C++ code below on an ARM Mac.
The idea is that I have two independent std::atomic<int> variables, x and y. Both variables are initially 0 and are later updated to 1.
There are multiple reader threads. Each thread reads the value of x and then the value of y. The idea is that if there is a "single global order", then {x=0, y=1} and {x=1, y=0} cannot be seen together.
To get some synchronization, I use a std::atomic<bool> named ready. The ready flag acts as a trigger: every thread starts its own logic only after ready becomes true.
But surprisingly, I get both the {0,1} and the {1,0} result in the same run on my machine.
#include <atomic>
#include <iostream>
#include <thread>
#include <vector>
/// Returns a human-readable name for the architecture this code was compiled
/// for, selected at preprocessing time from compiler-predefined macros.
///
/// The function is free of side effects: it does not print anything. The
/// caller is responsible for any label such as "Compiled on/for: "; printing
/// it here as well made the label appear twice in the program output.
///
/// @return Pointer to a string literal (never null); "Unknown architecture"
///         when none of the recognised macros is defined.
const char *machine()
{
#if defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64)
    return "x86-64 (64-bit Intel/AMD)";
#elif defined(__i386__) || defined(_M_IX86)
    return "x86 (32-bit Intel/AMD)";
#elif defined(__aarch64__) || defined(_M_ARM64)
    return "ARM64 (AArch64)";
#elif defined(__arm__) || defined(_M_ARM)
    return "ARM (32-bit)";
#elif defined(__powerpc__) || defined(__powerpc64__) || defined(_M_PPC)
    return "PowerPC";
#else
    return "Unknown architecture";
#endif
}
// How many times the whole experiment is repeated.
static constexpr std::size_t ITERATIONS{1'000'000};
// How many reader threads run check() in each repetition.
static constexpr std::size_t NUM_THREADS{10};

// One observation made by a reader: {value read from x, value read from y}.
using result = std::pair<int, int>;
// Observations of the current repetition, one slot per reader thread.
std::vector<result> results;

// Start flag: every worker thread spins until this becomes true.
std::atomic<bool> ready;
// The two independent variables under test.
std::atomic<int> x;
std::atomic<int> y;
void initialize()
{
results.clear();
results.resize(NUM_THREADS, {0, 0});
ready.store(false, std::memory_order_seq_cst);
x.store(0, std::memory_order_seq_cst);
y.store(0, std::memory_order_seq_cst);
}
/// Writer thread body: spins until `ready` is set, then stores 1 into x
/// with sequentially-consistent ordering.
void modify_x()
{
    // Busy-wait until the start flag has been raised.
    for (;;)
    {
        if (ready.load(std::memory_order_seq_cst))
            break;
    }
    x.store(1, std::memory_order_seq_cst);
}
/// Writer thread body: spins until `ready` is set, then stores 1 into y
/// with sequentially-consistent ordering.
void modify_y()
{
    // Busy-wait until the start flag has been raised.
    for (;;)
    {
        if (ready.load(std::memory_order_seq_cst))
            break;
    }
    y.store(1, std::memory_order_seq_cst);
}
void check(int i)
{
while (!ready.load(std::memory_order_seq_cst))
;
int xl = x.load(std::memory_order_seq_cst);
int yl = y.load(std::memory_order_seq_cst);
results[i] = {xl, yl};
}
/// Tallies the recorded observations and reports whether both {0, 1} and
/// {1, 0} were seen in the same repetition.
///
/// When that happens, a summary line with all four counts is written to
/// std::cout. Pairs with a component other than 0 or 1 are not counted.
///
/// @return true if at least one {0, 1} and at least one {1, 0} were recorded.
bool check_results()
{
    // Tally indexed by 2 * x + y: [0]={0,0}, [1]={0,1}, [2]={1,0}, [3]={1,1}.
    int counts[4] = {};
    for (const auto &[seen_x, seen_y] : results)
    {
        const bool x_is_bit = (seen_x == 0 || seen_x == 1);
        const bool y_is_bit = (seen_y == 0 || seen_y == 1);
        if (x_is_bit && y_is_bit)
            ++counts[2 * seen_x + seen_y];
    }

    const bool failure = (counts[1] != 0) && (counts[2] != 0);
    if (!failure)
        return failure;

    std::cout << "Results: {0, 0}: " << counts[0] << ", {0, 1}: " << counts[1]
              << ", {1, 0}: " << counts[2] << ", {1, 1}: " << counts[3]
              << " failure = " << failure << "\n";
    return failure;
}
bool iterate(std::size_t iter)
{
initialize();
std::vector<std::jthread> threads_;
threads_.reserve(NUM_THREADS + 2);
for (std::size_t i = 0; i < NUM_THREADS; ++i)
threads_.emplace_back(check, i);
threads_.emplace_back(modify_y);
threads_.emplace_back(modify_x);
ready.store(1, std::memory_order_seq_cst);
for (std::size_t i = 0; i < NUM_THREADS + 2; ++i)
threads_[i].join();
return check_results();
}
/// Entry point: prints the target architecture, runs the experiment
/// ITERATIONS times and reports how many repetitions were flagged as failed.
int main()
{
    std::cout << "Compiled on/for: " << machine() << "\n";
    std::cout << "Testing sequential consistency semantics\n";

    std::size_t total_failures = 0;
    for (std::size_t round = 0; round != ITERATIONS; ++round)
    {
        if (iterate(round))
            ++total_failures;
    }

    std::cout << "Total failed iterations: " << total_failures << " out of "
              << ITERATIONS << "\n";
    return 0;
}
The run command and the output are:
g++ rough2.cpp -std=c++20 -o prog && ./prog
Compiled on/for: Compiled on/for: ARM64 (AArch64)
Testing sequential consistency semantics
Results: {0, 0}: 4, {0, 1}: 1, {1, 0}: 1, {1, 1}: 4 failure = 1
Results: {0, 0}: 4, {0, 1}: 1, {1, 0}: 1, {1, 1}: 4 failure = 1
Results: {0, 0}: 5, {0, 1}: 1, {1, 0}: 1, {1, 1}: 3 failure = 1
Results: {0, 0}: 4, {0, 1}: 1, {1, 0}: 1, {1, 1}: 4 failure = 1
Results: {0, 0}: 3, {0, 1}: 1, {1, 0}: 1, {1, 1}: 5 failure = 1
Results: {0, 0}: 2, {0, 1}: 1, {1, 0}: 2, {1, 1}: 5 failure = 1
Results: {0, 0}: 3, {0, 1}: 1, {1, 0}: 1, {1, 1}: 5 failure = 1
Results: {0, 0}: 5, {0, 1}: 1, {1, 0}: 1, {1, 1}: 3 failure = 1
Results: {0, 0}: 4, {0, 1}: 1, {1, 0}: 1, {1, 1}: 4 failure = 1
Results: {0, 0}: 4, {0, 1}: 1, {1, 0}: 2, {1, 1}: 3 failure = 1
Results: {0, 0}: 2, {0, 1}: 1, {1, 0}: 1, {1, 1}: 6 failure = 1
Results: {0, 0}: 5, {0, 1}: 1, {1, 0}: 1, {1, 1}: 3 failure = 1
Results: {0, 0}: 3, {0, 1}: 1, {1, 0}: 1, {1, 1}: 5 failure = 1
Results: {0, 0}: 3, {0, 1}: 2, {1, 0}: 1, {1, 1}: 4 failure = 1
Results: {0, 0}: 4, {0, 1}: 1, {1, 0}: 1, {1, 1}: 4 failure = 1
Total failed iterations: 15 out of 1000000
I would expect this to happen with std::memory_order_acquire and std::memory_order_release, but not with std::memory_order_seq_cst.
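> The idea is that if there is a "single global order", then {x=0, y=1} and {x=1, y=0} cannot be seen together.

This statement is false. A single total order of all atomic operations does not make a reader's two loads one atomic snapshot: a store can be ordered between the load of x and the load of y.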
Simplifying the OP's code, let's assume we have 4 concurrent threads:
- modify_x, running:
  x.store(1); // #1
- modify_y, running:
  y.store(1); // #1
- reader_1 and reader_2, each running:
  int xl = x.load(); // #1
  int yl = y.load(); // #2
Suppose these operations are executed in the following total order:
/* reader_1 #1 */ int xl = x.load(); // reader_1 sees x == 0
/* modify_x #1 */ x.store(1);
/* reader_2 #1 */ int xl = x.load(); // reader_2 sees x == 1
/* reader_2 #2 */ int yl = y.load(); // reader_2 sees y == 0
/* modify_y #1 */ y.store(1);
/* reader_1 #2 */ int yl = y.load(); // reader_1 sees y == 1
Then reader_1 will have xl == 0 && yl == 1, and reader_2 will have xl == 1 && yl == 0.