I have a setup where I have a couple dozen web workers postMessage
requests to the main thread and then synchronously block for the response:
Since this is synchronous, the web worker never makes a postMessage
request until it has successfully extracted the response from the shared array buffer. However, what I'm observing is that very rarely (after thousands of messages), the SharedArrayBuffer has a corrupted response in it. I was able to exacerbate the problem so it happens pretty frequently with this 80-line reproducible example:
//////////////////// WORKER
function workerFn() {
const sab = new SharedArrayBuffer(1024 * 1024) // [0:4 Atomics.wait signal][4:8 data length][8:8+data length JSON data]
const vi32 = new Int32Array(sab); // View needed for Atomics.wait
const vui8 = new Uint8Array(sab); // View needed for TextDecoder
const sbuf = { sab, vi32, vui8 };
postMessage({ type: "sab", sab });
let pl = 0;
while (true) {
postMessage({ type: "sync", payload: pl++ });
// This ostensibly synchronously blocks until the first int32 of the SharedArrayBuffer changes
// The main thread is responsible for changing this value and calling Atomics.notify()
Atomics.wait(sbuf.vi32, 0, 0); // Wait for expected value to change
Atomics.store(sbuf.vi32, 0, 0); // Reset expected value to 0 for next iteration
// The data is JSON as utf-8 encoded uint8
let data_length = sbuf.vi32[1];
let data = new TextDecoder().decode(sbuf.vui8.slice(8, 8 + data_length)); // 8 byte offset for header
let m;
try {
m = JSON.parse(data);
} catch (e) {
// This should never happen, yet it does
throw new Error("How is this possible? Bad JSON:" + data);
}
if (m.cooldown > 0) {
// Since this should never change until the next postMessage, we should be able to wait on it
Atomics.wait(sbuf.vi32, 0, 0, m.cooldown); // Sometimes this returns something other than "timed-out" which should be impossible!
}
}
}
//////////////////// MAIN THREAD
let processedMessages = 0;
function onWorkerMessage(workerName, data) {
if (data.type === 'sab') {
console.log('Received SAB from', workerName)
workers[workerName].sbuf = {
sab: data.sab,
vi32: new Int32Array(data.sab), // View needed for Atomics.store
vui8: new Uint8Array(data.sab), // View needed for TextEncoder
};
} else if (data.type === 'sync') {
processedMessages++;
if (processedMessages % 10000 === 0) {
console.log('Processed', processedMessages, 'messages')
}
// Do a little fake work
for (let i = 0; i < 100; i++)
Math.random();
// Send a message back to the worker
let m = { rv: data.payload % 2 === 0, cooldown: data.payload % 2 === 0 ? 0 : 0.5 };
const rui8 = new TextEncoder().encode(JSON.stringify(m));
const sbuf = workers[workerName].sbuf;
Atomics.store(sbuf.vi32, 1, rui8.length);
sbuf.vui8.set(rui8, 8);
// Signal the worker that the data is ready
Atomics.store(sbuf.vi32, 0, 1);
Atomics.notify(sbuf.vi32, 0);
}
}
//////////////////// INIT
let workers = {}
for (let i = 0; i < 20; i++) {
console.log('Starting worker', i)
let wf = workerFn.toString();
wf = wf.substring(wf.indexOf('{') + 1, wf.lastIndexOf('}'))
const blob = new Blob([wf], { type: 'application/javascript' })
const worker = new Worker(URL.createObjectURL(blob), { name: `worker${i}` })
worker.onmessage = e => onWorkerMessage(`worker${i}`, e.data)
workers[`worker${i}`] = worker
}
In order to run this code, it has to be served from a web server that sets specific response headers to allow SharedArrayBuffer, which can be done with this python script:
#!/usr/bin/env python
from http import server
class MyHTTPRequestHandler(server.SimpleHTTPRequestHandler):
def end_headers(self):
self.send_my_headers()
server.SimpleHTTPRequestHandler.end_headers(self)
def send_my_headers(self):
self.send_header("Cross-Origin-Opener-Policy", "same-origin")
self.send_header("Cross-Origin-Embedder-Policy", "require-corp")
if __name__ == "__main__":
server.test(HandlerClass=MyHTTPRequestHandler)
Sample HTML:
<!DOCTYPE html>
<html><script src="./atomics_bug.js"></script></html>
If you run this, you'll see output on the console like this:
This behavior happens in both Firefox and Chrome.
I've gone though this minimal snippet for hours and it seems airtight to me, which leads me to believe that it is either a misunderstanding on my part or (less likely) a bug in JavaScript itself.
Another clue is that if I modify the code so the cooldown section has this:
if (m.cooldown > 0) {
// Since this should never change until the next postMessage, we should be able to wait on it
let rv = Atomics.wait(sbuf.vi32, 0, 0, m.cooldown);
if (rv !== 'timed-out') {
// !!! This should never happen, yet it does
throw new Error("How is this possible? Atomics.wait returned: " + rv);
}
}
Then I see lots of console errors that the Atomics.wait
in the cooldown condition is not actually timing out, which again should be impossible since nothing should be able to modify sbuf.vi32[0]
until the next postMessage
has been sent.
Deleting the entire if (m.cooldown > 0)
block makes the issue unreproducible, so it's clearly at the crux, which is leading me to question whether Atomics.wait
has some gotcha or undefined behavior or a bug in its implementation.
The fundamental issue is a misunderstanding of what can cause Atomics.wait()
to return which value.
As of 21 Jan 2025, the Atomics.wait() docs on MDN say in the examples section:
A reading thread is sleeping and waiting on location 0 which is expected to be 0. As long as that is true, it will not go on.
However, this is not exactly true. It turns out that Atomics.notify()
will always cause a sleeping Atomics.wait()
to return ok
, even if the expected value has not changed:
"ok"
is returned if woken up by a call to Atomics.notify()
, regardless of if the expected value has changed"not-equal"
is returned immediately if the initial value
does not equal what is stored at index
"timed-out"
is returned if a sleeping wait exceeds the specified timeout
without being woken up by Atomics.notify()
Thus, the following execution interleaving can cause the observed behavior:
Main Worker
postMessage();
Atomics.store(sbuf.vi32, 0, 1);
Atomics.wait(sbuf.vi32, 0, 0); // No wait (not-equal)
Atomics.store(sbuf.vi32, 0, 0);
m = JSON.parse(data);
// skip cooldown because (m.cooldown === 0)
postMessage();
Atomics.wait(sbuf.vi32, 0, 0); // Wait
Atomics.notify(sbuf.vi32, 0);
...
Atomics.store(sbuf.vi32, 1, rui8.length);
m = JSON.parse(data); // Length has been updated, but not the data, this will fail!
In an effort to build a bridge for those that follow me, I opened a PR to update MDN docs here: https://github.com/mdn/content/pull/37742