After learning std::atomic
and std::memory_order
I wanted to experiment with writing a thread safe shared_ptr<T>
and weak_ptr<T>
implementation using atomics based on Microsoft Blog: Inside STL: Smart pointers. I'm not sure what memory order to use when implementing weak_ptr<T>::lock()
for the compare_exchange_weak
loop.
Control Block:
class control_block {
public:
constexpr control_block() : strong(1), weak(1) {}
void inc_strong() { strong.fetch_add(1, std::memory_order_relaxed); }
void inc_weak() { weak.fetch_add(1, std::memory_order_relaxed); }
int32_t dec_strong() { return strong.fetch_sub(1, std::memory_order_release); }
int32_t dec_weak() { return weak.fetch_sub(1, std::memory_order_release); }
int32_t use_count() { return strong.load(std::memory_order_relaxed); }
public:
std::atomic<int32_t> strong;
std::atomic<int32_t> weak;
};
Shared Pointer
template<class T>
class shared_ptr {
public:
using element_type = T;
using weak_type = weak_ptr<T>;
friend class weak_ptr<T>;
public:
constexpr shared_ptr() noexcept: object(nullptr), blk(nullptr) {}
explicit shared_ptr(T *ptr) : object(ptr), blk(new control_block()) {}
shared_ptr(const shared_ptr &other) noexcept {
object = other.object;
blk = other.blk;
if (blk) {
blk->inc_strong();
}
}
shared_ptr(shared_ptr &&other) noexcept {
object = other.object;
blk = other.blk;
other.object = nullptr;
other.blk = nullptr;
}
shared_ptr<T> &operator=(const shared_ptr &other) noexcept {
if (this == &other) return *this;
release_strong_ref();
object = other.object;
blk = other.blk;
if (blk) {
blk->inc_strong();
}
return *this;
};
shared_ptr<T> &operator=(shared_ptr &&other) noexcept {
if (this == &other) return *this;
object = other.object;
blk = other.blk;
other.object = nullptr;
other.blk = nullptr;
return *this;
};
~shared_ptr() {
release_strong_ref();
}
public:
explicit operator bool() const noexcept { return blk != nullptr; }
[[nodiscard]] element_type *get() const noexcept { return object; }
[[nodiscard]] int32_t use_count() const noexcept { return blk ? blk->use_count() : 0; }
T &operator*() const noexcept { return *object; }
T *operator->() const noexcept { return object; }
private:
void release_strong_ref() noexcept {
if (!blk) return;
if (blk->dec_strong() == 1) {
std::atomic_thread_fence(std::memory_order_acquire);
delete object;
if (blk->dec_weak() == 1) {
std::atomic_thread_fence(std::memory_order_acquire);
delete blk;
}
}
}
private:
T *object;
control_block *blk;
};
and Weak Pointer:
template<class T>
class weak_ptr {
public:
using element_type = T;
friend class shared_ptr<T>;
public:
constexpr weak_ptr() noexcept: object(nullptr), blk(nullptr) {};
weak_ptr(const weak_ptr &other) noexcept {
release_weak_ref();
object = other.object;
blk = other.blk;
if (blk) {
blk->inc_weak();
}
}
weak_ptr(weak_ptr &&other) noexcept {
object = other.object;
blk = other.blk;
other.object = nullptr;
other.blk = nullptr;
}
template<class Y>
weak_ptr(const shared_ptr<Y> &other) noexcept {
if (!other) {
object = nullptr;
blk = nullptr;
return;
}
object = other.object;
blk = other.blk;
blk->inc_weak();
}
~weak_ptr() {
release_weak_ref();
}
template<class Y>
weak_ptr<T> &operator=(const shared_ptr<Y> &other) noexcept {
release_weak_ref();
if (!other) {
object = nullptr;
blk = nullptr;
return *this;
}
object = other.object;
blk = other.blk;
blk->inc_weak();
return *this;
};
weak_ptr<T> &operator=(const weak_ptr &other) noexcept {
if (this == &other) return *this;
release_weak_ref();
object = other.object;
blk = other.blk;
if (blk) {
blk->inc_weak();
}
return *this;
};
weak_ptr<T> &operator=(weak_ptr &&other) noexcept {
if (this == &other) return *this;
object = other.object;
blk = other.blk;
other.object = nullptr;
other.blk = nullptr;
return *this;
};
public:
explicit operator bool() const noexcept { return blk != nullptr; }
shared_ptr<T> lock() noexcept {
if (!blk) return shared_ptr<T>();
int32_t old = blk->strong.load(std::memory_order_relaxed);
while (old > 0) {
if (blk->strong.compare_exchange_weak(old, old + 1, std::memory_order_acquire)) {
shared_ptr<T> ptr;
ptr.object = object;
ptr.blk = blk;
return ptr;
}
old = blk->strong.load(std::memory_order_relaxed);
}
return shared_ptr<T>();
}
[[nodiscard]]int32_t use_count() const noexcept { return blk ? blk->use_count() : 0; }
[[nodiscard]]bool expired() const noexcept { return use_count() == 0; }
private:
void release_weak_ref() noexcept {
if (!blk) return;
if (blk->dec_weak() == 1) {
std::atomic_thread_fence(std::memory_order_acquire);
delete blk;
}
}
private:
T *object;
control_block *blk;
};
When doing the compare exchange loop to check if the strong reference is non zero, what should be the success and failure memory order? And, in the destructor for the shared_ptr<T>
and weak_ptr<T>
, is the memory order of the std::atomic_thread_fence(std::memory_order_acquire)
correct to fence the deletion of the control block and the object?
I have tried using it as a drop in replacement in some multithreaded app on x86 and it seems to behave fine. But I'm worried due to the memory arch of other platforms, there are bugs within this code that is not showing up.
On a further note: How would one go about testing the threading safety beyond trying to reason about the memory order.
This is only a partial answer to the problem:
You can use libstdc++ as reference (code reformatted):
template<> inline bool _Sp_counted_base<_S_atomic>::_M_add_ref_lock_nothrow() noexcept { // Perform lock-free add-if-not-zero operation. _Atomic_word __count = _M_get_use_count(); do { if (__count == 0) return false; // Replace the current counter value with the old value + 1, as // long as it's not changed meanwhile. } while (!__atomic_compare_exchange_n(&_M_use_count, &__count, __count + 1, true, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED)); return true; }
Firstly, note that you have a pointless load
operation in your loop:
old = blk->strong.load(std::memory_order_relaxed);
When std::compare_exchange
fails, the current value is loaded into old
anyway, so this load does nothing for you.
Secondly:
relaxed
order. The compare_exchange
is a read-modify-write operation anyway and you don't do anything with the value between loading it and retrying the compare_exchange
, so any stronger memory order is useless.relaxed
seems sufficient to me (MSVC uses relaxed
here, although libstdc++ uses acq_rel
). I'm not confident in saying whether acq_rel
is absolutely necessary, but you cold use it to be on the safe side.