jiaoyk · April 30, 2023 05:23 · Dec 22, 2019
diff --git a/ordering.cpp b/ordering.cpp
@@ -0,0 +1,231 @@
+// Jeff Preshing's example that shows the reordering of Intel CPUs made to
+// work on both Linux and macOS.
+//
+// Original source:
+//
+//   https://preshing.com/20120515/memory-reordering-caught-in-the-act/
+//
+// Apple semaphore change is based on the answer by dho at:
+//
+//   https://stackoverflow.com/questions/27736618/why-are-sem-init-sem-getvalue-sem-destroy-deprecated-on-mac-os-x-and-w
+//
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+
+// Set either of these to 1 to prevent CPU reordering
+#define USE_CPU_FENCE         0
+#define USE_SINGLE_HW_THREAD  0  // Supported on Linux, but not Cygwin or PS3
+
+#if USE_SINGLE_HW_THREAD
+#include <sched.h>
+#endif
+
+#ifdef __APPLE__
+#include <dispatch/dispatch.h>
+#else
+#include <semaphore.h>
+#endif
+
+struct rk_sema {
+#ifdef __APPLE__
+    dispatch_semaphore_t    sem;
+#else
+    sem_t                   sem;
+#endif
+};
+
+
+static inline void
+rk_sema_init(struct rk_sema *s, uint32_t value)
+{
+#ifdef __APPLE__
+    dispatch_semaphore_t *sem = &s->sem;
+
+    *sem = dispatch_semaphore_create(value);
+#else
+    sem_init(&s->sem, 0, value);
+#endif
+}
+
+static inline void
+rk_sema_wait(struct rk_sema *s)
+{
+
+#ifdef __APPLE__
+    dispatch_semaphore_wait(s->sem, DISPATCH_TIME_FOREVER);
+#else
+    int r;
+
+    do {
+            r = sem_wait(&s->sem);
+    } while (r == -1 && errno == EINTR);
+#endif
+}
+
+static inline void
+rk_sema_post(struct rk_sema *s)
+{
+
+#ifdef __APPLE__
+    dispatch_semaphore_signal(s->sem);
+#else
+    sem_post(&s->sem);
+#endif
+}
+
+
+//-------------------------------------
+//  MersenneTwister
+//  A thread-safe random number generator with good randomness
+//  in a small number of instructions. We'll use it to introduce
+//  random timing delays.
+//-------------------------------------
+#define MT_IA  397
+#define MT_LEN 624
+
+class MersenneTwister
+{
+    unsigned int m_buffer[MT_LEN];
+    int m_index;
+
+public:
+    MersenneTwister(unsigned int seed);
+    // Declare noinline so that the function call acts as a compiler barrier:
+    unsigned int integer() __attribute__((noinline));
+};
+
+MersenneTwister::MersenneTwister(unsigned int seed)
+{
+    // Initialize by filling with the seed, then iterating
+    // the algorithm a bunch of times to shuffle things up.
+    for (int i = 0; i < MT_LEN; i++)
+        m_buffer[i] = seed;
+    m_index = 0;
+    for (int i = 0; i < MT_LEN * 100; i++)
+        integer();
+}
+
+unsigned int MersenneTwister::integer()
+{
+    // Indices
+    int i = m_index;
+    int i2 = m_index + 1; if (i2 >= MT_LEN) i2 = 0; // wrap-around
+    int j = m_index + MT_IA; if (j >= MT_LEN) j -= MT_LEN; // wrap-around
+
+    // Twist
+    unsigned int s = (m_buffer[i] & 0x80000000) | (m_buffer[i2] & 0x7fffffff);
+    unsigned int r = m_buffer[j] ^ (s >> 1) ^ ((s & 1) * 0x9908B0DF);
+    m_buffer[m_index] = r;
+    m_index = i2;
+
+    // Swizzle
+    r ^= (r >> 11);
+    r ^= (r << 7) & 0x9d2c5680UL;
+    r ^= (r << 15) & 0xefc60000UL;
+    r ^= (r >> 18);
+    return r;
+}
+
+
+//-------------------------------------
+//  Main program, as decribed in the post
+//-------------------------------------
+rk_sema beginSema1;
+rk_sema beginSema2;
+rk_sema endSema;
+
+int X;
+int Y;
+int r1, r2;
+
+void *thread1Func(void *param)
+{
+    MersenneTwister random(1);
+    for (;;)
+    {
+        rk_sema_wait(&beginSema1);  // Wait for signal
+        while (random.integer() % 8 != 0) {}  // Random delay
+
+        // ----- THE TRANSACTION! -----
+        X = 1;
+#if USE_CPU_FENCE
+        asm volatile("mfence" ::: "memory");  // Prevent CPU reordering
+#else
+        asm volatile("" ::: "memory");  // Prevent compiler reordering
+#endif
+        r1 = Y;
+
+        rk_sema_post(&endSema);  // Notify transaction complete
+    }
+    return NULL;  // Never returns
+};
+
+void *thread2Func(void *param)
+{
+    MersenneTwister random(2);
+    for (;;)
+    {
+        rk_sema_wait(&beginSema2);  // Wait for signal
+        while (random.integer() % 8 != 0) {}  // Random delay
+
+        // ----- THE TRANSACTION! -----
+        Y = 1;
+#if USE_CPU_FENCE
+        asm volatile("mfence" ::: "memory");  // Prevent CPU reordering
+#else
+        asm volatile("" ::: "memory");  // Prevent compiler reordering
+#endif
+        r2 = X;
+
+        rk_sema_post(&endSema);  // Notify transaction complete
+    }
+    return NULL;  // Never returns
+};
+
+int main()
+{
+    // Initialize the semaphores
+    rk_sema_init(&beginSema1, 0);
+    rk_sema_init(&beginSema2, 0);
+    rk_sema_init(&endSema, 0);
+
+    // Spawn the threads
+    pthread_t thread1, thread2;
+    pthread_create(&thread1, NULL, thread1Func, NULL);
+    pthread_create(&thread2, NULL, thread2Func, NULL);
+
+#if USE_SINGLE_HW_THREAD
+    // Force thread affinities to the same cpu core.
+    cpu_set_t cpus;
+    CPU_ZERO(&cpus);
+    CPU_SET(0, &cpus);
+    pthread_setaffinity_np(thread1, sizeof(cpu_set_t), &cpus);
+    pthread_setaffinity_np(thread2, sizeof(cpu_set_t), &cpus);
+#endif
+
+    // Repeat the experiment ad infinitum
+    int detected = 0;
+    for (int iterations = 1; ; iterations++)
+    {
+        // Reset X and Y
+        X = 0;
+        Y = 0;
+        // Signal both threads
+        rk_sema_post(&beginSema1);
+        rk_sema_post(&beginSema2);
+        // Wait for both threads
+        rk_sema_wait(&endSema);
+        rk_sema_wait(&endSema);
+        // Check if there was a simultaneous reorder
+        if (r1 == 0 && r2 == 0)
+        {
+            detected++;
+            printf("%d reorders detected after %d iterations\n", detected,
+                   iterations);
+        }
+    }
+    return 0;  // Never returns
+}