From:Davi Arnaut Date:September 17 2010 8:34pm
Subject:bzr commit into mysql-5.5 branch (davi:3093) Bug#52419
# At a local mysql-5.5 repository of davi

 3093 Davi Arnaut	2010-09-17
      Bug#52419: x86 assembly based atomic CAS causes test failures
      The problem was that the x86 assembly based atomic CAS
      (compare and swap) implementation could copy the wrong
      value to the ebx register, where cmpxchg8b expects to
      see the low half of the new value to be stored (ecx:ebx).
      Since the original value of the ebx register is saved on
      the stack (that is, the push instruction changes the stack
      pointer), a wrong offset could be used if the compiler
      decides to address the source of the new value relative
      to the stack pointer.
      The solution is to copy the new value directly from memory
      through a pointer held in ecx. Since the new value is 64
      bits wide, it is copied in two steps over to the ebx and
      ecx registers.
     @ include/atomic/x86-gcc.h
        For reference, an excerpt from a faulty binary follows.
        It is a disassembly of my_atomic-t, compiled at -O3 with
        ICC 11.0. Most of the code deals with preparations for
        an atomic cmpxchg8b operation. This instruction compares
        the value in edx:eax with the destination operand. If the
        values are equal, the value in ecx:ebx is stored in the
        destination, otherwise the value in the destination operand
        is copied into edx:eax.
        In this case, my_atomic_add64 is implemented as a compare
        and exchange. The addition is done over temporary storage
        and loaded into the destination if the original term value
        is still valid.
          volatile int64 a64;
          int64 b=0x1000200030004000LL;
              mov    0xfffffda8(%ebx),%eax
              xor    %ebp,%ebp
              mov    %ebp,(%eax)
              mov    %ebp,0x4(%eax)
          my_atomic_add64(&a64, b);
              mov    0xfffffda8(%ebx),%ebp      # Load address of a64
              mov    0x0(%ebp),%edx             # Copy value
              mov    0x4(%ebp),%ecx
              mov    %edx,0xc(%esp)             # Assign to tmp var in the stack
              mov    %ecx,0x10(%esp)
              add    $0x30004000,%edx           # Sum values
              adc    $0x10002000,%ecx
              mov    %edx,0x8(%esp)             # Save part of result for later
              mov    0x0(%ebp),%esi             # Copy value of a64 again
              mov    0x4(%ebp),%edi
              mov    0xc(%esp),%eax             # Load the value of a64 used
              mov    0x10(%esp),%edx            # for comparison
              mov    %esi,(%esp)
              mov    %edi,0x4(%esp)
              push   %ebx                       # Push %ebx into stack. Changes esp.
              mov    0x8(%esp),%ebx             # Wrong restore of the result.
              lock cmpxchg8b 0x0(%ebp)
              sete   %cl
              pop    %ebx

=== modified file 'include/atomic/x86-gcc.h'
--- a/include/atomic/x86-gcc.h	2010-07-23 12:37:10 +0000
+++ b/include/atomic/x86-gcc.h	2010-09-17 20:34:15 +0000
@@ -111,9 +111,9 @@
   On some platforms (e.g. Mac OS X and Solaris) the ebx register
   is held as a pointer to the global offset table. Thus we're not
   allowed to use the b-register on those platforms when compiling
-  PIC code, to avoid this we push ebx and pop ebx and add a movl
-  instruction to avoid having ebx in the interface of the assembler
-  instruction.
+  PIC code, to avoid this we push ebx and pop ebx. The new value
+  is copied directly from memory to avoid problems with a implicit
+  manipulation of the stack pointer by the push.
   cmpxchg8b works on both 32-bit platforms and 64-bit platforms but
   the code here is only used on 32-bit platforms, on 64-bit
@@ -121,11 +121,13 @@
 #define make_atomic_cas_body64                                    \
-  int32 ebx=(set & 0xFFFFFFFF), ecx=(set >> 32);                  \
-  asm volatile ("push %%ebx; movl %3, %%ebx;"                     \
-                LOCK_prefix "; cmpxchg8b %0; setz %2; pop %%ebx"  \
-                : "=m" (*a), "+A" (*cmp), "=c" (ret)               \
-                : "m" (ebx), "c" (ecx), "m" (*a)                  \
+  asm volatile ("push %%ebx;"                                     \
+                "movl (%%ecx), %%ebx;"                            \
+                "movl 4(%%ecx), %%ecx;"                           \
+                LOCK_prefix "; cmpxchg8b %0;"                     \
+                "setz %2; pop %%ebx"                              \
+                : "=m" (*a), "+A" (*cmp), "=c" (ret)              \
+                : "c" (&set), "m" (*a)                            \
                 : "memory", "esp")

