2 * Copyright (C) 2005-2009 Team XBMC
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with XBMC; see the file COPYING. If not, write to
17 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
18 * http://www.gnu.org/copyleft/gpl.html
// the only safe way to be absolutely sure that
// gcc intrinsics are present when using an unknown GCC
#if defined(__GNUC__) && defined(__GNUC_MINOR__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
#define HAS_GCC_INTRINSICS
#elif defined(TARGET_DARWIN)
// safe under darwin gcc-4.2, llvm-gcc-4.2 and clang
#define HAS_GCC_INTRINSICS
#elif defined(TARGET_FREEBSD)
// safe under freebsd gcc-4.2 and clang
#define HAS_GCC_INTRINSICS
#endif
35 ///////////////////////////////////////////////////////////////////////////
36 // 32-bit atomic compare-and-swap
37 // Returns previous value of *pAddr
38 ///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// 32-bit atomic compare-and-swap
// Atomically: if (*pAddr == expectedVal) *pAddr = swapVal;
// Returns the value *pAddr held BEFORE the operation (== expectedVal on success).
///////////////////////////////////////////////////////////////////////////
long cas(volatile long *pAddr, long expectedVal, long swapVal)
{
#if defined(HAS_GCC_INTRINSICS) || defined(__GNUC__) || defined(__clang__)
  // Full-barrier CAS builtin. Checked here in addition to HAS_GCC_INTRINSICS
  // because clang reports __GNUC__ == 4 / __GNUC_MINOR__ == 2 and so fails
  // the version gate above even though the builtin is available. This also
  // covers the PowerPC/ARM targets the old hand-written asm served.
  return __sync_val_compare_and_swap(pAddr, expectedVal, swapVal);
#elif defined(WIN32)
  // 32-bit MSVC inline asm: CMPXCHG compares eax with [ebx]; on success it
  // stores ecx, and in all cases it leaves the previous value in eax.
  // NOTE(review): reconstructed from the original inline asm — verify on MSVC.
  long prev;
  __asm
  {
    // Load parameters
    mov eax, expectedVal ;
    mov ebx, pAddr ;
    mov ecx, swapVal ;
    // Do swap
    lock cmpxchg dword ptr [ebx], ecx ;
    // Store the return value
    mov prev, eax ;
  }
  return prev;
#else
#error cas: no atomic compare-and-swap implementation for this compiler/platform
#endif
}
112 ///////////////////////////////////////////////////////////////////////////
113 // 64-bit atomic compare-and-swap
114 // Returns previous value of *pAddr
115 ///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// 64-bit atomic compare-and-swap
// Atomically: if (*pAddr == expectedVal) *pAddr = swapVal;
// Returns the value *pAddr held BEFORE the operation.
// Throws (const char*) on targets with no 8-byte CAS — callers must not use
// it there; this preserves the original "hack to allow compilation".
///////////////////////////////////////////////////////////////////////////
long long cas2(volatile long long* pAddr, long long expectedVal, long long swapVal)
{
#if defined(HAS_GCC_INTRINSICS) || ((defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__)))
  // 8-byte full-barrier CAS: compiles to CMPXCHG8B on i386 and CMPXCHG on
  // x86_64 — this also removes the old "not implemented on x86_64!" throw.
  return __sync_val_compare_and_swap(pAddr, expectedVal, swapVal);
#elif defined(WIN32)
  // 32-bit MSVC: CMPXCHG8B compares edx:eax with [esi]; on success it stores
  // ecx:ebx, and in all cases leaves the previous value in edx:eax.
  // NOTE(review): reconstructed from the original inline asm — verify on MSVC.
  long long prev;
  __asm
  {
    mov esi, pAddr ;
    mov eax, dword ptr [expectedVal] ;
    mov edx, dword ptr expectedVal[4] ;
    mov ebx, dword ptr [swapVal] ;
    mov ecx, dword ptr swapVal[4] ;
    lock cmpxchg8b qword ptr [esi] ;
    mov dword ptr [prev], eax ;
    mov dword ptr prev[4], edx ;
  }
  return prev;
#else
  // Not available/required on PowerPC/ARM/MIPS builds.
  // Hack to allow compilation: fail loudly at runtime instead.
  throw "cas2 is not implemented";
#endif
}
157 ///////////////////////////////////////////////////////////////////////////
158 // 32-bit atomic increment
159 // Returns new value of *pAddr
160 ///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// 32-bit atomic increment
// Returns new value of *pAddr
///////////////////////////////////////////////////////////////////////////
long AtomicIncrement(volatile long* pAddr)
{
#if defined(HAS_GCC_INTRINSICS) || defined(__GNUC__) || defined(__clang__)
  // Full-barrier atomic add; returns the incremented (new) value. Checked
  // beyond HAS_GCC_INTRINSICS because clang fails the 4.4 version gate.
  return __sync_add_and_fetch(pAddr, 1);
#elif defined(WIN32)
  // XADD leaves the PRE-add value in eax, so the new value is eax + 1.
  // (A plain `lock inc` followed by a reload of [eax] would not return the
  // value this thread produced — another thread could modify it in between.)
  // NOTE(review): reconstructed inline asm — verify on MSVC.
  long result;
  __asm
  {
    mov ebx, pAddr ;
    mov eax, 1 ;
    lock xadd dword ptr [ebx], eax ;
    inc eax ;
    mov result, eax ;
  }
  return result;
#else
#error AtomicIncrement: no implementation for this compiler/platform
#endif
}
///////////////////////////////////////////////////////////////////////////
// 32-bit atomic add
// Returns new value of *pAddr
///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// 32-bit atomic add
// Returns new value of *pAddr
///////////////////////////////////////////////////////////////////////////
long AtomicAdd(volatile long* pAddr, long amount)
{
#if defined(HAS_GCC_INTRINSICS) || defined(__GNUC__) || defined(__clang__)
  // Full-barrier atomic add; returns *pAddr + amount. Checked beyond
  // HAS_GCC_INTRINSICS because clang fails the 4.4 version gate.
  return __sync_add_and_fetch(pAddr, amount);
#elif defined(WIN32)
  // XADD leaves the PRE-add value in eax; add `amount` back to get the new
  // value this call produced (a reload of [ebx] would race other threads).
  // NOTE(review): reconstructed inline asm — verify on MSVC.
  long result;
  __asm
  {
    mov ebx, pAddr ;
    mov eax, amount ;
    lock xadd dword ptr [ebx], eax ;
    add eax, amount ;
    mov result, eax ;
  }
  return result;
#else
#error AtomicAdd: no implementation for this compiler/platform
#endif
}
313 ///////////////////////////////////////////////////////////////////////////
314 // 32-bit atomic decrement
315 // Returns new value of *pAddr
316 ///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// 32-bit atomic decrement
// Returns new value of *pAddr
///////////////////////////////////////////////////////////////////////////
long AtomicDecrement(volatile long* pAddr)
{
#if defined(HAS_GCC_INTRINSICS) || defined(__GNUC__) || defined(__clang__)
  // Full-barrier atomic subtract; returns the decremented (new) value.
  // Checked beyond HAS_GCC_INTRINSICS because clang fails the 4.4 gate.
  return __sync_sub_and_fetch(pAddr, 1);
#elif defined(WIN32)
  // XADD with -1 leaves the PRE-subtract value in eax; new value is eax - 1.
  // (`lock dec` + reload would not be an atomic read-back.)
  // NOTE(review): reconstructed inline asm — verify on MSVC.
  long result;
  __asm
  {
    mov ebx, pAddr ;
    mov eax, -1 ;
    lock xadd dword ptr [ebx], eax ;
    dec eax ;
    mov result, eax ;
  }
  return result;
#else
#error AtomicDecrement: no implementation for this compiler/platform
#endif
}
391 ///////////////////////////////////////////////////////////////////////////
392 // 32-bit atomic subtract
393 // Returns new value of *pAddr
394 ///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// 32-bit atomic subtract
// Returns new value of *pAddr
///////////////////////////////////////////////////////////////////////////
long AtomicSubtract(volatile long* pAddr, long amount)
{
#if defined(HAS_GCC_INTRINSICS) || defined(__GNUC__) || defined(__clang__)
  // Full-barrier atomic subtract; returns *pAddr - amount. Checked beyond
  // HAS_GCC_INTRINSICS because clang fails the 4.4 version gate.
  return __sync_sub_and_fetch(pAddr, amount);
#elif defined(WIN32)
  // XADD with -amount leaves the PRE-op value in eax; subtract `amount`
  // to recover the new value this call produced.
  // NOTE(review): reconstructed inline asm — verify on MSVC.
  long result;
  __asm
  {
    mov ebx, pAddr ;
    mov eax, amount ;
    neg eax ;
    lock xadd dword ptr [ebx], eax ;
    sub eax, amount ;
    mov result, eax ;
  }
  return result;
#else
#error AtomicSubtract: no implementation for this compiler/platform
#endif
}
470 ///////////////////////////////////////////////////////////////////////////
471 // Fast spinlock implmentation. No backoff when busy
472 ///////////////////////////////////////////////////////////////////////////
473 CAtomicSpinLock::CAtomicSpinLock(long& lock) : m_Lock(lock)
475 while (cas(&m_Lock, 0, 1) != 0) {} // Lock
477 CAtomicSpinLock::~CAtomicSpinLock()
479 m_Lock = 0; // Unlock