/*
 *      Copyright (C) 2005-2009 Team XBMC
 *
 *  This Program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  This Program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with XBMC; see the file COPYING. If not, see
 *  <http://www.gnu.org/licenses/>.
 *
 */

#include "Atomics.h"
// The only safe way to be absolutely sure that
// gcc intrinsics are present when using an unknown GCC.
#if defined(__GNUC__) && defined(__GNUC_MINOR__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
  #define HAS_GCC_INTRINSICS
#elif defined(TARGET_DARWIN)
  // safe under darwin gcc-4.2, llvm-gcc-4.2 and clang
  #define HAS_GCC_INTRINSICS
#elif defined(TARGET_FREEBSD)
  // safe under freebsd gcc-4.2 and clang
  #define HAS_GCC_INTRINSICS
#endif
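
// Illustrative note (editor's sketch, not part of the original build logic):
// when HAS_GCC_INTRINSICS is defined, the functions below reduce to the GCC
// __sync builtins, e.g.
//   long old = __sync_val_compare_and_swap(&value, 0, 1); // returns the prior value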

///////////////////////////////////////////////////////////////////////////
// 32-bit atomic compare-and-swap
// Returns previous value of *pAddr
///////////////////////////////////////////////////////////////////////////
long cas(volatile long *pAddr, long expectedVal, long swapVal)
{
#if defined(HAS_GCC_INTRINSICS)
  return(__sync_val_compare_and_swap(pAddr, expectedVal, swapVal));
#elif defined(__ppc__) || defined(__powerpc__) // PowerPC
  long prev;
  __asm__ __volatile__ (
    "1: lwarx   %0,0,%2  \n" /* Load the current value of *pAddr (%2) into prev (%0) and reserve pAddr */
    "   cmpw    0,%0,%3  \n" /* Verify that the current value (%0) == expected value (%3) */
    "   bne-    2f       \n" /* Bail if the two values are not equal [not as expected] */
    "   stwcx.  %4,0,%2  \n" /* Attempt to store swapVal (%4) into *pAddr (%2) [pAddr must still be reserved] */
    "   bne-    1b       \n" /* Loop if the reservation was lost */
    "   isync            \n" /* Reconcile multiple processors [if present] */
    "2:                  \n"
    : "=&r" (prev), "+m" (*pAddr)                   /* Outputs  [prev, *pAddr] */
    : "r" (pAddr), "r" (expectedVal), "r" (swapVal) /* Inputs   [pAddr, expectedVal, swapVal] */
    : "cc", "memory");                              /* Clobbers */
  return prev;
#elif defined(__arm__)
  register long prev;
  __asm__ __volatile__ (
    "dmb      ish           \n" // Memory barrier. Make sure all memory accesses appearing before this complete before any that appear after
    "1:                     \n"
    "ldrex    %0, [%1]      \n" // Load the current value of *pAddr (%1) into prev (%0) and mark pAddr exclusive
    "cmp      %0, %2        \n" // Verify that the current value (%0) == expected value (%2)
    "bne      2f            \n" // Bail if the two values are not equal [not as expected]
    "strex    r1, %3, [%1]  \n" // Attempt to store swapVal (%3) into *pAddr (%1); r1 = 0 on success
    "cmp      r1, #0        \n" // Did the exclusive store succeed?
    "bne      1b            \n" // Retry if the exclusive reservation was lost
    "dmb      ish           \n" // Memory barrier.
    "2:                     \n"
    : "=&r" (prev)
    : "r" (pAddr), "r" (expectedVal), "r" (swapVal)
    : "r1", "cc", "memory");
  return prev;
#elif defined(__mips__)
  #error atomic cas undefined for mips
#elif defined(WIN32)
  long prev;
  __asm
  {
    // Load parameters
    mov eax, expectedVal ;
    mov ebx, pAddr ;
    mov ecx, swapVal ;

    // Do the swap
    lock cmpxchg dword ptr [ebx], ecx ;

    // Store the return value
    mov prev, eax ;
  }
  return prev;
#else // Linux / OSX86 (GCC)
  long prev;
  __asm__ __volatile__ (
    "lock/cmpxchg %1, %2"
    : "=a" (prev)
    : "r" (swapVal), "m" (*pAddr), "0" (expectedVal)
    : "memory");
  return prev;
#endif
}

///////////////////////////////////////////////////////////////////////////
// 64-bit atomic compare-and-swap
// Returns previous value of *pAddr
///////////////////////////////////////////////////////////////////////////
long long cas2(volatile long long* pAddr, long long expectedVal, long long swapVal)
{
#if defined(__ppc__) || defined(__powerpc__) || defined(__arm__) || defined(__mips__) // PowerPC, ARM, and MIPS
  // Not available/required
  // Hack to allow compilation
  throw "cas2 is not implemented";
#elif defined(WIN32)
  long long prev;
  __asm
  {
    mov esi, pAddr ;
    mov eax, dword ptr [expectedVal] ; edx:eax = 64-bit expected value
    mov edx, dword ptr expectedVal[4] ;
    mov ebx, dword ptr [swapVal] ; ecx:ebx = 64-bit swap value
    mov ecx, dword ptr swapVal[4] ;
    lock cmpxchg8b qword ptr [esi] ; compare edx:eax with *pAddr, swap in ecx:ebx on match
    mov dword ptr [prev], eax ; prev = previous 64-bit value (edx:eax)
    mov dword ptr prev[4], edx ;
  }
  return prev;
#else // Linux / OSX86 (GCC)
  #if !defined(__x86_64__)
    long long prev;
    __asm__ __volatile__ (
      " push %%ebx             \n" // We have to manually handle ebx, because PIC uses it and the compiler refuses to build anything that touches it
      " mov  %4, %%ebx         \n" // Low dword of swapVal goes into ebx
      " lock/cmpxchg8b (%%esi) \n"
      " pop  %%ebx             \n"
      : "=A" (prev)
      : "c" ((unsigned long)(swapVal >> 32)), "0" (expectedVal), "S" (pAddr), "m" (swapVal)
      : "memory");
    return prev;
  #else
    // Hack to allow compilation on x86_64
    throw "cas2 is not implemented on x86_64!";
  #endif
#endif
}
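
// Illustrative usage (editor's sketch): cas2 follows the same contract as cas
// but on a 64-bit quantity, and only has a real implementation on 32-bit x86.
//   long long v = 0;
//   if (cas2(&v, 0LL, 42LL) == 0LL) { /* v is now 42 */ }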

///////////////////////////////////////////////////////////////////////////
// 32-bit atomic increment
// Returns new value of *pAddr
///////////////////////////////////////////////////////////////////////////
long AtomicIncrement(volatile long* pAddr)
{
#if defined(HAS_GCC_INTRINSICS)
  return __sync_add_and_fetch(pAddr, 1);
#elif defined(__ppc__) || defined(__powerpc__) // PowerPC
  long val;
  __asm__ __volatile__ (
    "sync                \n" // Full barrier before the update
    "1: lwarx  %0, 0, %1 \n" // Load *pAddr (%1) into val (%0) and reserve pAddr
    "   addic  %0, %0, 1 \n" // (val += 1)
    "   stwcx. %0, 0, %1 \n" // Attempt to store the new value
    "   bne-   1b        \n" // Retry if the reservation was lost
    "   isync            \n"
    : "=&r" (val)
    : "r" (pAddr)
    : "cc", "xer", "memory");
  return val;
#elif defined(__arm__) && !defined(__ARM_ARCH_5__)
  register long val;
  __asm__ __volatile__ (
    "dmb      ish          \n" // Memory barrier. Make sure all memory accesses appearing before this complete before any that appear after
    "1:                    \n"
    "ldrex    %0, [%1]     \n" // (val = *pAddr)
    "add      %0, #1       \n" // (val += 1)
    "strex    r1, %0, [%1] \n" // Attempt to store the new value; r1 = 0 on success
    "cmp      r1, #0       \n"
    "bne      1b           \n" // Retry if the exclusive reservation was lost
    "dmb      ish          \n" // Memory barrier.
    : "=&r" (val)
    : "r" (pAddr)
    : "r1", "cc", "memory");
  return val;
#elif defined(__mips__)
  #error AtomicIncrement undefined for mips
#elif defined(WIN32)
  long val;
  __asm
  {
    mov eax, pAddr ;
    lock inc dword ptr [eax] ;
    mov eax, [eax] ;
    mov val, eax ;
  }
  // Note: the read-back above is not atomic with the increment, so under
  // contention the returned value may already include other threads' updates.
  return val;
#elif defined(__x86_64__)
  register long result;
  __asm__ __volatile__ (
    "lock/xaddq %q0, %1"
    : "=r" (result), "=m" (*pAddr)
    : "0" ((long) (1)), "m" (*pAddr));
  return result + 1; // xadd leaves the old value in result; new value = old + 1
#else // Linux / OSX86 (GCC)
  register long reg __asm__ ("eax") = 1;
  __asm__ __volatile__ (
    "lock/xadd %0, %1 \n"
    : "+r" (reg)
    : "m" (*pAddr)
    : "memory");
  return reg + 1; // xadd leaves the old value in reg; new value = old + 1
#endif
}

///////////////////////////////////////////////////////////////////////////
// 32-bit atomic add
// Returns new value of *pAddr
///////////////////////////////////////////////////////////////////////////
long AtomicAdd(volatile long* pAddr, long amount)
{
#if defined(HAS_GCC_INTRINSICS)
  return __sync_add_and_fetch(pAddr, amount);
#elif defined(__ppc__) || defined(__powerpc__) // PowerPC
  long val;
  __asm__ __volatile__ (
    "sync                 \n" // Full barrier before the update
    "1: lwarx  %0, 0, %1  \n" // Load *pAddr (%1) into val (%0) and reserve pAddr
    "   add    %0, %2, %0 \n" // (val += amount)
    "   stwcx. %0, 0, %1  \n" // Attempt to store the new value
    "   bne-   1b         \n" // Retry if the reservation was lost
    "   isync             \n"
    : "=&r" (val)
    : "r" (pAddr), "r" (amount)
    : "cc", "memory");
  return val;
#elif defined(__arm__) && !defined(__ARM_ARCH_5__)
  register long val;
  __asm__ __volatile__ (
    "dmb      ish          \n" // Memory barrier. Make sure all memory accesses appearing before this complete before any that appear after
    "1:                    \n"
    "ldrex    %0, [%1]     \n" // (val = *pAddr)
    "add      %0, %2       \n" // (val += amount)
    "strex    r1, %0, [%1] \n" // Attempt to store the new value; r1 = 0 on success
    "cmp      r1, #0       \n"
    "bne      1b           \n" // Retry if the exclusive reservation was lost
    "dmb      ish          \n" // Memory barrier.
    : "=&r" (val)
    : "r" (pAddr), "r" (amount)
    : "r1", "cc", "memory");
  return val;
#elif defined(__mips__)
  #error AtomicAdd undefined for mips
#elif defined(WIN32)
  long val;
  __asm
  {
    mov eax, amount ;
    mov ebx, pAddr ;
    lock xadd dword ptr [ebx], eax ; eax receives the old value of *pAddr
    add eax, amount ; new value = old + amount
    mov val, eax ;
  }
  return val;
#elif defined(__x86_64__)
  register long result;
  __asm__ __volatile__ (
    "lock/xaddq %q0, %1"
    : "=r" (result), "=m" (*pAddr)
    : "0" ((long) (amount)), "m" (*pAddr));
  return result + amount; // xadd leaves the old value in result
#else // Linux / OSX86 (GCC)
  register long reg __asm__ ("eax") = amount;
  __asm__ __volatile__ (
    "lock/xadd %0, %1 \n"
    : "+r" (reg)
    : "m" (*pAddr)
    : "memory");
  return reg + amount; // xadd leaves the old value in reg
#endif
}

///////////////////////////////////////////////////////////////////////////
// 32-bit atomic decrement
// Returns new value of *pAddr
///////////////////////////////////////////////////////////////////////////
long AtomicDecrement(volatile long* pAddr)
{
#if defined(HAS_GCC_INTRINSICS)
  return __sync_sub_and_fetch(pAddr, 1);
#elif defined(__ppc__) || defined(__powerpc__) // PowerPC
  long val;
  __asm__ __volatile__ (
    "sync                 \n" // Full barrier before the update
    "1: lwarx  %0, 0, %1  \n" // Load *pAddr (%1) into val (%0) and reserve pAddr
    "   addic  %0, %0, -1 \n" // (val -= 1)
    "   stwcx. %0, 0, %1  \n" // Attempt to store the new value
    "   bne-   1b         \n" // Retry if the reservation was lost
    "   isync             \n"
    : "=&r" (val)
    : "r" (pAddr)
    : "cc", "xer", "memory");
  return val;
#elif defined(__arm__)
  register long val;
  __asm__ __volatile__ (
    "dmb      ish          \n" // Memory barrier. Make sure all memory accesses appearing before this complete before any that appear after
    "1:                    \n"
    "ldrex    %0, [%1]     \n" // (val = *pAddr)
    "sub      %0, #1       \n" // (val -= 1)
    "strex    r1, %0, [%1] \n" // Attempt to store the new value; r1 = 0 on success
    "cmp      r1, #0       \n"
    "bne      1b           \n" // Retry if the exclusive reservation was lost
    "dmb      ish          \n" // Memory barrier.
    : "=&r" (val)
    : "r" (pAddr)
    : "r1", "cc", "memory");
  return val;
#elif defined(__mips__)
  #error AtomicDecrement undefined for mips
#elif defined(WIN32)
  long val;
  __asm
  {
    mov eax, pAddr ;
    lock dec dword ptr [eax] ;
    mov eax, [eax] ;
    mov val, eax ;
  }
  // See the note in AtomicIncrement: the read-back is not atomic with the decrement.
  return val;
#elif defined(__x86_64__)
  register long result;
  __asm__ __volatile__ (
    "lock/xaddq %q0, %1"
    : "=r" (result), "=m" (*pAddr)
    : "0" ((long) (-1)), "m" (*pAddr));
  return result - 1; // xadd leaves the old value in result
#else // Linux / OSX86 (GCC)
  register long reg __asm__ ("eax") = -1;
  __asm__ __volatile__ (
    "lock/xadd %0, %1 \n"
    : "+r" (reg)
    : "m" (*pAddr)
    : "memory");
  return reg - 1; // xadd leaves the old value in reg
#endif
}

///////////////////////////////////////////////////////////////////////////
// 32-bit atomic subtract
// Returns new value of *pAddr
///////////////////////////////////////////////////////////////////////////
long AtomicSubtract(volatile long* pAddr, long amount)
{
#if defined(HAS_GCC_INTRINSICS)
  return __sync_sub_and_fetch(pAddr, amount);
#elif defined(__ppc__) || defined(__powerpc__) // PowerPC
  long val;
  amount *= -1; // Subtract by adding the negated amount
  __asm__ __volatile__ (
    "sync                 \n" // Full barrier before the update
    "1: lwarx  %0, 0, %1  \n" // Load *pAddr (%1) into val (%0) and reserve pAddr
    "   add    %0, %2, %0 \n" // (val += -amount)
    "   stwcx. %0, 0, %1  \n" // Attempt to store the new value
    "   bne-   1b         \n" // Retry if the reservation was lost
    "   isync             \n"
    : "=&r" (val)
    : "r" (pAddr), "r" (amount)
    : "cc", "memory");
  return val;
#elif defined(__arm__)
  register long val;
  __asm__ __volatile__ (
    "dmb      ish          \n" // Memory barrier. Make sure all memory accesses appearing before this complete before any that appear after
    "1:                    \n"
    "ldrex    %0, [%1]     \n" // (val = *pAddr)
    "sub      %0, %2       \n" // (val -= amount)
    "strex    r1, %0, [%1] \n" // Attempt to store the new value; r1 = 0 on success
    "cmp      r1, #0       \n"
    "bne      1b           \n" // Retry if the exclusive reservation was lost
    "dmb      ish          \n" // Memory barrier.
    : "=&r" (val)
    : "r" (pAddr), "r" (amount)
    : "r1", "cc", "memory");
  return val;
#elif defined(__mips__)
  #error AtomicSubtract undefined for mips
#elif defined(WIN32)
  long val;
  __asm
  {
    mov eax, amount ;
    neg eax ; eax = -amount
    mov ebx, pAddr ;
    lock xadd dword ptr [ebx], eax ; eax receives the old value of *pAddr
    sub eax, amount ; new value = old - amount
    mov val, eax ;
  }
  return val;
#elif defined(__x86_64__)
  register long result;
  __asm__ __volatile__ (
    "lock/xaddq %q0, %1"
    : "=r" (result), "=m" (*pAddr)
    : "0" ((long) (-1 * amount)), "m" (*pAddr));
  return result - amount; // xadd leaves the old value in result
#else // Linux / OSX86 (GCC)
  register long reg __asm__ ("eax") = -1 * amount;
  __asm__ __volatile__ (
    "lock/xadd %0, %1 \n"
    : "+r" (reg)
    : "m" (*pAddr)
    : "memory");
  return reg - amount; // xadd leaves the old value in reg
#endif
}

///////////////////////////////////////////////////////////////////////////
// Fast spinlock implementation. No backoff when busy
///////////////////////////////////////////////////////////////////////////
CAtomicSpinLock::CAtomicSpinLock(long& lock) : m_Lock(lock)
{
  while (cas(&m_Lock, 0, 1) != 0) {} // Lock
}

CAtomicSpinLock::~CAtomicSpinLock()
{
  m_Lock = 0; // Unlock
}
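
// Illustrative usage (editor's sketch; g_sharedLock is a hypothetical name):
//   static long g_sharedLock = 0;
//   {
//     CAtomicSpinLock guard(g_sharedLock); // constructor spins until it owns the lock
//     /* ...critical section... */
//   }                                      // destructor releases the lock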