/*
 *      Copyright (C) 2005-2008 Team XBMC
 *
 *  This Program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  This Program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with XBMC; see the file COPYING. If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 *  http://www.gnu.org/copyleft/gpl.html
 *
 */
29 #include <emmintrin.h>
// Use real compiler defines in here, as we want to
// avoid including system.h or other magic includes.
// Use 'gcc -dM -E - < /dev/null' or similar to find them.
// Targets on which round_int() must not use its inline-assembly code paths.
// When DISABLE_MATHUTILS_ASM_ROUND_INT is defined, round_int() computes its
// result with floor(x + 0.5) instead.
#if defined(__ppc__) || \
    defined(__powerpc__) || \
   (defined(__APPLE__) && defined(__arm__) && defined(__llvm__)) || \
   (defined(__ANDROID__) && defined(__arm__)) || \
    defined(TARGET_RASPBERRY_PI)
  #define DISABLE_MATHUTILS_ASM_ROUND_INT
#endif
// Targets on which truncate_int() must not use its inline-assembly code paths.
// Note that, unlike the round_int() list, every Apple/LLVM build is excluded
// here, not only the ARM ones.
#if defined(__ppc__) || \
    defined(__powerpc__) || \
   (defined(__APPLE__) && defined(__llvm__)) || \
   (defined(__ANDROID__) && defined(__arm__)) || \
    defined(TARGET_RASPBERRY_PI)
  #define DISABLE_MATHUTILS_ASM_TRUNCATE_INT
#endif
/*! \brief Math utility namespace.
 Note that the test() routine should return true for all implementations.

 See http://ldesoras.free.fr/doc/articles/rounding_en.pdf for an explanation
 of the technique used on x86.
 */
// NOTE: GCC has been reported to do something stupid with optimization on
// release builds when these functions assert; keep that in mind when touching
// the assert() calls below.
/*! \brief Round to nearest integer.
 This routine does fast rounding to the nearest integer.
 In the case (k + 0.5 for any integer k) we round up to k+1, and in all other
 instances we should return the nearest integer.
 Thus, { -1.5, -0.5, 0.5, 1.5 } is rounded to { -1, 0, 1, 2 }.
 It preserves the property that round(k) - round(k-1) = 1 for all doubles k.

 \param x value to round; the asserts below require
          INT_MIN / 2 - 1 < x < INT_MAX / 2 + 1.
 \return x rounded to the nearest integer, with halves rounded up.

 Make sure MathUtils::test() returns true for each implementation.
 \sa truncate_int, test
 */
inline int round_int(double x)
{
  assert(x > static_cast<double>(INT_MIN / 2) - 1.0);
  assert(x < static_cast<double>(INT_MAX / 2) + 1.0);
  const float round_to_nearest = 0.5f;
  int i;

#if defined(DISABLE_MATHUTILS_ASM_ROUND_INT)
  i = static_cast<int>(floor(x + round_to_nearest));

#elif defined(__arm__)
  // From 'ARM-v7-M Architecture Reference Manual' page A7-569:
  //  "The floating-point to integer operation (vcvt) [normally] uses the Round towards Zero rounding mode"
  // Because of this...we must use some less-than-straightforward logic to perform this operation without
  //  changing the rounding mode flags

  /* The assembly below implements the following logic:
   if (x < 0)
     inc = -0.5f
   else
     inc = 0.5f
   int_val = trunc(x+inc);
   err = x - int_val;
   if (err == 0.5f)
     int_val++;
   return int_val;
  */

  __asm__ __volatile__ (
#if defined(__ARM_PCS_VFP)
    "fconstd d1,#%G[rnd_val]     \n\t" // Copy round_to_nearest into a working register (d1 = 0.5)
#else
    "vmov.F64 d1,%[rnd_val]      \n\t"
#endif
    "fcmpezd %P[value]           \n\t" // Check value against zero (value == 0?)
    "fmstat                      \n\t" // Copy the floating-point status flags into the general-purpose status flags
    "it mi                       \n\t"
    "vnegmi.F64 d1, d1           \n\t" // if N-flag is set, negate round_to_nearest (if (value < 0) d1 = -1 * d1)
    "vadd.F64 d1,%P[value],d1    \n\t" // Add round_to_nearest to value, store result in working register (d1 += value)
    "vcvt.S32.F64 s3,d1          \n\t" // Truncate(round towards zero) (s3 = (int)d1)
    "vmov %[result],s3           \n\t" // Store the integer result in a general-purpose register (result = s3)
    "vcvt.F64.S32 d1,s3          \n\t" // Convert back to floating-point (d1 = (double)s3)
    "vsub.F64 d1,%P[value],d1    \n\t" // Calculate the error (d1 = value - d1)
#if defined(__ARM_PCS_VFP)
    "fconstd d2,#%G[rnd_val]     \n\t" // d2 = 0.5;
#else
    "vmov.F64 d2,%[rnd_val]      \n\t"
#endif
    "fcmped d1, d2               \n\t" // (d1 == 0.5?)
    "fmstat                      \n\t" // Copy the floating-point status flags into the general-purpose status flags
    "it eq                       \n\t"
    "addeq %[result],#1          \n\t" // (if (d1 == d2) result++;)
    : [result] "=r"(i)                                  // Outputs
    : [rnd_val] "Dv" (round_to_nearest), [value] "w"(x) // Inputs
    : "d1", "d2", "s3", "cc"                            // Clobbers ("cc": fmstat overwrites the condition flags)
  );

#elif defined(__SSE2__)
  // cvttsd2si truncates toward zero, so bias by +0.5 for positive values and
  // by just under -0.5 otherwise; this keeps -0.5 -> 0 and -1.5 -> -1.
  const float round_dn_to_nearest = 0.4999999f;
  i = (x > 0) ? _mm_cvttsd_si32(_mm_set_sd(x + round_to_nearest)) : _mm_cvttsd_si32(_mm_set_sd(x - round_dn_to_nearest));

#elif defined(_WIN32)
  // i = round_to_even(2 * x + 0.5) >> 1
  __asm
  {
    fld x
    fadd st, st (0)
    fadd round_to_nearest
    fistp i
    sar i, 1
  }

#elif defined(__i386__) || defined(__x86_64__)
  // x87 version of the same trick: i = round_to_even(2 * x + 0.5) >> 1
  __asm__ __volatile__ (
    "fadd %%st\n\t"
    "fadd %%st(1)\n\t"
    "fistpl %0\n\t"
    "sarl $1, %0\n"
    : "=m"(i) : "u"(round_to_nearest), "t"(x) : "st"
  );

#else
  // Unknown architecture: the x87 sequence above cannot be assembled here,
  // so use the portable formulation.
  i = static_cast<int>(floor(x + round_to_nearest));

#endif
  return i;
}
/*! \brief Truncate to an integer (round toward zero).
 This routine does fast truncation to an integer.
 It should simply drop the fractional portion of the floating point number.

 \param x value to truncate; the asserts below require
          INT_MIN / 2 - 1 < x < INT_MAX / 2 + 1.
 \return the integer part of x.

 Make sure MathUtils::test() returns true for each implementation.
 \sa round_int, test
 */
inline int truncate_int(double x)
{
  assert(x > static_cast<double>(INT_MIN / 2) - 1.0);
  assert(x < static_cast<double>(INT_MAX / 2) + 1.0);
  int i;

#if defined(DISABLE_MATHUTILS_ASM_TRUNCATE_INT)
  i = static_cast<int>(x);

#elif defined(__arm__)
  __asm__ __volatile__ (
    "vcvt.S32.F64 %[result],%P[value]   \n\t" // Truncate(round towards zero) and store the result
    : [result] "=w"(i)                        // Outputs
    : [value] "w"(x)                          // Inputs
  );

#elif defined(__SSE2__)
  // Same intrinsic round_int() uses; cvttsd2si truncates toward zero.
  i = _mm_cvttsd_si32(_mm_set_sd(x));

#elif defined(_WIN32)
  // i = round_to_even(2 * |x| - 0.5) >> 1, sign restored afterwards
  const float round_towards_m_i = -0.5f;
  __asm
  {
    fld x
    fadd st, st (0)
    fabs
    fadd round_towards_m_i
    fistp i
    sar i, 1
  }
  if (x < 0)
    i = -i;

#elif defined(__i386__) || defined(__x86_64__)
  // x87 version of the same trick: i = round_to_even(2 * |x| - 0.5) >> 1
  const float round_towards_m_i = -0.5f;
  __asm__ __volatile__ (
    "fadd %%st\n\t"
    "fabs\n\t"
    "fadd %%st(1)\n\t"
    "fistpl %0\n\t"
    "sarl $1, %0\n"
    : "=m"(i) : "u"(round_towards_m_i), "t"(x) : "st"
  );
  if (x < 0)
    i = -i;

#else
  // Unknown architecture: the x87 sequence above cannot be assembled here,
  // so use the plain conversion, which truncates toward zero.
  i = static_cast<int>(x);

#endif
  return i;
}
/*! \brief Absolute value of a 64-bit signed integer.
 \param a value whose magnitude is wanted.
 \return a when a >= 0, otherwise -a. The magnitude of INT64_MIN is not
         representable; for that input the result wraps back to INT64_MIN
         (two's-complement conversion) instead of invoking undefined behaviour.
 */
inline int64_t abs(int64_t a)
{
  if (a >= 0)
    return a;
  // Negate in unsigned arithmetic: a plain -a overflows for INT64_MIN.
  return static_cast<int64_t>(uint64_t{0} - static_cast<uint64_t>(a));
}
/*! \brief Count the bits that are set in an unsigned integer.
 Each iteration clears the lowest set bit, so the loop runs once per set bit.
 \param v value to inspect.
 \return number of 1 bits in v.
 */
inline unsigned bitcount(unsigned v)
{
  unsigned c = 0;
  for (; v != 0; ++c)
    v &= v - 1; // clear the least significant bit set
  return c;
}
225 // stupid hack to keep compiler from dropping these
226 // functions as unused
227 MathUtils::round_int(0.0);
228 MathUtils::truncate_int(0.0);
233 /*! \brief test routine for round_int and truncate_int
234 Must return true on all platforms.
238 for (int i = -8; i < 8; ++i)
241 int r = (i < 0) ? (i - 1) / 4 : (i + 2) / 4;
243 if (round_int(d) != r || truncate_int(d) != t)
249 } // namespace MathUtils