2 * Copyright (C) 2010-2012 Team XBMC
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with XBMC; see the file COPYING. If not, write to
17 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
18 * http://www.gnu.org/copyleft/gpl.html
21 #ifndef __STDC_LIMIT_MACROS
22 #define __STDC_LIMIT_MACROS
25 #include "utils/StdString.h"
27 #include "utils/log.h"
28 #include "utils/TimeUtils.h"
32 /* Definition of the scalar RNG state. It is seeded once, during static initialization, from CurrentHostCounter() / 1000.0f truncated to unsigned int. */
33 unsigned int CAEUtil::m_seed = (unsigned int)(CurrentHostCounter() / 1000.0f);
35 /* Definition of the SSE RNG state: four 32-bit lanes filled alternately with m_seed and m_seed+1, declared through MEMALIGN(16, ...). */
/* NOTE(review): this initializer reads CAEUtil::m_seed, so it depends on m_seed (defined just above) being initialized first - keep the two definitions in this order. */
36 MEMALIGN(16, __m128i CAEUtil::m_sseSeed) = _mm_set_epi32(CAEUtil::m_seed, CAEUtil::m_seed+1, CAEUtil::m_seed, CAEUtil::m_seed+1);
/*
  Picks a standard channel layout from nothing but a channel count.
  Every call logs a LOGWARNING asking for the calling code to be fixed.
  Counts 1..8 select the layout listed in the case table below. Counts
  outside that range take the branch guarded by the range check; what that
  branch returns is not shown here (presumably the default-constructed
  result - TODO confirm).
*/
39 CAEChannelInfo CAEUtil::GuessChLayout(const unsigned int channels)
41 CLog::Log(LOGWARNING, "CAEUtil::GuessChLayout - This method should really never be used, please fix the code that called this");
43 CAEChannelInfo result;
/* only counts that have an entry in the table below are accepted */
44 if (channels < 1 || channels > 8)
/* 6 channels maps to 5.1 and 8 channels to 7.1; every other count maps to an x.0 layout */
49 case 1: result = AE_CH_LAYOUT_1_0; break;
50 case 2: result = AE_CH_LAYOUT_2_0; break;
51 case 3: result = AE_CH_LAYOUT_3_0; break;
52 case 4: result = AE_CH_LAYOUT_4_0; break;
53 case 5: result = AE_CH_LAYOUT_5_0; break;
54 case 6: result = AE_CH_LAYOUT_5_1; break;
55 case 7: result = AE_CH_LAYOUT_7_0; break;
56 case 8: result = AE_CH_LAYOUT_7_1; break;
/*
  Returns the printable name of a standard channel layout.
  The name comes from a static table indexed directly by the enum value.
  Values below 0 or at/above AE_CH_LAYOUT_MAX are filtered out first; the
  value returned for them is not shown here.
*/
62 const char* CAEUtil::GetStdChLayoutName(const enum AEStdChLayout layout)
64 if (layout < 0 || layout >= AE_CH_LAYOUT_MAX)
/* the table is indexed by the enum value, so its entry order has to follow the AEStdChLayout order */
67 static const char* layouts[AE_CH_LAYOUT_MAX] =
70 "2.0", "2.1", "3.0", "3.1", "4.0",
71 "4.1", "5.0", "5.1", "7.0", "7.1"
/* in range: layout was checked against the table size above */
74 return layouts[layout];
/*
  Returns the bit width associated with a data format.
  The width is read from a static table indexed directly by dataFormat.
  Values below 0 or at/above AE_FMT_MAX are filtered out first; the value
  returned for them is not shown here.
*/
77 const unsigned int CAEUtil::DataFormatToBits(const enum AEDataFormat dataFormat)
79 if (dataFormat < 0 || dataFormat >= AE_FMT_MAX)
/* one entry per AEDataFormat value, in enum order (only some rows are shown here) */
82 static const unsigned int formats[AE_FMT_MAX] =
/* sizeof() yields bytes; << 3 multiplies by 8 to express the width in bits */
103 sizeof(double) << 3, /* DOUBLE */
104 sizeof(float ) << 3, /* FLOAT */
/* in range: dataFormat was checked against the table size above */
115 return formats[dataFormat];
/*
  Returns the textual name of a data format.
  The name is read from a static table indexed directly by dataFormat.
  Values below 0 or at/above AE_FMT_MAX are filtered out first; the value
  returned for them is not shown here.
*/
118 const char* CAEUtil::DataFormatToStr(const enum AEDataFormat dataFormat)
120 if (dataFormat < 0 || dataFormat >= AE_FMT_MAX)
/* one entry per AEDataFormat value, in enum order (only some rows are shown here) */
123 static const char *formats[AE_FMT_MAX] =
138 "AE_FMT_S24NE4", /* S24 in 4 bytes */
142 "AE_FMT_S24NE3", /* S24 in 3 bytes */
147 /* for passthrough streams and the like */
/* in range: dataFormat was checked against the table size above */
157 return formats[dataFormat];
/*
  Scales the floats starting at data by mul, in place, using SSE.
  data  - buffer to scale; it is both read and written
  mul   - factor applied to every element
  count - number of floats to process
  Visible phases: a lead-in that runs until data is 16-byte aligned, an
  aligned loop over groups of four floats, and a tail for the leftovers
  (the two- and three-float cases are shown below).
*/
161 void CAEUtil::SSEMulArray(float *data, const float mul, uint32_t count)
/* broadcast mul into all four lanes */
163 const __m128 m = _mm_set_ps1(mul);
165 /* work around invalid alignment */
/* _mm_load_ps below needs a 16-byte aligned address, so loop while any of the low four address bits is set (loop body not shown here; presumably it handles one float per pass - confirm) */
166 while (((uintptr_t)data & 0xF) && count > 0)
/* even = count rounded down to a multiple of four */
173 uint32_t even = count & ~0x3;
174 for (uint32_t i = 0; i < even; i+=4, data+=4)
176 __m128 to = _mm_load_ps(data);
/* write the four products straight back over the inputs */
177 *(__m128*)data = _mm_mul_ps (to, m);
/* odd = floats left after the groups of four; data already points at them because the loop advanced it */
182 uint32_t odd = count - even;
/* two floats left: pad the vector with zeros and copy back only lanes 0 and 1 */
190 to = _mm_setr_ps(data[0], data[1], 0, 0);
191 __m128 ou = _mm_mul_ps(to, m);
192 data[0] = ((float*)&ou)[0];
193 data[1] = ((float*)&ou)[1];
/* three floats left: same approach, copying back lanes 0..2 */
197 to = _mm_setr_ps(data[0], data[1], data[2], 0);
198 __m128 ou = _mm_mul_ps(to, m);
199 data[0] = ((float*)&ou)[0];
200 data[1] = ((float*)&ou)[1];
201 data[2] = ((float*)&ou)[2];
/*
  Multiply-accumulate over two float buffers using SSE: each element of
  data is increased by the matching element of add times mul.
  data  - accumulator buffer; it is both read and written
  add   - source buffer; only read in the lines shown here
  mul   - factor applied to every element of add
  count - number of floats to process
  Visible phases: a scalar lead-in while either pointer is not 16-byte
  aligned, an aligned loop over groups of four floats, and a tail for the
  leftovers.
*/
207 void CAEUtil::SSEMulAddArray(float *data, float *add, const float mul, uint32_t count)
/* broadcast mul into all four lanes */
209 const __m128 m = _mm_set_ps1(mul);
211 /* work around invalid alignment */
/* NOTE(review): both pointers must be aligned before the vector loop can start. If they are misaligned by different amounts and the loop body advances them together (body only partly shown here), this condition never clears and every element goes through the scalar statement below - confirm callers pass equally aligned buffers. */
212 while ((((uintptr_t)data & 0xF) || ((uintptr_t)add & 0xF)) && count > 0)
214 data[0] += add[0] * mul;
/* even = count rounded down to a multiple of four */
220 uint32_t even = count & ~0x3;
221 for (uint32_t i = 0; i < even; i+=4, data+=4, add+=4)
/* aligned loads of four floats from each buffer */
223 __m128 ad = _mm_load_ps(add );
224 __m128 to = _mm_load_ps(data);
/* data = data + add * mul, written straight back over data */
225 *(__m128*)data = _mm_add_ps (to, _mm_mul_ps(ad, m));
/* odd = floats left after the groups of four; data and add already point at them */
230 uint32_t odd = count - even;
/* scalar form of the same update for a single element */
232 data[0] += add[0] * mul;
/* two floats left: pad both vectors with zeros and copy back only lanes 0 and 1 */
239 ad = _mm_setr_ps(add [0], add [1], 0, 0);
240 to = _mm_setr_ps(data[0], data[1], 0, 0);
241 __m128 ou = _mm_add_ps(to, _mm_mul_ps(ad, m));
242 data[0] = ((float*)&ou)[0];
243 data[1] = ((float*)&ou)[1];
/* three floats left: same approach, copying back lanes 0..2 */
247 ad = _mm_setr_ps(add [0], add [1], add [2], 0);
248 to = _mm_setr_ps(data[0], data[1], data[2], 0);
249 __m128 ou = _mm_add_ps(to, _mm_mul_ps(ad, m));
250 data[0] = ((float*)&ou)[0];
251 data[1] = ((float*)&ou)[1];
252 data[2] = ((float*)&ou)[2];
/*
  Soft-clips a single sample and returns the clipped value.
  Two alternative implementations are visible: a fast rational
  approximation and a slower tanh-based one followed by a hard clamp. How
  one of them is selected is not shown here (presumably a preprocessor
  switch - TODO confirm).
*/
259 inline float CAEUtil::SoftClamp(const float x)
263 This is a rational function to approximate a tanh-like soft clipper.
264 It is based on the pade-approximation of the tanh function with tweaked coefficients.
265 See: http://www.musicdsp.org/showone.php?id=238
/* fast path; y is defined on a line not shown here (presumably x * x, matching the dt * dt product in ClampArray - confirm) */
272 return x * (27.0f + y) / (27.0f + 9.0f * y);
274 /* slower method using tanh, but more accurate */
/* k is the threshold used by the two tanh expressions below; (1 - k) scales both the tanh argument and its result */
276 static const double k = 0.9f;
277 /* perform a soft clamp */
/* NOTE(review): the two statements below assign to x, which the signature declares const; this can only build while this path is compiled out - confirm before enabling it. */
279 x = (float) (tanh((x - k) / (1 - k)) * (1 - k) + k);
281 x = (float) (tanh((x + k) / (1 - k)) * (1 - k) - k);
283 /* hard clamp anything still outside the bounds */
289 /* return the final sample */
/*
  Soft-clips the floats starting at data, in place.
  data  - buffer to clip; it is both read and written
  count - number of floats to process
  Two implementations are visible: a plain loop that calls SoftClamp on
  every element, and an SSE version with a scalar lead-in until data is
  16-byte aligned, an aligned loop over groups of four floats, and a tail
  for the leftovers. How one of them is selected is not shown here
  (presumably a preprocessor switch - TODO confirm).
*/
294 void CAEUtil::ClampArray(float *data, uint32_t count)
/* scalar implementation: one SoftClamp call per element */
297 for (uint32_t i = 0; i < count; ++i)
298 data[i] = SoftClamp(data[i]);
/* SSE implementation: constants broadcast into all four lanes */
301 const __m128 c1 = _mm_set_ps1(27.0f);
/* NOTE(review): c2 is the constant 27 + 9 = 36, whereas the denominator in SoftClamp is 27 + 9 * y. The expressions that consume c2 are not shown here - confirm that the vector path really computes the same function as SoftClamp. */
302 const __m128 c2 = _mm_set_ps1(27.0f + 9.0f);
304 /* work around invalid alignment */
/* _mm_load_ps below needs a 16-byte aligned address, so leading elements go through SoftClamp one at a time */
305 while (((uintptr_t)data & 0xF) && count > 0)
307 data[0] = SoftClamp(data[0]);
/* even = count rounded down to a multiple of four */
312 uint32_t even = count & ~0x3;
313 for (uint32_t i = 0; i < even; i+=4, data+=4)
315 /* tanh approx clamp */
316 __m128 dt = _mm_load_ps(data);
/* tmp = dt squared, lane by lane */
317 __m128 tmp = _mm_mul_ps(dt, dt);
318 *(__m128*)data = _mm_div_ps(
/* odd = floats left after the groups of four; data already points at them */
329 uint32_t odd = count - even;
/* scalar form for a single element */
331 data[0] = SoftClamp(data[0]);
/* two floats left: pad the vector with zeros and copy back only lanes 0 and 1 */
339 /* tanh approx clamp */
340 dt = _mm_setr_ps(data[0], data[1], 0, 0);
341 tmp = _mm_mul_ps(dt, dt);
350 data[0] = ((float*)&out)[0];
351 data[1] = ((float*)&out)[1];
/* three floats left: same approach, copying back lanes 0..2 */
355 /* tanh approx clamp */
356 dt = _mm_setr_ps(data[0], data[1], data[2], 0);
357 tmp = _mm_mul_ps(dt, dt);
366 data[0] = ((float*)&out)[0];
367 data[1] = ((float*)&out)[1];
368 data[2] = ((float*)&out)[2];
376 Rand implementations based on:
377 http://software.intel.com/en-us/articles/fast-random-number-generator-on-the-intel-pentiumr-4-processor/
378 This is NOT safe for crypto work, but perfectly fine for audio usage (dithering)
/*
  Returns one pseudo-random float and advances the shared scalar state
  m_seed with the linear congruential step m_seed = 214013 * m_seed + 2531011
  (unsigned 32-bit wraparound).
  min, max - only their difference is used: delta is half of (max - min).
  NOTE(review): m_seed is unsigned, so (float)m_seed * factor spans roughly
  0 .. 2 * delta and the returned value spans roughly -delta .. +delta. That
  equals [min, max] only when min == -max; an asymmetric range is not
  honoured - confirm all callers pass a symmetric range.
*/
380 float CAEUtil::FloatRand1(const float min, const float max)
/* half-width of the requested range */
382 const float delta = (max - min) / 2;
/* scale that maps INT32_MAX to delta */
383 const float factor = delta / (float)INT32_MAX;
/* advance the generator, scale the new state, then shift it down by delta */
384 return ((float)(m_seed = (214013 * m_seed + 2531011)) * factor) - delta;
/*
  Generates four pseudo-random floats per call.
  min, max  - as in FloatRand1, the scalar path uses only half of their difference (delta)
  result    - receives the four floats (stored with an unaligned store on the SSE path)
  sseresult - on the SSE path, receives the four floats as one __m128
  Two implementations are visible. The SSE one advances the four-lane state
  m_sseSeed and scales it by f; the conditions that decide whether
  *sseresult and/or result[] are written are not shown here. The fallback
  advances the scalar m_seed four times with the same step as FloatRand1
  and requires result to be non-NULL and sseresult to be NULL.
*/
387 void CAEUtil::FloatRand4(const float min, const float max, float result[4], __m128 *sseresult/* = NULL */)
391 this method may be called from other SSE code, we need
392 to calculate the delta & factor using SSE as the FPU
393 state is unknown and _mm_clear() is expensive.
/* lane-wise constants for building the scale factor f */
395 MEMALIGN(16, static const __m128 point5 ) = _mm_set_ps1(0.5f);
396 MEMALIGN(16, static const __m128 int32max) = _mm_set_ps1((const float)INT32_MAX);
/* f is the per-lane scale applied to the integer state further down; its full expression is not shown here (presumably half of (max - min) divided by INT32_MAX, mirroring the scalar factor - TODO confirm) */
397 MEMALIGN(16, __m128 f) = _mm_div_ps(
408 MEMALIGN(16, __m128i cur_seed_split);
409 MEMALIGN(16, __m128i multiplier);
410 MEMALIGN(16, __m128i adder);
411 MEMALIGN(16, __m128i mod_mask);
412 MEMALIGN(16, __m128 res);
/* per-lane generator constants: lane i is advanced as seed[i] * mult[i] + gadd[i] with 32-bit wraparound */
413 MEMALIGN(16, static const unsigned int mult [4]) = {214013, 17405, 214013, 69069};
414 MEMALIGN(16, static const unsigned int gadd [4]) = {2531011, 10395331, 13737667, 1};
/* keeps lanes 0 and 2, i.e. the low 32 bits of each 64-bit product made by _mm_mul_epu32 */
415 MEMALIGN(16, static const unsigned int mask [4]) = {0xFFFFFFFF, 0, 0xFFFFFFFF, 0};
417 adder = _mm_load_si128((__m128i*)gadd);
418 multiplier = _mm_load_si128((__m128i*)mult);
419 mod_mask = _mm_load_si128((__m128i*)mask);
/* copy of the state with lanes swapped pairwise (1,0,3,2) so that lanes 1 and 3 sit where _mm_mul_epu32 reads its inputs */
420 cur_seed_split = _mm_shuffle_epi32(m_sseSeed, _MM_SHUFFLE(2, 3, 0, 1));
/* _mm_mul_epu32 only multiplies lanes 0 and 2: first the original lanes 0 and 2 ... */
422 m_sseSeed = _mm_mul_epu32(m_sseSeed, multiplier);
/* ... then, after swapping the multipliers the same way, the original lanes 1 and 3 */
423 multiplier = _mm_shuffle_epi32(multiplier, _MM_SHUFFLE(2, 3, 0, 1));
424 cur_seed_split = _mm_mul_epu32(cur_seed_split, multiplier);
/* drop the high half of every 64-bit product */
426 m_sseSeed = _mm_and_si128(m_sseSeed, mod_mask);
427 cur_seed_split = _mm_and_si128(cur_seed_split, mod_mask);
/* move the products for lanes 1 and 3 back to their own lanes and merge the two halves */
428 cur_seed_split = _mm_shuffle_epi32(cur_seed_split, _MM_SHUFFLE(2, 3, 0, 1));
429 m_sseSeed = _mm_or_si128(m_sseSeed, cur_seed_split);
/* add the per-lane increments; m_sseSeed now holds the next state */
430 m_sseSeed = _mm_add_epi32(m_sseSeed, adder);
432 /* adjust the value to the range requested */
/* the state is converted as signed 32-bit integers, so res can be negative and no offset is subtracted afterwards */
433 res = _mm_cvtepi32_ps(m_sseSeed);
/* hand the scaled vector back through the out-pointer */
435 *sseresult = _mm_mul_ps(res, f);
/* scale and store to the float array; storeu places no alignment requirement on result */
438 res = _mm_mul_ps(res, f);
439 _mm_storeu_ps(result, res);
441 /* returning a float array, so cleanup */
/* fallback implementation: same arithmetic as FloatRand1, repeated four times */
446 const float delta = (max - min) / 2.0f;
447 const float factor = delta / (float)INT32_MAX;
449 /* cannot return sseresult if we are not using SSE intrinsics */
450 ASSERT(result && !sseresult);
/* each line advances m_seed once, so the four outputs are four consecutive values of the scalar sequence */
452 result[0] = ((float)(m_seed = (214013 * m_seed + 2531011)) * factor) - delta;
453 result[1] = ((float)(m_seed = (214013 * m_seed + 2531011)) * factor) - delta;
454 result[2] = ((float)(m_seed = (214013 * m_seed + 2531011)) * factor) - delta;
455 result[3] = ((float)(m_seed = (214013 * m_seed + 2531011)) * factor) - delta;