2 * Copyright (C) 2005-2013 Team XBMC
5 * This Program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2, or (at your option)
10 * This Program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with XBMC; see the file COPYING. If not, see
17 * <http://www.gnu.org/licenses/>.
21 #define _USE_MATH_DEFINES
24 #include "ConvolutionKernels.h"
25 #include "utils/MathUtils.h"
// Value of pi used by the SINC macro below.
// NOTE(review): no #ifndef guard is visible around this define in this listing - confirm it
// cannot clash with a definition supplied by the math header.
28 #define M_PI 3.14159265358979323846
// Normalized sinc: sin(pi * x) / (pi * x).
// The argument is parenthesized at each use but evaluated twice, and the macro itself has
// no guard for x == 0 (the division would be by zero), so callers must handle that case.
31 #define SINC(x) (sin(M_PI * (x)) / (M_PI * (x)))
// Builds the convolution kernel table for the requested scaling method.
//   method: selects which kernel generator runs (see the branch chain below)
//   size:   requested table size; presumably stored in m_size - the assignment is not
//           visible in this listing, confirm against the full file
// NOTE(review): only part of the body is visible here. The generator invoked by most
// branches, and anything that runs after the branch chain, cannot be seen from this view.
33 CConvolutionKernel::CConvolutionKernel(ESCALINGMETHOD method, int size)
// 4 floats per table entry, matching the 4-component layout the generators write
36 m_floatpixels = new float[m_size * 4];
38 if (method == VS_SCALINGMETHOD_LANCZOS2)
40 else if (method == VS_SCALINGMETHOD_SPLINE36_FAST)
42 else if (method == VS_SCALINGMETHOD_LANCZOS3_FAST)
44 else if (method == VS_SCALINGMETHOD_SPLINE36)
46 else if (method == VS_SCALINGMETHOD_LANCZOS3)
48 else if (method == VS_SCALINGMETHOD_CUBIC)
// B = C = 1/3 are the shape parameters forwarded to BicubicWeight
49 Bicubic(1.0 / 3.0, 1.0 / 3.0);
// Frees the three pixel buffers with delete []: the float table allocated in the
// constructor, the high/low byte table allocated in ToIntFract() and the 8 bit table
// allocated in ToUint8().
55 CConvolutionKernel::~CConvolutionKernel()
57 delete [] m_floatpixels;
58 delete [] m_intfractpixels;
59 delete [] m_uint8pixels;
62 //generate a lanczos2 kernel which can be loaded with RGBA format
63 //each value of RGBA has one tap, so a shader can load 4 taps with a single pixel lookup
// Table layout: entry i occupies m_floatpixels[i * 4] .. m_floatpixels[i * 4 + 3],
// one tap per component.
64 void CConvolutionKernel::Lanczos2()
66 for (int i = 0; i < m_size; i++)
// x is the fractional offset of entry i, in the range [0, 1)
68 double x = (double)i / (double)m_size;
// the 4 taps are evaluated at x - 2, x - 1, x and x + 1 with a lanczos radius of 2
71 for (int j = 0; j < 4; j++)
72 m_floatpixels[i * 4 + j] = (float)LanczosWeight(x + (double)(j - 2), 2.0);
74 //any collection of 4 taps added together needs to be exactly 1.0
75 //for lanczos this is not always the case, so we take each collection of 4 taps
76 //and divide those taps by the sum of the taps
// NOTE(review): the declaration of `weight` is not visible in this listing; presumably it
// is reset to 0 for every entry before the sum below - confirm against the full file.
78 for (int j = 0; j < 4; j++)
79 weight += m_floatpixels[i * 4 + j];
81 for (int j = 0; j < 4; j++)
82 m_floatpixels[i * 4 + j] /= weight;
86 //generate a lanczos3 kernel which can be loaded with RGBA format
87 //each value of RGBA has one tap, so a shader can load 4 taps with a single pixel lookup
88 //the two outer lobes of the lanczos3 kernel are added to the two lobes one step to the middle
89 //this basically looks the same as lanczos3, but the kernel only has 4 taps,
90 //so it can use the 4x4 convolution shader which is twice as fast as the 6x6 one
91 void CConvolutionKernel::Lanczos3Fast()
93 for (int i = 0; i < m_size; i++)
// x is the fractional offset of entry i, in the range [0, 1)
96 double x = (double)i / (double)m_size;
// NOTE(review): `a` is the radius passed to LanczosWeight; its definition is not visible
// in this listing (presumably 3.0 for lanczos3 - confirm).
// Components 0 and 3 each fold two taps into one: (x - 2) + (x - 3) and (x + 1) + (x + 2).
// Components 1 and 2 are the single taps at x - 1 and x.
99 m_floatpixels[i * 4 + 0] = (float)(LanczosWeight(x - 2.0, a) + LanczosWeight(x - 3.0, a));
100 m_floatpixels[i * 4 + 1] = (float) LanczosWeight(x - 1.0, a);
101 m_floatpixels[i * 4 + 2] = (float) LanczosWeight(x , a);
102 m_floatpixels[i * 4 + 3] = (float)(LanczosWeight(x + 1.0, a) + LanczosWeight(x + 2.0, a));
104 //any collection of 4 taps added together needs to be exactly 1.0
105 //for lanczos this is not always the case, so we take each collection of 4 taps
106 //and divide those taps by the sum of the taps
// NOTE(review): the declaration of `weight` is not visible in this listing - confirm it is
// reset for every entry.
108 for (int j = 0; j < 4; j++)
109 weight += m_floatpixels[i * 4 + j];
111 for (int j = 0; j < 4; j++)
112 m_floatpixels[i * 4 + j] /= weight;
116 //generate a lanczos3 kernel which can be loaded with RGBA format
117 //each value of RGB has one tap, so a shader can load 3 taps with a single pixel lookup
// The 6 taps of one sample position are split over two table entries: entry i and entry
// i + m_size / 2 (see the normalization loop below).
118 void CConvolutionKernel::Lanczos3()
120 for (int i = 0; i < m_size; i++)
// x is the fractional offset of entry i, in the range [0, 1)
122 double x = (double)i / (double)m_size;
// the 3 taps of this entry are evaluated at 2x - 3, 2x - 1 and 2x + 1 with radius 3
125 for (int j = 0; j < 3; j++)
126 m_floatpixels[i * 4 + j] = (float)LanczosWeight(x * 2.0 + (double)(j * 2 - 3), 3.0);
// the fourth component carries no tap
128 m_floatpixels[i * 4 + 3] = 0.0;
131 //any collection of 6 taps added together needs to be exactly 1.0
132 //for lanczos this is not always the case, so we take each collection of 6 taps
133 //and divide those taps by the sum of the taps
// Entry i + m_size / 2 has an x that is larger by 0.5 when m_size is even (assumed, TODO
// confirm), so its taps land at 2x - 2, 2x and 2x + 2 and interleave with those of entry i.
134 for (int i = 0; i < m_size / 2; i++)
// NOTE(review): the declaration of `weight` is not visible in this listing.
137 for (int j = 0; j < 3; j++)
139 weight += m_floatpixels[i * 4 + j];
140 weight += m_floatpixels[(i + m_size / 2) * 4 + j];
// both halves are divided by the same sum so that the 6 taps together add up to 1.0
142 for (int j = 0; j < 3; j++)
144 m_floatpixels[i * 4 + j] /= weight;
145 m_floatpixels[(i + m_size / 2) * 4 + j] /= weight;
// Generates a 4 tap kernel from Spline36Weight, one tap per component of each entry.
// Same tap arrangement as Lanczos3Fast: the outer taps are added onto their inner
// neighbours so that only 4 values per entry are needed.
150 void CConvolutionKernel::Spline36Fast()
152 for (int i = 0; i < m_size; i++)
// x is the fractional offset of entry i, in the range [0, 1)
154 double x = (double)i / (double)m_size;
// components 0 and 3 each hold the sum of two taps, components 1 and 2 a single tap
157 m_floatpixels[i * 4 + 0] = (float)(Spline36Weight(x - 2.0) + Spline36Weight(x - 3.0));
158 m_floatpixels[i * 4 + 1] = (float) Spline36Weight(x - 1.0);
159 m_floatpixels[i * 4 + 2] = (float) Spline36Weight(x );
160 m_floatpixels[i * 4 + 3] = (float)(Spline36Weight(x + 1.0) + Spline36Weight(x + 2.0));
// normalize: divide the 4 values of this entry by their sum
// NOTE(review): the declaration of `weight` is not visible in this listing.
163 for (int j = 0; j < 4; j++)
164 weight += m_floatpixels[i * 4 + j];
166 for (int j = 0; j < 4; j++)
167 m_floatpixels[i * 4 + j] /= weight;
// Generates a 6 tap kernel from Spline36Weight, 3 taps per entry with the fourth
// component set to 0. Same table arrangement as Lanczos3: the taps of one sample position
// are split over entry i and entry i + m_size / 2.
171 void CConvolutionKernel::Spline36()
173 for (int i = 0; i < m_size; i++)
// x is the fractional offset of entry i, in the range [0, 1)
175 double x = (double)i / (double)m_size;
// the 3 taps of this entry are evaluated at 2x - 3, 2x - 1 and 2x + 1
178 for (int j = 0; j < 3; j++)
179 m_floatpixels[i * 4 + j] = (float)Spline36Weight(x * 2.0 + (double)(j * 2 - 3));
181 m_floatpixels[i * 4 + 3] = 0.0;
// normalize each group of 6 taps (3 from entry i, 3 from entry i + m_size / 2) by their sum
184 for (int i = 0; i < m_size / 2; i++)
// NOTE(review): the declaration of `weight` is not visible in this listing.
187 for (int j = 0; j < 3; j++)
189 weight += m_floatpixels[i * 4 + j];
190 weight += m_floatpixels[(i + m_size / 2) * 4 + j];
192 for (int j = 0; j < 3; j++)
194 m_floatpixels[i * 4 + j] /= weight;
195 m_floatpixels[(i + m_size / 2) * 4 + j] /= weight;
200 //generate a bicubic kernel which can be loaded with RGBA format
201 //each value of RGBA has one tap, so a shader can load 4 taps with a single pixel lookup
// B and C are passed through unchanged to BicubicWeight for every tap.
// No normalization of the 4 taps is visible in this listing, unlike the lanczos and
// spline36 generators.
202 void CConvolutionKernel::Bicubic(double B, double C)
204 for (int i = 0; i < m_size; i++)
// x is the fractional offset of entry i, in the range [0, 1)
206 double x = (double)i / (double)m_size;
// the 4 taps are evaluated at x - 2, x - 1, x and x + 1
209 for (int j = 0; j < 4; j++)
210 m_floatpixels[i * 4 + j] = (float)BicubicWeight(x + (double)(j - 2), B, C);
// Returns the lanczos weight for offset x with the given radius.
// NOTE(review): only the middle branch is visible in this listing. `ax` is presumably the
// absolute value of x, and the branches before and after this one (which would have to
// cover ax == 0, where SINC divides by zero, and ax >= radius) are not shown - confirm.
214 double CConvolutionKernel::LanczosWeight(double x, double radius)
220 else if (ax < radius)
// inside the window: sinc(ax) multiplied by sinc stretched to the radius
221 return SINC(ax) * SINC(ax / radius);
// Returns the bicubic weight for offset x, with B and C as shape parameters of the
// cubic polynomials below.
// NOTE(review): the conditions selecting each polynomial, the definition of `ax`
// (presumably the absolute value of x) and the tail of the first polynomial are not
// visible in this listing.
226 double CConvolutionKernel::BicubicWeight(double x, double B, double C)
// first polynomial in ax; the expression continues on a line that is not shown here
232 return ((12 - 9*B - 6*C) * ax * ax * ax +
233 (-18 + 12*B + 6*C) * ax * ax +
// second polynomial in ax, divided by 6
238 return ((-B - 6*C) * ax * ax * ax +
239 (6*B + 30*C) * ax * ax + (-12*B - 48*C) *
240 ax + (8*B + 24*C)) / 6;
// Returns the spline36 weight for offset x as one of three cubic pieces, each written in
// nested (Horner) form.
// NOTE(review): the conditions selecting each piece and the definition of `ax` are not
// visible in this listing; presumably ax is the absolute value of x and the pieces cover
// [0, 1), [1, 2) and [2, 3) - confirm against the full file.
248 double CConvolutionKernel::Spline36Weight(double x)
// piece evaluated at ax; yields 1.0 at ax == 0
253 return ( ( 13.0 / 11.0 * (ax ) - 453.0 / 209.0 ) * (ax ) - 3.0 / 209.0 ) * (ax ) + 1.0;
// piece evaluated at ax - 1.0; yields 0 at ax == 1
255 return ( ( -6.0 / 11.0 * (ax - 1.0) + 270.0 / 209.0 ) * (ax - 1.0) - 156.0 / 209.0 ) * (ax - 1.0);
// piece evaluated at ax - 2.0; yields 0 at ax == 2
257 return ( ( 1.0 / 11.0 * (ax - 2.0) - 45.0 / 209.0 ) * (ax - 2.0) + 26.0 / 209.0 ) * (ax - 2.0);
261 //convert float to high byte/low byte, so the kernel can be loaded into an 8 bit texture
262 //with height 2 and converted back to real float in the shader
263 //it only works when the kernel texture uses nearest neighbour, but there's almost no difference
264 //between that and linear interpolation
265 void CConvolutionKernel::ToIntFract()
// two bytes per float: the first m_size * 4 bytes hold the high bytes, the second
// m_size * 4 bytes hold the low bytes
267 m_intfractpixels = new uint8_t[m_size * 8];
269 for (int i = 0; i < m_size * 4; i++)
// map the float from [-1.0, 1.0] onto the 16 bit range [0, 65535] and round
271 int value = MathUtils::round_int((m_floatpixels[i] + 1.0) / 2.0 * 65535.0);
// upper bound of the clamp to 16 bits
// NOTE(review): the lower bound check and the assignments are not visible in this listing
274 else if (value > 65535)
// split the 16 bit value into its high byte and low byte
277 int integer = value / 256;
278 int fract = value % 256;
280 m_intfractpixels[i] = (uint8_t)integer;
281 m_intfractpixels[i + m_size * 4] = (uint8_t)fract;
285 //convert to 8 bits unsigned
// Produces one byte per float in m_uint8pixels, same element order as m_floatpixels.
286 void CConvolutionKernel::ToUint8()
288 m_uint8pixels = new uint8_t[m_size * 4];
290 for (int i = 0; i < m_size * 4; i++)
// map the float from [-1.0, 1.0] onto [0, 255] and round
292 int value = MathUtils::round_int((m_floatpixels[i] * 0.5 + 0.5) * 255.0);
// upper bound of the clamp to 8 bits
// NOTE(review): the lower bound check and the assignments are not visible in this listing
295 else if (value > 255)
298 m_uint8pixels[i] = (uint8_t)value;