code.vuplus.com Git - vuplus_xbmc/blob - xbmc/cores/VideoRenderers/VideoShaders/ConvolutionKernels.cpp

   1 /*
   2  *      Copyright (C) 2005-2013 Team XBMC
   3  *      http://www.xbmc.org
   4  *
   5  *  This Program is free software; you can redistribute it and/or modify
   6  *  it under the terms of the GNU General Public License as published by
   7  *  the Free Software Foundation; either version 2, or (at your option)
   8  *  any later version.
   9  *
  10  *  This Program is distributed in the hope that it will be useful,
  11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  *  GNU General Public License for more details.
  14  *
  15  *  You should have received a copy of the GNU General Public License
  16  *  along with XBMC; see the file COPYING.  If not, see
  17  *  <http://www.gnu.org/licenses/>.
  18  *
  19  */
  20 #ifdef _WIN32
  21   #define _USE_MATH_DEFINES
  22 #endif
  23
  24 #include "ConvolutionKernels.h"
  25 #include "utils/MathUtils.h"
  26
  27 #ifndef M_PI
  28   #define M_PI       3.14159265358979323846
  29 #endif
  30
  31 #define SINC(x) (sin(M_PI * (x)) / (M_PI * (x)))
  32
  33 CConvolutionKernel::CConvolutionKernel(ESCALINGMETHOD method, int size)
  34 {
  35   m_size = size;
  36   m_floatpixels = new float[m_size * 4];
  37
  38   if (method == VS_SCALINGMETHOD_LANCZOS2)
  39     Lanczos2();
  40   else if (method == VS_SCALINGMETHOD_SPLINE36_FAST)
  41     Spline36Fast();
  42   else if (method == VS_SCALINGMETHOD_LANCZOS3_FAST)
  43     Lanczos3Fast();
  44   else if (method == VS_SCALINGMETHOD_SPLINE36)
  45     Spline36();
  46   else if (method == VS_SCALINGMETHOD_LANCZOS3)
  47     Lanczos3();
  48   else if (method == VS_SCALINGMETHOD_CUBIC)
  49     Bicubic(1.0 / 3.0, 1.0 / 3.0);
  50
  51   ToIntFract();
  52   ToUint8();
  53 }
  54
  55 CConvolutionKernel::~CConvolutionKernel()
  56 {
  57   delete [] m_floatpixels;
  58   delete [] m_intfractpixels;
  59   delete [] m_uint8pixels;
  60 }
  61
  62 //generate a lanczos2 kernel which can be loaded with RGBA format
  63 //each value of RGBA has one tap, so a shader can load 4 taps with a single pixel lookup
  64 void CConvolutionKernel::Lanczos2()
  65 {
  66   for (int i = 0; i < m_size; i++)
  67   {
  68     double x = (double)i / (double)m_size;
  69
  70     //generate taps
  71     for (int j = 0; j < 4; j++)
  72       m_floatpixels[i * 4 + j] = (float)LanczosWeight(x + (double)(j - 2), 2.0);
  73
  74     //any collection of 4 taps added together needs to be exactly 1.0
  75     //for lanczos this is not always the case, so we take each collection of 4 taps
  76     //and divide those taps by the sum of the taps
  77     float weight = 0.0;
  78     for (int j = 0; j < 4; j++)
  79       weight += m_floatpixels[i * 4 + j];
  80
  81     for (int j = 0; j < 4; j++)
  82       m_floatpixels[i * 4 + j] /= weight;
  83   }
  84 }
  85
  86 //generate a lanczos3 kernel which can be loaded with RGBA format
  87 //each value of RGBA has one tap, so a shader can load 4 taps with a single pixel lookup
  88 //the two outer lobes of the lanczos3 kernel are added to the two lobes one step to the middle
  89 //this basically looks the same as lanczos3, but the kernel only has 4 taps,
  90 //so it can use the 4x4 convolution shader which is twice as fast as the 6x6 one
  91 void CConvolutionKernel::Lanczos3Fast()
  92 {
  93   for (int i = 0; i < m_size; i++)
  94   {
  95     double a = 3.0;
  96     double x = (double)i / (double)m_size;
  97
  98     //generate taps
  99     m_floatpixels[i * 4 + 0] = (float)(LanczosWeight(x - 2.0, a) + LanczosWeight(x - 3.0, a));
 100     m_floatpixels[i * 4 + 1] = (float) LanczosWeight(x - 1.0, a);
 101     m_floatpixels[i * 4 + 2] = (float) LanczosWeight(x      , a);
 102     m_floatpixels[i * 4 + 3] = (float)(LanczosWeight(x + 1.0, a) + LanczosWeight(x + 2.0, a));
 103
 104     //any collection of 4 taps added together needs to be exactly 1.0
 105     //for lanczos this is not always the case, so we take each collection of 4 taps
 106     //and divide those taps by the sum of the taps
 107     float weight = 0.0;
 108     for (int j = 0; j < 4; j++)
 109       weight += m_floatpixels[i * 4 + j];
 110
 111     for (int j = 0; j < 4; j++)
 112       m_floatpixels[i * 4 + j] /= weight;
 113   }
 114 }
 115
 116 //generate a lanczos3 kernel which can be loaded with RGBA format
 117 //each value of RGB has one tap, so a shader can load 3 taps with a single pixel lookup
 118 void CConvolutionKernel::Lanczos3()
 119 {
 120   for (int i = 0; i < m_size; i++)
 121   {
 122     double x = (double)i / (double)m_size;
 123
 124     //generate taps
 125     for (int j = 0; j < 3; j++)
 126       m_floatpixels[i * 4 + j] = (float)LanczosWeight(x * 2.0 + (double)(j * 2 - 3), 3.0);
 127
 128     m_floatpixels[i * 4 + 3] = 0.0;
 129   }
 130
 131   //any collection of 6 taps added together needs to be exactly 1.0
 132   //for lanczos this is not always the case, so we take each collection of 6 taps
 133   //and divide those taps by the sum of the taps
 134   for (int i = 0; i < m_size / 2; i++)
 135   {
 136     float weight = 0.0;
 137     for (int j = 0; j < 3; j++)
 138     {
 139       weight += m_floatpixels[i * 4 + j];
 140       weight += m_floatpixels[(i + m_size / 2) * 4 + j];
 141     }
 142     for (int j = 0; j < 3; j++)
 143     {
 144       m_floatpixels[i * 4 + j] /= weight;
 145       m_floatpixels[(i + m_size / 2) * 4 + j] /= weight;
 146     }
 147   }
 148 }
 149
 150 void CConvolutionKernel::Spline36Fast()
 151 {
 152   for (int i = 0; i < m_size; i++)
 153   {
 154     double x = (double)i / (double)m_size;
 155
 156     //generate taps
 157     m_floatpixels[i * 4 + 0] = (float)(Spline36Weight(x - 2.0) + Spline36Weight(x - 3.0));
 158     m_floatpixels[i * 4 + 1] = (float) Spline36Weight(x - 1.0);
 159     m_floatpixels[i * 4 + 2] = (float) Spline36Weight(x      );
 160     m_floatpixels[i * 4 + 3] = (float)(Spline36Weight(x + 1.0) + Spline36Weight(x + 2.0));
 161
 162     float weight = 0.0;
 163     for (int j = 0; j < 4; j++)
 164       weight += m_floatpixels[i * 4 + j];
 165
 166     for (int j = 0; j < 4; j++)
 167       m_floatpixels[i * 4 + j] /= weight;
 168   }
 169 }
 170
 171 void CConvolutionKernel::Spline36()
 172 {
 173   for (int i = 0; i < m_size; i++)
 174   {
 175     double x = (double)i / (double)m_size;
 176
 177     //generate taps
 178     for (int j = 0; j < 3; j++)
 179       m_floatpixels[i * 4 + j] = (float)Spline36Weight(x * 2.0 + (double)(j * 2 - 3));
 180
 181     m_floatpixels[i * 4 + 3] = 0.0;
 182   }
 183
 184   for (int i = 0; i < m_size / 2; i++)
 185   {
 186     float weight = 0.0;
 187     for (int j = 0; j < 3; j++)
 188     {
 189       weight += m_floatpixels[i * 4 + j];
 190       weight += m_floatpixels[(i + m_size / 2) * 4 + j];
 191     }
 192     for (int j = 0; j < 3; j++)
 193     {
 194       m_floatpixels[i * 4 + j] /= weight;
 195       m_floatpixels[(i + m_size / 2) * 4 + j] /= weight;
 196     }
 197   }
 198 }
 199
 200 //generate a bicubic kernel which can be loaded with RGBA format
 201 //each value of RGBA has one tap, so a shader can load 4 taps with a single pixel lookup
 202 void CConvolutionKernel::Bicubic(double B, double C)
 203 {
 204   for (int i = 0; i < m_size; i++)
 205   {
 206     double x = (double)i / (double)m_size;
 207
 208     //generate taps
 209     for (int j = 0; j < 4; j++)
 210       m_floatpixels[i * 4 + j] = (float)BicubicWeight(x + (double)(j - 2), B, C);
 211   }
 212 }
 213
 214 double CConvolutionKernel::LanczosWeight(double x, double radius)
 215 {
 216   double ax = fabs(x);
 217
 218   if (ax == 0.0)
 219     return 1.0;
 220   else if (ax < radius)
 221     return SINC(ax) * SINC(ax / radius);
 222   else
 223     return 0.0;
 224 }
 225
 226 double CConvolutionKernel::BicubicWeight(double x, double B, double C)
 227 {
 228   double ax = fabs(x);
 229
 230   if (ax<1.0)
 231   {
 232     return ((12 - 9*B - 6*C) * ax * ax * ax +
 233             (-18 + 12*B + 6*C) * ax * ax +
 234             (6 - 2*B))/6;
 235   }
 236   else if (ax<2.0)
 237   {
 238     return ((-B - 6*C) * ax * ax * ax +
 239             (6*B + 30*C) * ax * ax + (-12*B - 48*C) *
 240              ax + (8*B + 24*C)) / 6;
 241   }
 242   else
 243   {
 244     return 0.0;
 245   }
 246 }
 247
 248 double CConvolutionKernel::Spline36Weight(double x)
 249 {
 250   double ax = fabs(x);
 251
 252   if      ( ax < 1.0 )
 253     return ( ( 13.0 / 11.0 * (ax      ) - 453.0 / 209.0 ) * (ax      ) -   3.0 / 209.0 ) * (ax      ) + 1.0;
 254   else if ( ax < 2.0 )
 255     return ( ( -6.0 / 11.0 * (ax - 1.0) + 270.0 / 209.0 ) * (ax - 1.0) - 156.0 / 209.0 ) * (ax - 1.0);
 256   else if ( ax < 3.0 )
 257     return ( (  1.0 / 11.0 * (ax - 2.0) -  45.0 / 209.0 ) * (ax - 2.0) +  26.0 / 209.0 ) * (ax - 2.0);
 258   return 0.0;
 259 }
 260
 261 //convert float to high byte/low byte, so the kernel can be loaded into an 8 bit texture
 262 //with height 2 and converted back to real float in the shader
 263 //it only works when the kernel texture uses nearest neighbour, but there's almost no difference
 264 //between that and linear interpolation
 265 void CConvolutionKernel::ToIntFract()
 266 {
 267   m_intfractpixels = new uint8_t[m_size * 8];
 268
 269   for (int i = 0; i < m_size * 4; i++)
 270   {
 271     int value = MathUtils::round_int((m_floatpixels[i] + 1.0) / 2.0 * 65535.0);
 272     if (value < 0)
 273       value = 0;
 274     else if (value > 65535)
 275       value = 65535;
 276
 277     int integer = value / 256;
 278     int fract   = value % 256;
 279
 280     m_intfractpixels[i] = (uint8_t)integer;
 281     m_intfractpixels[i + m_size * 4] = (uint8_t)fract;
 282   }
 283 }
 284
 285 //convert to 8 bits unsigned
 286 void CConvolutionKernel::ToUint8()
 287 {
 288   m_uint8pixels = new uint8_t[m_size * 4];
 289
 290   for (int i = 0; i < m_size * 4; i++)
 291   {
 292     int value = MathUtils::round_int((m_floatpixels[i] * 0.5 + 0.5) * 255.0);
 293     if (value < 0)
 294       value = 0;
 295     else if (value > 255)
 296       value = 255;
 297
 298     m_uint8pixels[i] = (uint8_t)value;
 299   }
 300 }
 301