code.vuplus.com Git - vuplus_xbmc/blob - lib/libsquish/squish.cpp

   1 /* -----------------------------------------------------------------------------
   2
   3         Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
   4
   5         Permission is hereby granted, free of charge, to any person obtaining
   6         a copy of this software and associated documentation files (the
   7         "Software"), to deal in the Software without restriction, including
   8         without limitation the rights to use, copy, modify, merge, publish,
   9         distribute, sublicense, and/or sell copies of the Software, and to
  10         permit persons to whom the Software is furnished to do so, subject to
  11         the following conditions:
  12
  13         The above copyright notice and this permission notice shall be included
  14         in all copies or substantial portions of the Software.
  15
  16         THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17         OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18         MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  19         IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  20         CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21         TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22         SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23
  24    -------------------------------------------------------------------------- */
  25
  26 #include <string.h>
  27 #include <squish.h>
  28 #include "colourset.h"
  29 #include "maths.h"
  30 #include "rangefit.h"
  31 #include "clusterfit.h"
  32 #include "colourblock.h"
  33 #include "alpha.h"
  34 #include "singlecolourfit.h"
  35
  36 namespace squish {
  37
  38 static int FixFlags( int flags )
  39 {
  40         // grab the flag bits
  41         int method = flags & ( kDxt1 | kDxt3 | kDxt5 );
  42         int fit = flags & ( kColourIterativeClusterFit | kColourClusterFit | kColourRangeFit );
  43         int extra = flags & ( kWeightColourByAlpha | kSourceBGRA );
  44
  45         // set defaults
  46         if( method != kDxt3 && method != kDxt5 )
  47                 method = kDxt1;
  48         if( fit != kColourRangeFit && fit != kColourIterativeClusterFit )
  49                 fit = kColourClusterFit;
  50
  51         // done
  52         return method | fit | extra;
  53 }
  54
  55 void CompressMasked( u8 const* rgba, int mask, void* block, int flags, float* metric )
  56 {
  57         // fix any bad flags
  58         flags = FixFlags( flags );
  59
  60         // get the block locations
  61         void* colourBlock = block;
  62         void* alphaBock = block;
  63         if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
  64                 colourBlock = reinterpret_cast< u8* >( block ) + 8;
  65
  66         // create the minimal point set
  67         ColourSet colours( rgba, mask, flags );
  68
  69         // check the compression type and compress colour
  70         if( colours.GetCount() == 1 )
  71         {
  72                 // always do a single colour fit
  73                 SingleColourFit fit( &colours, flags );
  74                 fit.Compress( colourBlock );
  75         }
  76         else if( ( flags & kColourRangeFit ) != 0 || colours.GetCount() == 0 )
  77         {
  78                 // do a range fit
  79                 RangeFit fit( &colours, flags, metric );
  80                 fit.Compress( colourBlock );
  81         }
  82         else
  83         {
  84                 // default to a cluster fit (could be iterative or not)
  85                 ClusterFit fit( &colours, flags, metric );
  86                 fit.Compress( colourBlock );
  87         }
  88
  89         // compress alpha separately if necessary
  90         if( ( flags & kDxt3 ) != 0 )
  91                 CompressAlphaDxt3( rgba, mask, alphaBock );
  92         else if( ( flags & kDxt5 ) != 0 )
  93                 CompressAlphaDxt5( rgba, mask, alphaBock );
  94 }
  95
  96 void Decompress( u8* rgba, void const* block, int flags )
  97 {
  98         // fix any bad flags
  99         flags = FixFlags( flags );
 100
 101         // get the block locations
 102         void const* colourBlock = block;
 103         void const* alphaBock = block;
 104         if( ( flags & ( kDxt3 | kDxt5 ) ) != 0 )
 105                 colourBlock = reinterpret_cast< u8 const* >( block ) + 8;
 106
 107         // decompress colour
 108         DecompressColour( rgba, colourBlock, ( flags & kDxt1 ) != 0 );
 109
 110         // decompress alpha separately if necessary
 111         if( ( flags & kDxt3 ) != 0 )
 112                 DecompressAlphaDxt3( rgba, alphaBock );
 113         else if( ( flags & kDxt5 ) != 0 )
 114                 DecompressAlphaDxt5( rgba, alphaBock );
 115 }
 116
 117 int GetStorageRequirements( int width, int height, int flags )
 118 {
 119         // fix any bad flags
 120         flags = FixFlags( flags );
 121
 122         // compute the storage requirements
 123         int blockcount = ( ( width + 3 )/4 ) * ( ( height + 3 )/4 );
 124         int blocksize = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
 125         return blockcount*blocksize;
 126 }
 127
 128 void CopyRGBA( u8 const* source, u8* dest, int flags )
 129 {
 130         if (flags & kSourceBGRA)
 131         {
 132                 // convert from bgra to rgba
 133                 dest[0] = source[2];
 134                 dest[1] = source[1];
 135                 dest[2] = source[0];
 136                 dest[3] = source[3];
 137         }
 138         else
 139         {
 140                 for( int i = 0; i < 4; ++i )
 141                         *dest++ = *source++;
 142         }
 143 }
 144
 145 void CompressImage( u8 const* rgba, int width, int height, void* blocks, int flags, float* metric )
 146 {
 147         CompressImage(rgba, width, height, width*4, blocks, flags, metric);
 148 }
 149
 150 void CompressImage( u8 const* rgba, int width, int height, int pitch, void* blocks, int flags, float* metric )
 151 {
 152         // fix any bad flags
 153         flags = FixFlags( flags );
 154
 155         // initialise the block output
 156         u8* targetBlock = reinterpret_cast< u8* >( blocks );
 157         int bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
 158
 159         // loop over blocks
 160         for( int y = 0; y < height; y += 4 )
 161         {
 162                 for( int x = 0; x < width; x += 4 )
 163                 {
 164                         // build the 4x4 block of pixels
 165                         u8 sourceRgba[16*4];
 166                         u8* targetPixel = sourceRgba;
 167                         int mask = 0;
 168                         for( int py = 0; py < 4; ++py )
 169                         {
 170                                 for( int px = 0; px < 4; ++px )
 171                                 {
 172                                         // get the source pixel in the image
 173                                         int sx = x + px;
 174                                         int sy = y + py;
 175
 176                                         // enable if we're in the image
 177                                         if( sx < width && sy < height )
 178                                         {
 179                                                 // copy the rgba value
 180                                                 u8 const* sourcePixel = rgba + pitch*sy + 4*sx;
 181                                                 CopyRGBA(sourcePixel, targetPixel, flags);
 182                                                 // enable this pixel
 183                                                 mask |= ( 1 << ( 4*py + px ) );
 184                                         }
 185                                         targetPixel += 4;
 186                                 }
 187                         }
 188
 189                         // compress it into the output
 190                         CompressMasked( sourceRgba, mask, targetBlock, flags, metric );
 191
 192                         // advance
 193                         targetBlock += bytesPerBlock;
 194                 }
 195         }
 196 }
 197
 198 void DecompressImage( u8* rgba, int width, int height, void const* blocks, int flags )
 199 {
 200         DecompressImage( rgba, width, height, width*4, blocks, flags );
 201 }
 202
 203 void DecompressImage( u8* rgba, int width, int height, int pitch, void const* blocks, int flags )
 204 {
 205         // fix any bad flags
 206         flags = FixFlags( flags );
 207
 208         // initialise the block input
 209         u8 const* sourceBlock = reinterpret_cast< u8 const* >( blocks );
 210         int bytesPerBlock = ( ( flags & kDxt1 ) != 0 ) ? 8 : 16;
 211
 212         // loop over blocks
 213         for( int y = 0; y < height; y += 4 )
 214         {
 215                 for( int x = 0; x < width; x += 4 )
 216                 {
 217                         // decompress the block
 218                         u8 targetRgba[4*16];
 219                         Decompress( targetRgba, sourceBlock, flags );
 220
 221                         // write the decompressed pixels to the correct image locations
 222                         u8 const* sourcePixel = targetRgba;
 223                         for( int py = 0; py < 4; ++py )
 224                         {
 225                                 for( int px = 0; px < 4; ++px )
 226                                 {
 227                                         // get the target location
 228                                         int sx = x + px;
 229                                         int sy = y + py;
 230                                         if( sx < width && sy < height )
 231                                         {
 232                                                 u8* targetPixel = rgba + pitch*sy + 4*sx;
 233
 234                                                 // copy the rgba value
 235                                                 CopyRGBA(sourcePixel, targetPixel, flags);
 236                                         }
 237                                         sourcePixel += 4;
 238                                 }
 239                         }
 240
 241                         // advance
 242                         sourceBlock += bytesPerBlock;
 243                 }
 244         }
 245 }
 246
 247 static double ErrorSq(double x, double y)
 248 {
 249         return (x - y) * (x - y);
 250 }
 251
 252 static void ComputeBlockWMSE(u8 const *original, u8 const *compressed, unsigned int w, unsigned int h, double &cmse, double &amse)
 253 {
 254         // Computes the MSE for the block and weights it by the variance of the original block.
 255         // If the variance of the original block is less than 4 (i.e. a standard deviation of 1 per channel)
 256         // then the block is close to being a single colour. Quantisation errors in single colour blocks
 257         // are easier to see than similar errors in blocks that contain more colours, particularly when there
 258         // are many such blocks in a large area (eg a blue sky background) as they cause banding.  Given that
 259         // banding is easier to see than small errors in "complex" blocks, we weight the errors by a factor
 260         // of 5. This implies that images with large, single colour areas will have a higher potential WMSE
 261         // than images with lots of detail.
 262
 263         cmse = amse = 0;
 264         unsigned int sum_p[4];  // per channel sum of pixels
 265         unsigned int sum_p2[4]; // per channel sum of pixels squared
 266         memset(sum_p, 0, sizeof(sum_p));
 267         memset(sum_p2, 0, sizeof(sum_p2));
 268         for( unsigned int py = 0; py < 4; ++py )
 269         {
 270                 for( unsigned int px = 0; px < 4; ++px )
 271                 {
 272                         if( px < w && py < h )
 273                         {
 274                                 double pixelCMSE = 0;
 275                                 for( int i = 0; i < 3; ++i )
 276                                 {
 277                                         pixelCMSE += ErrorSq(original[i], compressed[i]);
 278                                         sum_p[i] += original[i];
 279                                         sum_p2[i] += (unsigned int)original[i]*original[i];
 280                                 }
 281                                 if( original[3] == 0 && compressed[3] == 0 )
 282                                         pixelCMSE = 0; // transparent in both, so colour is inconsequential
 283                                 amse += ErrorSq(original[3], compressed[3]);
 284                                 cmse += pixelCMSE;
 285                                 sum_p[3] += original[3];
 286                                 sum_p2[3] += (unsigned int)original[3]*original[3];
 287                         }
 288                         original += 4;
 289                         compressed += 4;
 290                 }
 291         }
 292         unsigned int variance = 0;
 293         for( int i = 0; i < 4; ++i )
 294                 variance += w*h*sum_p2[i] - sum_p[i]*sum_p[i];
 295         if( variance < 4 * w * w * h * h )
 296         {
 297                 amse *= 5;
 298                 cmse *= 5;
 299         }
 300 }
 301
 302 void ComputeMSE( u8 const *rgba, int width, int height, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
 303 {
 304         ComputeMSE(rgba, width, height, width*4, dxt, flags, colourMSE, alphaMSE);
 305 }
 306
 307 void ComputeMSE( u8 const *rgba, int width, int height, int pitch, u8 const *dxt, int flags, double &colourMSE, double &alphaMSE )
 308 {
 309         // fix any bad flags
 310         flags = FixFlags( flags );
 311         colourMSE = alphaMSE = 0;
 312
 313         // initialise the block input
 314         squish::u8 const* sourceBlock = dxt;
 315         int bytesPerBlock = ( ( flags & squish::kDxt1 ) != 0 ) ? 8 : 16;
 316
 317         // loop over blocks
 318         for( int y = 0; y < height; y += 4 )
 319         {
 320                 for( int x = 0; x < width; x += 4 )
 321                 {
 322                         // decompress the block
 323                         u8 targetRgba[4*16];
 324                         Decompress( targetRgba, sourceBlock, flags );
 325                         u8 const* sourcePixel = targetRgba;
 326
 327                         // copy across to a similar pixel block
 328                         u8 originalRgba[4*16];
 329                         u8* originalPixel = originalRgba;
 330
 331                         for( int py = 0; py < 4; ++py )
 332                         {
 333                                 for( int px = 0; px < 4; ++px )
 334                                 {
 335                                         int sx = x + px;
 336                                         int sy = y + py;
 337                                         if( sx < width && sy < height )
 338                                         {
 339                                                 u8 const* targetPixel = rgba + pitch*sy + 4*sx;
 340                                                 CopyRGBA(targetPixel, originalPixel, flags);
 341                                         }
 342                                         sourcePixel += 4;
 343                                         originalPixel += 4;
 344                                 }
 345                         }
 346
 347                         // compute the weighted MSE of the block
 348                         double blockCMSE, blockAMSE;
 349                         ComputeBlockWMSE(originalRgba, targetRgba, std::min(4, width - x), std::min(4, height - y), blockCMSE, blockAMSE);
 350                         colourMSE += blockCMSE;
 351                         alphaMSE += blockAMSE;
 352                         // advance
 353                         sourceBlock += bytesPerBlock;
 354                 }
 355         }
 356         colourMSE /= (width * height * 3);
 357         alphaMSE /= (width * height);
 358 }
 359
 360 } // namespace squish