1 /* -----------------------------------------------------------------------------
3 Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
4 Copyright (c) 2007 Ignacio Castano icastano@nvidia.com
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice shall be included
15 in all copies or substantial portions of the Software.
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 -------------------------------------------------------------------------- */
27 #include "clusterfit.h"
28 #include "colourset.h"
29 #include "colourblock.h"
34 ClusterFit::ClusterFit( ColourSet const* colours, int flags, float* metric )
35 : ColourFit( colours, flags )
37 // set the iteration count
38 m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1;
40 // initialise the metric (old perceptual = 0.2126f, 0.7152f, 0.0722f)
42 m_metric = Vec4( metric[0], metric[1], metric[2], 1.0f );
44 m_metric = VEC4_CONST( 1.0f );
46 // initialise the best error
47 m_besterror = VEC4_CONST( FLT_MAX );
50 int const count = m_colours->GetCount();
51 Vec3 const* values = m_colours->GetPoints();
53 // get the covariance matrix
54 Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() );
56 // compute the principle component
57 m_principle = ComputePrincipleComponent( covariance );
60 bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration )
63 int const count = m_colours->GetCount();
64 Vec3 const* values = m_colours->GetPoints();
66 // build the list of dot products
68 u8* order = ( u8* )m_order + 16*iteration;
69 for( int i = 0; i < count; ++i )
71 dps[i] = Dot( values[i], axis );
75 // stable sort using them
76 for( int i = 0; i < count; ++i )
78 for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j )
80 std::swap( dps[j], dps[j - 1] );
81 std::swap( order[j], order[j - 1] );
85 // check this ordering is unique
86 for( int it = 0; it < iteration; ++it )
88 u8 const* prev = ( u8* )m_order + 16*it;
90 for( int i = 0; i < count; ++i )
92 if( order[i] != prev[i] )
102 // copy the ordering and weight all the points
103 Vec3 const* unweighted = m_colours->GetPoints();
104 float const* weights = m_colours->GetWeights();
105 m_xsum_wsum = VEC4_CONST( 0.0f );
106 for( int i = 0; i < count; ++i )
109 Vec4 p( unweighted[j].X(), unweighted[j].Y(), unweighted[j].Z(), 1.0f );
110 Vec4 w( weights[j] );
112 m_points_weights[i] = x;
118 void ClusterFit::Compress3( void* block )
121 int const count = m_colours->GetCount();
122 Vec4 const two = VEC4_CONST( 2.0 );
123 Vec4 const one = VEC4_CONST( 1.0f );
124 Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f );
125 Vec4 const zero = VEC4_CONST( 0.0f );
126 Vec4 const half = VEC4_CONST( 0.5f );
127 Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
128 Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
130 // prepare an ordering using the principle axis
131 ConstructOrdering( m_principle, 0 );
133 // check all possible clusters and iterate on the total order
134 Vec4 beststart = VEC4_CONST( 0.0f );
135 Vec4 bestend = VEC4_CONST( 0.0f );
136 Vec4 besterror = m_besterror;
138 int bestiteration = 0;
139 int besti = 0, bestj = 0;
141 // loop over iterations (we avoid the case that all points in first or last cluster)
142 for( int iterationIndex = 0;; )
144 // first cluster [0,i) is at the start
145 Vec4 part0 = VEC4_CONST( 0.0f );
146 for( int i = 0; i < count; ++i )
148 // second cluster [i,j) is half along
149 Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
150 int jmin = ( i == 0 ) ? 1 : i;
151 for( int j = jmin;; )
153 // last cluster [j,count) is at the end
154 Vec4 part2 = m_xsum_wsum - part1 - part0;
156 // compute least squares terms directly
157 Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 );
158 Vec4 alpha2_sum = alphax_sum.SplatW();
160 Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 );
161 Vec4 beta2_sum = betax_sum.SplatW();
163 Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW();
165 // compute the least-squares optimal points
166 Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
167 Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
168 Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
171 a = Min( one, Max( zero, a ) );
172 b = Min( one, Max( zero, b ) );
173 a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
174 b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
176 // compute the error (we skip the constant xxsum)
177 Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
178 Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
179 Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
180 Vec4 e4 = MultiplyAdd( two, e3, e1 );
182 // apply the metric to the error term
183 Vec4 e5 = e4*m_metric;
184 Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
186 // keep the solution if it wins
187 if( CompareAnyLessThan( error, besterror ) )
194 bestiteration = iterationIndex;
200 part1 += m_points_weights[j];
205 part0 += m_points_weights[i];
208 // stop if we didn't improve in this iteration
209 if( bestiteration != iterationIndex )
212 // advance if possible
214 if( iterationIndex == m_iterationCount )
217 // stop if a new iteration is an ordering that has already been tried
218 Vec3 axis = ( bestend - beststart ).GetVec3();
219 if( !ConstructOrdering( axis, iterationIndex ) )
223 // save the block if necessary
224 if( CompareAnyLessThan( besterror, m_besterror ) )
227 u8 const* order = ( u8* )m_order + 16*bestiteration;
230 for( int m = 0; m < besti; ++m )
231 unordered[order[m]] = 0;
232 for( int m = besti; m < bestj; ++m )
233 unordered[order[m]] = 2;
234 for( int m = bestj; m < count; ++m )
235 unordered[order[m]] = 1;
237 m_colours->RemapIndices( unordered, bestindices );
240 WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
243 m_besterror = besterror;
247 void ClusterFit::Compress4( void* block )
250 int const count = m_colours->GetCount();
251 Vec4 const two = VEC4_CONST( 2.0f );
252 Vec4 const one = VEC4_CONST( 1.0f );
253 Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
254 Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
255 Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
256 Vec4 const zero = VEC4_CONST( 0.0f );
257 Vec4 const half = VEC4_CONST( 0.5f );
258 Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
259 Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
261 // prepare an ordering using the principle axis
262 ConstructOrdering( m_principle, 0 );
264 // check all possible clusters and iterate on the total order
265 Vec4 beststart = VEC4_CONST( 0.0f );
266 Vec4 bestend = VEC4_CONST( 0.0f );
267 Vec4 besterror = m_besterror;
269 int bestiteration = 0;
270 int besti = 0, bestj = 0, bestk = 0;
272 // loop over iterations (we avoid the case that all points in first or last cluster)
273 for( int iterationIndex = 0;; )
275 // first cluster [0,i) is at the start
276 Vec4 part0 = VEC4_CONST( 0.0f );
277 for( int i = 0; i < count; ++i )
279 // second cluster [i,j) is one third along
280 Vec4 part1 = VEC4_CONST( 0.0f );
283 // third cluster [j,k) is two thirds along
284 Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
285 int kmin = ( j == 0 ) ? 1 : j;
286 for( int k = kmin;; )
288 // last cluster [k,count) is at the end
289 Vec4 part3 = m_xsum_wsum - part2 - part1 - part0;
291 // compute least squares terms directly
292 Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) );
293 Vec4 const alpha2_sum = alphax_sum.SplatW();
295 Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) );
296 Vec4 const beta2_sum = betax_sum.SplatW();
298 Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW();
300 // compute the least-squares optimal points
301 Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
302 Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
303 Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
306 a = Min( one, Max( zero, a ) );
307 b = Min( one, Max( zero, b ) );
308 a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
309 b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
311 // compute the error (we skip the constant xxsum)
312 Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
313 Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
314 Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
315 Vec4 e4 = MultiplyAdd( two, e3, e1 );
317 // apply the metric to the error term
318 Vec4 e5 = e4*m_metric;
319 Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
321 // keep the solution if it wins
322 if( CompareAnyLessThan( error, besterror ) )
330 bestiteration = iterationIndex;
336 part2 += m_points_weights[k];
343 part1 += m_points_weights[j];
348 part0 += m_points_weights[i];
351 // stop if we didn't improve in this iteration
352 if( bestiteration != iterationIndex )
355 // advance if possible
357 if( iterationIndex == m_iterationCount )
360 // stop if a new iteration is an ordering that has already been tried
361 Vec3 axis = ( bestend - beststart ).GetVec3();
362 if( !ConstructOrdering( axis, iterationIndex ) )
366 // save the block if necessary
367 if( CompareAnyLessThan( besterror, m_besterror ) )
370 u8 const* order = ( u8* )m_order + 16*bestiteration;
373 for( int m = 0; m < besti; ++m )
374 unordered[order[m]] = 0;
375 for( int m = besti; m < bestj; ++m )
376 unordered[order[m]] = 2;
377 for( int m = bestj; m < bestk; ++m )
378 unordered[order[m]] = 3;
379 for( int m = bestk; m < count; ++m )
380 unordered[order[m]] = 1;
382 m_colours->RemapIndices( unordered, bestindices );
385 WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
388 m_besterror = besterror;
392 } // namespace squish