Paradox Game Engine  v1.0.0 beta06
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Events Macros Pages
DirectXTexConvert.cpp
Go to the documentation of this file.
1 //-------------------------------------------------------------------------------------
2 // DirectXTexConvert.cpp
3 //
4 // DirectX Texture Library - Image conversion
5 //
6 // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
7 // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
9 // PARTICULAR PURPOSE.
10 //
11 // Copyright (c) Microsoft Corporation. All rights reserved.
12 //
13 // http://go.microsoft.com/fwlink/?LinkId=248926
14 //-------------------------------------------------------------------------------------
15 
#include "directxtexp.h"

#include <cstring>
17 
18 using namespace DirectX::PackedVector;
19 using Microsoft::WRL::ComPtr;
20 
21 namespace
22 {
23 #if DIRECTX_MATH_VERSION < 306
24  inline float round_to_nearest( float x )
25  {
26  // Round to nearest (even)
27  float i = floorf(x);
28  x -= i;
29  if(x < 0.5f)
30  return i;
31  if(x > 0.5f)
32  return i + 1.f;
33 
34  float int_part;
35  modff( i / 2.f, &int_part );
36  if ( (2.f*int_part) == i )
37  {
38  return i;
39  }
40 
41  return i + 1.f;
42  }
43 #endif
44 
45  inline uint32_t FloatTo7e3(float Value)
46  {
47  uint32_t IValue = reinterpret_cast<uint32_t *>(&Value)[0];
48 
49  if ( IValue & 0x80000000U )
50  {
51  // Positive only
52  return 0;
53  }
54  else if (IValue > 0x41FF73FFU)
55  {
56  // The number is too large to be represented as a 7e3. Saturate.
57  return 0x3FFU;
58  }
59  else
60  {
61  if (IValue < 0x3E800000U)
62  {
63  // The number is too small to be represented as a normalized 7e3.
64  // Convert it to a denormalized value.
65  uint32_t Shift = 125U - (IValue >> 23U);
66  IValue = (0x800000U | (IValue & 0x7FFFFFU)) >> Shift;
67  }
68  else
69  {
70  // Rebias the exponent to represent the value as a normalized 7e3.
71  IValue += 0xC2000000U;
72  }
73 
74  return ((IValue + 0x7FFFU + ((IValue >> 16U) & 1U)) >> 16U)&0x3FFU;
75  }
76  }
77 
78  inline float FloatFrom7e3( uint32_t Value )
79  {
80  uint32_t Mantissa = (uint32_t)(Value & 0x7F);
81 
82  uint32_t Exponent = (Value & 0x380);
83  if (Exponent != 0) // The value is normalized
84  {
85  Exponent = (uint32_t)((Value >> 7) & 0x7);
86  }
87  else if (Mantissa != 0) // The value is denormalized
88  {
89  // Normalize the value in the resulting float
90  Exponent = 1;
91 
92  do
93  {
94  Exponent--;
95  Mantissa <<= 1;
96  } while ((Mantissa & 0x80) == 0);
97 
98  Mantissa &= 0x7F;
99  }
100  else // The value is zero
101  {
102  Exponent = (uint32_t)-124;
103  }
104 
105  uint32_t Result = ((Exponent + 124) << 23) | // Exponent
106  (Mantissa << 16); // Mantissa
107 
108  return reinterpret_cast<float*>(&Result)[0];
109  }
110 
111  inline uint32_t FloatTo6e4(float Value)
112  {
113  uint32_t IValue = reinterpret_cast<uint32_t *>(&Value)[0];
114 
115  if ( IValue & 0x80000000U )
116  {
117  // Positive only
118  return 0;
119  }
120  else if (IValue > 0x43FEFFFFU)
121  {
122  // The number is too large to be represented as a 6e4. Saturate.
123  return 0x3FFU;
124  }
125  else
126  {
127  if (IValue < 0x3C800000U)
128  {
129  // The number is too small to be represented as a normalized 6e4.
130  // Convert it to a denormalized value.
131  uint32_t Shift = 121U - (IValue >> 23U);
132  IValue = (0x800000U | (IValue & 0x7FFFFFU)) >> Shift;
133  }
134  else
135  {
136  // Rebias the exponent to represent the value as a normalized 6e4.
137  IValue += 0xC4000000U;
138  }
139 
140  return ((IValue + 0xFFFFU + ((IValue >> 17U) & 1U)) >> 17U)&0x3FFU;
141  }
142  }
143 
144  inline float FloatFrom6e4( uint32_t Value )
145  {
146  uint32_t Mantissa = (uint32_t)(Value & 0x3F);
147 
148  uint32_t Exponent = (Value & 0x3C0);
149  if (Exponent != 0) // The value is normalized
150  {
151  Exponent = (uint32_t)((Value >> 6) & 0xF);
152  }
153  else if (Mantissa != 0) // The value is denormalized
154  {
155  // Normalize the value in the resulting float
156  Exponent = 1;
157 
158  do
159  {
160  Exponent--;
161  Mantissa <<= 1;
162  } while ((Mantissa & 0x40) == 0);
163 
164  Mantissa &= 0x3F;
165  }
166  else // The value is zero
167  {
168  Exponent = (uint32_t)-120;
169  }
170 
171  uint32_t Result = ((Exponent + 120) << 23) | // Exponent
172  (Mantissa << 17); // Mantissa
173 
174  return reinterpret_cast<float*>(&Result)[0];
175  }
176 };
177 
178 namespace DirectX
179 {
// Luminance weights for RGB -> grayscale conversion (alpha weight 0).
// NOTE(review): values are close to, but not exactly, BT.709 luma
// coefficients (0.2126/0.7152/0.0722) — presumably intentional legacy
// constants; confirm before "fixing".
static const XMVECTORF32 g_Grayscale = { 0.2125f, 0.7154f, 0.0721f, 0.0f };
181 
182 //-------------------------------------------------------------------------------------
183 // Copies an image row with optional clearing of alpha value to 1.0
184 // (can be used in place as well) otherwise copies the image row unmodified.
185 //-------------------------------------------------------------------------------------
186 void _CopyScanline(_When_(pDestination == pSource, _Inout_updates_bytes_(outSize))
187  _When_(pDestination != pSource, _Out_writes_bytes_(outSize))
188  LPVOID pDestination, _In_ size_t outSize,
189  _In_reads_bytes_(inSize) LPCVOID pSource, _In_ size_t inSize,
190  _In_ DXGI_FORMAT format, _In_ DWORD flags)
191 {
192  assert( pDestination && outSize > 0 );
193  assert( pSource && inSize > 0 );
194  assert( IsValid(format) && !IsPalettized(format) );
195 
196  if ( flags & TEXP_SCANLINE_SETALPHA )
197  {
198  switch( static_cast<int>(format) )
199  {
200  //-----------------------------------------------------------------------------
201  case DXGI_FORMAT_R32G32B32A32_TYPELESS:
202  case DXGI_FORMAT_R32G32B32A32_FLOAT:
203  case DXGI_FORMAT_R32G32B32A32_UINT:
204  case DXGI_FORMAT_R32G32B32A32_SINT:
205  if ( inSize >= 16 && outSize >= 16 )
206  {
207  uint32_t alpha;
208  if ( format == DXGI_FORMAT_R32G32B32A32_FLOAT )
209  alpha = 0x3f800000;
210  else if ( format == DXGI_FORMAT_R32G32B32A32_SINT )
211  alpha = 0x7fffffff;
212  else
213  alpha = 0xffffffff;
214 
215  if ( pDestination == pSource )
216  {
217  uint32_t *dPtr = reinterpret_cast<uint32_t*> (pDestination);
218  for( size_t count = 0; count < ( outSize - 15 ); count += 16 )
219  {
220  dPtr += 3;
221  *(dPtr++) = alpha;
222  }
223  }
224  else
225  {
226  const uint32_t * __restrict sPtr = reinterpret_cast<const uint32_t*>(pSource);
227  uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination);
228  size_t size = std::min<size_t>( outSize, inSize );
229  for( size_t count = 0; count < ( size - 15 ); count += 16 )
230  {
231  *(dPtr++) = *(sPtr++);
232  *(dPtr++) = *(sPtr++);
233  *(dPtr++) = *(sPtr++);
234  *(dPtr++) = alpha;
235  sPtr++;
236  }
237  }
238  }
239  return;
240 
241  //-----------------------------------------------------------------------------
242  case DXGI_FORMAT_R16G16B16A16_TYPELESS:
243  case DXGI_FORMAT_R16G16B16A16_FLOAT:
244  case DXGI_FORMAT_R16G16B16A16_UNORM:
245  case DXGI_FORMAT_R16G16B16A16_UINT:
246  case DXGI_FORMAT_R16G16B16A16_SNORM:
247  case DXGI_FORMAT_R16G16B16A16_SINT:
248  case DXGI_FORMAT_Y416:
249  if ( inSize >= 8 && outSize >= 8 )
250  {
251  uint16_t alpha;
252  if ( format == DXGI_FORMAT_R16G16B16A16_FLOAT )
253  alpha = 0x3c00;
254  else if ( format == DXGI_FORMAT_R16G16B16A16_SNORM || format == DXGI_FORMAT_R16G16B16A16_SINT )
255  alpha = 0x7fff;
256  else
257  alpha = 0xffff;
258 
259  if ( pDestination == pSource )
260  {
261  uint16_t *dPtr = reinterpret_cast<uint16_t*>(pDestination);
262  for( size_t count = 0; count < ( outSize - 7 ); count += 8 )
263  {
264  dPtr += 3;
265  *(dPtr++) = alpha;
266  }
267  }
268  else
269  {
270  const uint16_t * __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource);
271  uint16_t * __restrict dPtr = reinterpret_cast<uint16_t*>(pDestination);
272  size_t size = std::min<size_t>( outSize, inSize );
273  for( size_t count = 0; count < ( size - 7 ); count += 8 )
274  {
275  *(dPtr++) = *(sPtr++);
276  *(dPtr++) = *(sPtr++);
277  *(dPtr++) = *(sPtr++);
278  *(dPtr++) = alpha;
279  sPtr++;
280  }
281  }
282  }
283  return;
284 
285  //-----------------------------------------------------------------------------
286  case DXGI_FORMAT_R10G10B10A2_TYPELESS:
287  case DXGI_FORMAT_R10G10B10A2_UNORM:
288  case DXGI_FORMAT_R10G10B10A2_UINT:
289  case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM:
290  case DXGI_FORMAT_Y410:
291  case 116 /* DXGI_FORMAT_R10G10B10_7E3_A2_FLOAT */:
292  case 117 /* DXGI_FORMAT_R10G10B10_6E4_A2_FLOAT */:
293  if ( inSize >= 4 && outSize >= 4 )
294  {
295  if ( pDestination == pSource )
296  {
297  uint32_t *dPtr = reinterpret_cast<uint32_t*>(pDestination);
298  for( size_t count = 0; count < ( outSize - 3 ); count += 4 )
299  {
300  *dPtr |= 0xC0000000;
301  ++dPtr;
302  }
303  }
304  else
305  {
306  const uint32_t * __restrict sPtr = reinterpret_cast<const uint32_t*>(pSource);
307  uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination);
308  size_t size = std::min<size_t>( outSize, inSize );
309  for( size_t count = 0; count < ( size - 3 ); count += 4 )
310  {
311  *(dPtr++) = *(sPtr++) | 0xC0000000;
312  }
313  }
314  }
315  return;
316 
317  //-----------------------------------------------------------------------------
318  case DXGI_FORMAT_R8G8B8A8_TYPELESS:
319  case DXGI_FORMAT_R8G8B8A8_UNORM:
320  case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
321  case DXGI_FORMAT_R8G8B8A8_UINT:
322  case DXGI_FORMAT_R8G8B8A8_SNORM:
323  case DXGI_FORMAT_R8G8B8A8_SINT:
324  case DXGI_FORMAT_B8G8R8A8_UNORM:
325  case DXGI_FORMAT_B8G8R8A8_TYPELESS:
326  case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
327  case DXGI_FORMAT_AYUV:
328  if ( inSize >= 4 && outSize >= 4 )
329  {
330  const uint32_t alpha = ( format == DXGI_FORMAT_R8G8B8A8_SNORM || format == DXGI_FORMAT_R8G8B8A8_SINT ) ? 0x7f000000 : 0xff000000;
331 
332  if ( pDestination == pSource )
333  {
334  uint32_t *dPtr = reinterpret_cast<uint32_t*>(pDestination);
335  for( size_t count = 0; count < ( outSize - 3 ); count += 4 )
336  {
337  uint32_t t = *dPtr & 0xFFFFFF;
338  t |= alpha;
339  *(dPtr++) = t;
340  }
341  }
342  else
343  {
344  const uint32_t * __restrict sPtr = reinterpret_cast<const uint32_t*>(pSource);
345  uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination);
346  size_t size = std::min<size_t>( outSize, inSize );
347  for( size_t count = 0; count < ( size - 3 ); count += 4 )
348  {
349  uint32_t t = *(sPtr++) & 0xFFFFFF;
350  t |= alpha;
351  *(dPtr++) = t;
352  }
353  }
354  }
355  return;
356 
357  //-----------------------------------------------------------------------------
358  case DXGI_FORMAT_B5G5R5A1_UNORM:
359  if ( inSize >= 2 && outSize >= 2 )
360  {
361  if ( pDestination == pSource )
362  {
363  uint16_t *dPtr = reinterpret_cast<uint16_t*>(pDestination);
364  for( size_t count = 0; count < ( outSize - 1 ); count += 2 )
365  {
366  *(dPtr++) |= 0x8000;
367  }
368  }
369  else
370  {
371  const uint16_t * __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource);
372  uint16_t * __restrict dPtr = reinterpret_cast<uint16_t*>(pDestination);
373  size_t size = std::min<size_t>( outSize, inSize );
374  for( size_t count = 0; count < ( size - 1 ); count += 2 )
375  {
376  *(dPtr++) = *(sPtr++) | 0x8000;
377  }
378  }
379  }
380  return;
381 
382  //-----------------------------------------------------------------------------
383  case DXGI_FORMAT_A8_UNORM:
384  memset( pDestination, 0xff, outSize );
385  return;
386 
387  //-----------------------------------------------------------------------------
388  case DXGI_FORMAT_B4G4R4A4_UNORM:
389  if ( inSize >= 2 && outSize >= 2 )
390  {
391  if ( pDestination == pSource )
392  {
393  uint16_t *dPtr = reinterpret_cast<uint16_t*>(pDestination);
394  for( size_t count = 0; count < ( outSize - 1 ); count += 2 )
395  {
396  *(dPtr++) |= 0xF000;
397  }
398  }
399  else
400  {
401  const uint16_t * __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource);
402  uint16_t * __restrict dPtr = reinterpret_cast<uint16_t*>(pDestination);
403  size_t size = std::min<size_t>( outSize, inSize );
404  for( size_t count = 0; count < ( size - 1 ); count += 2 )
405  {
406  *(dPtr++) = *(sPtr++) | 0xF000;
407  }
408  }
409  }
410  return;
411  }
412  }
413 
414  // Fall-through case is to just use memcpy (assuming this is not an in-place operation)
415  if ( pDestination == pSource )
416  return;
417 
418  size_t size = std::min<size_t>( outSize, inSize );
419  memcpy_s( pDestination, outSize, pSource, size );
420 }
421 
422 
423 //-------------------------------------------------------------------------------------
424 // Swizzles (RGB <-> BGR) an image row with optional clearing of alpha value to 1.0
425 // (can be used in place as well) otherwise copies the image row unmodified.
426 //-------------------------------------------------------------------------------------
427 _Use_decl_annotations_
428 void _SwizzleScanline( LPVOID pDestination, size_t outSize, LPCVOID pSource, size_t inSize, DXGI_FORMAT format, DWORD flags )
429 {
430  assert( pDestination && outSize > 0 );
431  assert( pSource && inSize > 0 );
432  assert( IsValid(format) && !IsPlanar(format) && !IsPalettized(format) );
433 
434  switch( format )
435  {
436  //---------------------------------------------------------------------------------
437  case DXGI_FORMAT_R10G10B10A2_TYPELESS:
438  case DXGI_FORMAT_R10G10B10A2_UNORM:
439  case DXGI_FORMAT_R10G10B10A2_UINT:
440  case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM:
441  if ( inSize >= 4 && outSize >= 4 )
442  {
443  if ( flags & TEXP_SCANLINE_LEGACY )
444  {
445  // Swap Red (R) and Blue (B) channel (used for D3DFMT_A2R10G10B10 legacy sources)
446  if ( pDestination == pSource )
447  {
448  uint32_t *dPtr = reinterpret_cast<uint32_t*>(pDestination);
449  for( size_t count = 0; count < ( outSize - 3 ); count += 4 )
450  {
451  uint32_t t = *dPtr;
452 
453  uint32_t t1 = (t & 0x3ff00000) >> 20;
454  uint32_t t2 = (t & 0x000003ff) << 20;
455  uint32_t t3 = (t & 0x000ffc00);
456  uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 0xC0000000 : (t & 0xC0000000);
457 
458  *(dPtr++) = t1 | t2 | t3 | ta;
459  }
460  }
461  else
462  {
463  const uint32_t * __restrict sPtr = reinterpret_cast<const uint32_t*>(pSource);
464  uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination);
465  size_t size = std::min<size_t>( outSize, inSize );
466  for( size_t count = 0; count < ( size - 3 ); count += 4 )
467  {
468  uint32_t t = *(sPtr++);
469 
470  uint32_t t1 = (t & 0x3ff00000) >> 20;
471  uint32_t t2 = (t & 0x000003ff) << 20;
472  uint32_t t3 = (t & 0x000ffc00);
473  uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 0xC0000000 : (t & 0xC0000000);
474 
475  *(dPtr++) = t1 | t2 | t3 | ta;
476  }
477  }
478  return;
479  }
480  }
481  break;
482 
483  //---------------------------------------------------------------------------------
484  case DXGI_FORMAT_R8G8B8A8_TYPELESS:
485  case DXGI_FORMAT_R8G8B8A8_UNORM:
486  case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
487  case DXGI_FORMAT_B8G8R8A8_UNORM:
488  case DXGI_FORMAT_B8G8R8X8_UNORM:
489  case DXGI_FORMAT_B8G8R8A8_TYPELESS:
490  case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
491  case DXGI_FORMAT_B8G8R8X8_TYPELESS:
492  case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB:
493  if ( inSize >= 4 && outSize >= 4 )
494  {
495  // Swap Red (R) and Blue (B) channels (used to convert from DXGI 1.1 BGR formats to DXGI 1.0 RGB)
496  if ( pDestination == pSource )
497  {
498  uint32_t *dPtr = reinterpret_cast<uint32_t*>(pDestination);
499  for( size_t count = 0; count < ( outSize - 3 ); count += 4 )
500  {
501  uint32_t t = *dPtr;
502 
503  uint32_t t1 = (t & 0x00ff0000) >> 16;
504  uint32_t t2 = (t & 0x000000ff) << 16;
505  uint32_t t3 = (t & 0x0000ff00);
506  uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 0xff000000 : (t & 0xFF000000);
507 
508  *(dPtr++) = t1 | t2 | t3 | ta;
509  }
510  }
511  else
512  {
513  const uint32_t * __restrict sPtr = reinterpret_cast<const uint32_t*>(pSource);
514  uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination);
515  size_t size = std::min<size_t>( outSize, inSize );
516  for( size_t count = 0; count < ( size - 3 ); count += 4 )
517  {
518  uint32_t t = *(sPtr++);
519 
520  uint32_t t1 = (t & 0x00ff0000) >> 16;
521  uint32_t t2 = (t & 0x000000ff) << 16;
522  uint32_t t3 = (t & 0x0000ff00);
523  uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 0xff000000 : (t & 0xFF000000);
524 
525  *(dPtr++) = t1 | t2 | t3 | ta;
526  }
527  }
528  return;
529  }
530  break;
531 
532  //---------------------------------------------------------------------------------
533  case DXGI_FORMAT_YUY2:
534  if ( inSize >= 4 && outSize >= 4 )
535  {
536  if ( flags & TEXP_SCANLINE_LEGACY )
537  {
538  // Reorder YUV components (used to convert legacy UYVY -> YUY2)
539  if ( pDestination == pSource )
540  {
541  uint32_t *dPtr = reinterpret_cast<uint32_t*>(pDestination);
542  for( size_t count = 0; count < ( outSize - 3 ); count += 4 )
543  {
544  uint32_t t = *dPtr;
545 
546  uint32_t t1 = (t & 0x000000ff) << 8;
547  uint32_t t2 = (t & 0x0000ff00) >> 8;
548  uint32_t t3 = (t & 0x00ff0000) << 8;
549  uint32_t t4 = (t & 0xff000000) >> 8;
550 
551  *(dPtr++) = t1 | t2 | t3 | t4;
552  }
553  }
554  else
555  {
556  const uint32_t * __restrict sPtr = reinterpret_cast<const uint32_t*>(pSource);
557  uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination);
558  size_t size = std::min<size_t>( outSize, inSize );
559  for( size_t count = 0; count < ( size - 3 ); count += 4 )
560  {
561  uint32_t t = *(sPtr++);
562 
563  uint32_t t1 = (t & 0x000000ff) << 8;
564  uint32_t t2 = (t & 0x0000ff00) >> 8;
565  uint32_t t3 = (t & 0x00ff0000) << 8;
566  uint32_t t4 = (t & 0xff000000) >> 8;
567 
568  *(dPtr++) = t1 | t2 | t3 | t4;
569  }
570  }
571  return;
572  }
573  }
574  break;
575  }
576 
577  // Fall-through case is to just use memcpy (assuming this is not an in-place operation)
578  if ( pDestination == pSource )
579  return;
580 
581  size_t size = std::min<size_t>( outSize, inSize );
582  memcpy_s( pDestination, outSize, pSource, size );
583 }
584 
585 
586 //-------------------------------------------------------------------------------------
587 // Converts an image row with optional clearing of alpha value to 1.0
588 // Returns true if supported, false if expansion case not supported
589 //-------------------------------------------------------------------------------------
590 _Use_decl_annotations_
591 bool _ExpandScanline( LPVOID pDestination, size_t outSize, DXGI_FORMAT outFormat,
592  LPCVOID pSource, size_t inSize, DXGI_FORMAT inFormat, DWORD flags )
593 {
594  assert( pDestination && outSize > 0 );
595  assert( pSource && inSize > 0 );
596  assert( IsValid(outFormat) && !IsPlanar(outFormat) && !IsPalettized(outFormat) );
597  assert( IsValid(inFormat) && !IsPlanar(inFormat) && !IsPalettized(inFormat) );
598 
599  switch( inFormat )
600  {
601  case DXGI_FORMAT_B5G6R5_UNORM:
602  if ( outFormat != DXGI_FORMAT_R8G8B8A8_UNORM )
603  return false;
604 
605  // DXGI_FORMAT_B5G6R5_UNORM -> DXGI_FORMAT_R8G8B8A8_UNORM
606  if ( inSize >= 2 && outSize >= 4 )
607  {
608  const uint16_t * __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource);
609  uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination);
610 
611  for( size_t ocount = 0, icount = 0; ( ( icount < ( inSize - 1 ) ) && ( ocount < ( outSize - 3 ) ) ); icount += 2, ocount += 4 )
612  {
613  uint16_t t = *(sPtr++);
614 
615  uint32_t t1 = ((t & 0xf800) >> 8) | ((t & 0xe000) >> 13);
616  uint32_t t2 = ((t & 0x07e0) << 5) | ((t & 0x0600) >> 5);
617  uint32_t t3 = ((t & 0x001f) << 19) | ((t & 0x001c) << 14);
618 
619  *(dPtr++) = t1 | t2 | t3 | 0xff000000;
620  }
621  return true;
622  }
623  return false;
624 
625  case DXGI_FORMAT_B5G5R5A1_UNORM:
626  if ( outFormat != DXGI_FORMAT_R8G8B8A8_UNORM )
627  return false;
628 
629  // DXGI_FORMAT_B5G5R5A1_UNORM -> DXGI_FORMAT_R8G8B8A8_UNORM
630  if ( inSize >= 2 && outSize >= 4 )
631  {
632  const uint16_t * __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource);
633  uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination);
634 
635  for( size_t ocount = 0, icount = 0; ( ( icount < ( inSize - 1 ) ) && ( ocount < ( outSize - 3 ) ) ); icount += 2, ocount += 4 )
636  {
637  uint16_t t = *(sPtr++);
638 
639  uint32_t t1 = ((t & 0x7c00) >> 7) | ((t & 0x7000) >> 12);
640  uint32_t t2 = ((t & 0x03e0) << 6) | ((t & 0x0380) << 1);
641  uint32_t t3 = ((t & 0x001f) << 19) | ((t & 0x001c) << 14);
642  uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 0xff000000 : ((t & 0x8000) ? 0xff000000 : 0);
643 
644  *(dPtr++) = t1 | t2 | t3 | ta;
645  }
646  return true;
647  }
648  return false;
649 
650  case DXGI_FORMAT_B4G4R4A4_UNORM:
651  if ( outFormat != DXGI_FORMAT_R8G8B8A8_UNORM )
652  return false;
653 
654  // DXGI_FORMAT_B4G4R4A4_UNORM -> DXGI_FORMAT_R8G8B8A8_UNORM
655  if ( inSize >= 2 && outSize >= 4 )
656  {
657  const uint16_t * __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource);
658  uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination);
659 
660  for( size_t ocount = 0, icount = 0; ( ( icount < ( inSize - 1 ) ) && ( ocount < ( outSize - 3 ) ) ); icount += 2, ocount += 4 )
661  {
662  uint16_t t = *(sPtr++);
663 
664  uint32_t t1 = ((t & 0x0f00) >> 4) | ((t & 0x0f00) >> 8);
665  uint32_t t2 = ((t & 0x00f0) << 8) | ((t & 0x00f0) << 4);
666  uint32_t t3 = ((t & 0x000f) << 20) | ((t & 0x000f) << 16);
667  uint32_t ta = ( flags & TEXP_SCANLINE_SETALPHA ) ? 0xff000000 : (((t & 0xf000) << 16) | ((t & 0xf000) << 12));
668 
669  *(dPtr++) = t1 | t2 | t3 | ta;
670  }
671  return true;
672  }
673  return false;
674  }
675 
676  return false;
677 }
678 
679 
680 //-------------------------------------------------------------------------------------
681 // Loads an image row into standard RGBA XMVECTOR (aligned) array
682 //-------------------------------------------------------------------------------------
// LOAD_SCANLINE: reinterprets pSource as an array of 'type', converts each
// element with 'func' (a DirectXMath XMLoad* routine), and writes the results
// through the caller's dPtr cursor, never advancing past ePtr. Relies on
// pSource, size, dPtr, and ePtr being in the expanding function's scope;
// expands to "return true" on success, "return false" when the row holds
// fewer bytes than one element. (No comments inside: backslash continuation.)
#define LOAD_SCANLINE( type, func )\
        if ( size >= sizeof(type) )\
        {\
            const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
            for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
            {\
                if ( dPtr >= ePtr ) break;\
                *(dPtr++) = func( sPtr++ );\
            }\
            return true;\
        }\
        return false;
695 
// LOAD_SCANLINE3: like LOAD_SCANLINE but for three-component source formats;
// the loaded XYZ is merged with 'defvec' via g_XMSelect1110 so the W (alpha)
// component comes from defvec. Same scope requirements and expansion shape
// as LOAD_SCANLINE.
#define LOAD_SCANLINE3( type, func, defvec )\
        if ( size >= sizeof(type) )\
        {\
            const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
            for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
            {\
                XMVECTOR v = func( sPtr++ );\
                if ( dPtr >= ePtr ) break;\
                *(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1110 );\
            }\
            return true;\
        }\
        return false;
709 
// LOAD_SCANLINE2: like LOAD_SCANLINE but for two-component source formats;
// the loaded XY is merged with 'defvec' via g_XMSelect1100 so the Z and W
// components come from defvec. Same scope requirements and expansion shape
// as LOAD_SCANLINE.
#define LOAD_SCANLINE2( type, func, defvec )\
        if ( size >= sizeof(type) )\
        {\
            const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
            for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
            {\
                XMVECTOR v = func( sPtr++ );\
                if ( dPtr >= ePtr ) break;\
                *(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1100 );\
            }\
            return true;\
        }\
        return false;
723 
724 _Use_decl_annotations_
725 bool _LoadScanline( XMVECTOR* pDestination, size_t count,
726  LPCVOID pSource, size_t size, DXGI_FORMAT format )
727 {
728  assert( pDestination && count > 0 && (((uintptr_t)pDestination & 0xF) == 0) );
729  assert( pSource && size > 0 );
730  assert( IsValid(format) && !IsTypeless(format, false) && !IsCompressed(format) && !IsPlanar(format) && !IsPalettized(format) );
731 
732  XMVECTOR* __restrict dPtr = pDestination;
733  if ( !dPtr )
734  return false;
735 
736  const XMVECTOR* ePtr = pDestination + count;
737 
738  switch( static_cast<int>(format) )
739  {
740  case DXGI_FORMAT_R32G32B32A32_FLOAT:
741  {
742  size_t msize = (size > (sizeof(XMVECTOR)*count)) ? (sizeof(XMVECTOR)*count) : size;
743  memcpy_s( dPtr, sizeof(XMVECTOR)*count, pSource, msize );
744  }
745  return true;
746 
747  case DXGI_FORMAT_R32G32B32A32_UINT:
748  LOAD_SCANLINE( XMUINT4, XMLoadUInt4 )
749 
750  case DXGI_FORMAT_R32G32B32A32_SINT:
751  LOAD_SCANLINE( XMINT4, XMLoadSInt4 )
752 
753  case DXGI_FORMAT_R32G32B32_FLOAT:
754  LOAD_SCANLINE3( XMFLOAT3, XMLoadFloat3, g_XMIdentityR3 )
755 
756  case DXGI_FORMAT_R32G32B32_UINT:
757  LOAD_SCANLINE3( XMUINT3, XMLoadUInt3, g_XMIdentityR3 )
758 
759  case DXGI_FORMAT_R32G32B32_SINT:
760  LOAD_SCANLINE3( XMINT3, XMLoadSInt3, g_XMIdentityR3 )
761 
762  case DXGI_FORMAT_R16G16B16A16_FLOAT:
763  LOAD_SCANLINE( XMHALF4, XMLoadHalf4 )
764 
765  case DXGI_FORMAT_R16G16B16A16_UNORM:
766  LOAD_SCANLINE( XMUSHORTN4, XMLoadUShortN4 )
767 
768  case DXGI_FORMAT_R16G16B16A16_UINT:
769  LOAD_SCANLINE( XMUSHORT4, XMLoadUShort4 )
770 
771  case DXGI_FORMAT_R16G16B16A16_SNORM:
772  LOAD_SCANLINE( XMSHORTN4, XMLoadShortN4 )
773 
774  case DXGI_FORMAT_R16G16B16A16_SINT:
775  LOAD_SCANLINE( XMSHORT4, XMLoadShort4 )
776 
777  case DXGI_FORMAT_R32G32_FLOAT:
778  LOAD_SCANLINE2( XMFLOAT2, XMLoadFloat2, g_XMIdentityR3 )
779 
780  case DXGI_FORMAT_R32G32_UINT:
781  LOAD_SCANLINE2( XMUINT2, XMLoadUInt2, g_XMIdentityR3 )
782 
783  case DXGI_FORMAT_R32G32_SINT:
784  LOAD_SCANLINE2( XMINT2, XMLoadSInt2, g_XMIdentityR3 )
785 
786  case DXGI_FORMAT_D32_FLOAT_S8X24_UINT:
787  {
788  const size_t psize = sizeof(float)+sizeof(uint32_t);
789  if ( size >= psize )
790  {
791  const float * sPtr = reinterpret_cast<const float*>(pSource);
792  for( size_t icount = 0; icount < ( size - psize + 1 ); icount += psize )
793  {
794  const uint8_t* ps8 = reinterpret_cast<const uint8_t*>( &sPtr[1] );
795  if ( dPtr >= ePtr ) break;
796  *(dPtr++) = XMVectorSet( sPtr[0], static_cast<float>( *ps8 ), 0.f, 1.f );
797  sPtr += 2;
798  }
799  return true;
800  }
801  }
802  return false;
803 
804  case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS:
805  {
806  const size_t psize = sizeof(float)+sizeof(uint32_t);
807  if ( size >= psize )
808  {
809  const float * sPtr = reinterpret_cast<const float*>(pSource);
810  for( size_t icount = 0; icount < ( size - psize + 1 ); icount += psize )
811  {
812  if ( dPtr >= ePtr ) break;
813  *(dPtr++) = XMVectorSet( sPtr[0], 0.f /* typeless component assumed zero */, 0.f, 1.f );
814  sPtr += 2;
815  }
816  return true;
817  }
818  }
819  return false;
820 
821  case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT:
822  {
823  const size_t psize = sizeof(float)+sizeof(uint32_t);
824  if ( size >= psize )
825  {
826  const float * sPtr = reinterpret_cast<const float*>(pSource);
827  for( size_t icount = 0; icount < ( size - psize + 1 ); icount += psize )
828  {
829  const uint8_t* pg8 = reinterpret_cast<const uint8_t*>( &sPtr[1] );
830  if ( dPtr >= ePtr ) break;
831  *(dPtr++) = XMVectorSet( 0.f /* typeless component assumed zero */, static_cast<float>( *pg8 ), 0.f, 1.f );
832  sPtr += 2;
833  }
834  return true;
835  }
836  }
837  return false;
838 
839  case DXGI_FORMAT_R10G10B10A2_UNORM:
840  LOAD_SCANLINE( XMUDECN4, XMLoadUDecN4 );
841 
842  case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM:
843 #if DIRECTX_MATH_VERSION >= 306
844  LOAD_SCANLINE( XMUDECN4, XMLoadUDecN4_XR );
845 #else
846  if ( size >= sizeof(XMUDECN4) )
847  {
848  const XMUDECN4 * __restrict sPtr = reinterpret_cast<const XMUDECN4*>(pSource);
849  for( size_t icount = 0; icount < ( size - sizeof(XMUDECN4) + 1 ); icount += sizeof(XMUDECN4) )
850  {
851  if ( dPtr >= ePtr ) break;
852 
853  int32_t ElementX = sPtr->v & 0x3FF;
854  int32_t ElementY = (sPtr->v >> 10) & 0x3FF;
855  int32_t ElementZ = (sPtr->v >> 20) & 0x3FF;
856 
857  XMVECTORF32 vResult = {
858  (float)(ElementX - 0x180) / 510.0f,
859  (float)(ElementY - 0x180) / 510.0f,
860  (float)(ElementZ - 0x180) / 510.0f,
861  (float)(sPtr->v >> 30) / 3.0f
862  };
863 
864  ++sPtr;
865 
866  *(dPtr++) = vResult.v;
867  }
868  return true;
869  }
870  return false;
871 #endif
872 
873  case DXGI_FORMAT_R10G10B10A2_UINT:
874  LOAD_SCANLINE( XMUDEC4, XMLoadUDec4 );
875 
876  case DXGI_FORMAT_R11G11B10_FLOAT:
877  LOAD_SCANLINE3( XMFLOAT3PK, XMLoadFloat3PK, g_XMIdentityR3 );
878 
879  case DXGI_FORMAT_R8G8B8A8_UNORM:
880  case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
881  LOAD_SCANLINE( XMUBYTEN4, XMLoadUByteN4 )
882 
883  case DXGI_FORMAT_R8G8B8A8_UINT:
884  LOAD_SCANLINE( XMUBYTE4, XMLoadUByte4 )
885 
886  case DXGI_FORMAT_R8G8B8A8_SNORM:
887  LOAD_SCANLINE( XMBYTEN4, XMLoadByteN4 )
888 
889  case DXGI_FORMAT_R8G8B8A8_SINT:
890  LOAD_SCANLINE( XMBYTE4, XMLoadByte4 )
891 
892  case DXGI_FORMAT_R16G16_FLOAT:
893  LOAD_SCANLINE2( XMHALF2, XMLoadHalf2, g_XMIdentityR3 )
894 
895  case DXGI_FORMAT_R16G16_UNORM:
896  LOAD_SCANLINE2( XMUSHORTN2, XMLoadUShortN2, g_XMIdentityR3 )
897 
898  case DXGI_FORMAT_R16G16_UINT:
899  LOAD_SCANLINE2( XMUSHORT2, XMLoadUShort2, g_XMIdentityR3 )
900 
901  case DXGI_FORMAT_R16G16_SNORM:
902  LOAD_SCANLINE2( XMSHORTN2, XMLoadShortN2, g_XMIdentityR3 )
903 
904  case DXGI_FORMAT_R16G16_SINT:
905  LOAD_SCANLINE2( XMSHORT2, XMLoadShort2, g_XMIdentityR3 )
906 
907  case DXGI_FORMAT_D32_FLOAT:
908  case DXGI_FORMAT_R32_FLOAT:
909  if ( size >= sizeof(float) )
910  {
911  const float* __restrict sPtr = reinterpret_cast<const float*>(pSource);
912  for( size_t icount = 0; icount < ( size - sizeof(float) + 1 ); icount += sizeof(float) )
913  {
914  XMVECTOR v = XMLoadFloat( sPtr++ );
915  if ( dPtr >= ePtr ) break;
916  *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v, g_XMSelect1000 );
917  }
918  return true;
919  }
920  return false;
921 
922  case DXGI_FORMAT_R32_UINT:
923  if ( size >= sizeof(uint32_t) )
924  {
925  const uint32_t* __restrict sPtr = reinterpret_cast<const uint32_t*>(pSource);
926  for( size_t icount = 0; icount < ( size - sizeof(uint32_t) + 1 ); icount += sizeof(uint32_t) )
927  {
928  XMVECTOR v = XMLoadInt( sPtr++ );
929  v = XMConvertVectorUIntToFloat( v, 0 );
930  if ( dPtr >= ePtr ) break;
931  *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v, g_XMSelect1000 );
932  }
933  return true;
934  }
935  return false;
936 
937  case DXGI_FORMAT_R32_SINT:
938  if ( size >= sizeof(int32_t) )
939  {
940  const int32_t * __restrict sPtr = reinterpret_cast<const int32_t*>(pSource);
941  for( size_t icount = 0; icount < ( size - sizeof(int32_t) + 1 ); icount += sizeof(int32_t) )
942  {
943  XMVECTOR v = XMLoadInt( reinterpret_cast<const uint32_t*> (sPtr++) );
944  v = XMConvertVectorIntToFloat( v, 0 );
945  if ( dPtr >= ePtr ) break;
946  *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v, g_XMSelect1000 );
947  }
948  return true;
949  }
950  return false;
951 
952  case DXGI_FORMAT_D24_UNORM_S8_UINT:
953  if ( size >= sizeof(uint32_t) )
954  {
955  const uint32_t * sPtr = reinterpret_cast<const uint32_t*>(pSource);
956  for( size_t icount = 0; icount < ( size - sizeof(uint32_t) + 1 ); icount += sizeof(uint32_t) )
957  {
958  float d = static_cast<float>( *sPtr & 0xFFFFFF ) / 16777215.f;
959  float s = static_cast<float>( ( *sPtr & 0xFF000000 ) >> 24 );
960  ++sPtr;
961  if ( dPtr >= ePtr ) break;
962  *(dPtr++) = XMVectorSet( d, s, 0.f, 1.f );
963  }
964  return true;
965  }
966  return false;
967 
968  case DXGI_FORMAT_R24_UNORM_X8_TYPELESS:
969  if ( size >= sizeof(uint32_t) )
970  {
971  const uint32_t * sPtr = reinterpret_cast<const uint32_t*>(pSource);
972  for( size_t icount = 0; icount < ( size - sizeof(uint32_t) + 1 ); icount += sizeof(uint32_t) )
973  {
974  float r = static_cast<float>( *sPtr & 0xFFFFFF ) / 16777215.f;
975  ++sPtr;
976  if ( dPtr >= ePtr ) break;
977  *(dPtr++) = XMVectorSet( r, 0.f /* typeless component assumed zero */, 0.f, 1.f );
978  }
979  return true;
980  }
981  return false;
982 
983  case DXGI_FORMAT_X24_TYPELESS_G8_UINT:
984  if ( size >= sizeof(uint32_t) )
985  {
986  const uint32_t * sPtr = reinterpret_cast<const uint32_t*>(pSource);
987  for( size_t icount = 0; icount < ( size - sizeof(uint32_t) + 1 ); icount += sizeof(uint32_t) )
988  {
989  float g = static_cast<float>( ( *sPtr & 0xFF000000 ) >> 24 );
990  ++sPtr;
991  if ( dPtr >= ePtr ) break;
992  *(dPtr++) = XMVectorSet( 0.f /* typeless component assumed zero */, g, 0.f, 1.f );
993  }
994  return true;
995  }
996  return false;
997 
998  case DXGI_FORMAT_R8G8_UNORM:
999  LOAD_SCANLINE2( XMUBYTEN2, XMLoadUByteN2, g_XMIdentityR3 )
1000 
1001  case DXGI_FORMAT_R8G8_UINT:
1002  LOAD_SCANLINE2( XMUBYTE2, XMLoadUByte2, g_XMIdentityR3 )
1003 
1004  case DXGI_FORMAT_R8G8_SNORM:
1005  LOAD_SCANLINE2( XMBYTEN2, XMLoadByteN2, g_XMIdentityR3 )
1006 
1007  case DXGI_FORMAT_R8G8_SINT:
1008  LOAD_SCANLINE2( XMBYTE2, XMLoadByte2, g_XMIdentityR3 )
1009 
1010  case DXGI_FORMAT_R16_FLOAT:
1011  if ( size >= sizeof(HALF) )
1012  {
1013  const HALF * __restrict sPtr = reinterpret_cast<const HALF*>(pSource);
1014  for( size_t icount = 0; icount < ( size - sizeof(HALF) + 1 ); icount += sizeof(HALF) )
1015  {
1016  if ( dPtr >= ePtr ) break;
1017  *(dPtr++) = XMVectorSet( XMConvertHalfToFloat(*sPtr++), 0.f, 0.f, 1.f );
1018  }
1019  return true;
1020  }
1021  return false;
1022 
1023  case DXGI_FORMAT_D16_UNORM:
1024  case DXGI_FORMAT_R16_UNORM:
1025  if ( size >= sizeof(uint16_t) )
1026  {
1027  const uint16_t* __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource);
1028  for( size_t icount = 0; icount < ( size - sizeof(uint16_t) + 1 ); icount += sizeof(uint16_t) )
1029  {
1030  if ( dPtr >= ePtr ) break;
1031  *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++) / 65535.f, 0.f, 0.f, 1.f );
1032  }
1033  return true;
1034  }
1035  return false;
1036 
1037  case DXGI_FORMAT_R16_UINT:
1038  if ( size >= sizeof(uint16_t) )
1039  {
1040  const uint16_t * __restrict sPtr = reinterpret_cast<const uint16_t*>(pSource);
1041  for( size_t icount = 0; icount < ( size - sizeof(uint16_t) + 1 ); icount += sizeof(uint16_t) )
1042  {
1043  if ( dPtr >= ePtr ) break;
1044  *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++), 0.f, 0.f, 1.f );
1045  }
1046  return true;
1047  }
1048  return false;
1049 
1050  case DXGI_FORMAT_R16_SNORM:
1051  if ( size >= sizeof(int16_t) )
1052  {
1053  const int16_t * __restrict sPtr = reinterpret_cast<const int16_t*>(pSource);
1054  for( size_t icount = 0; icount < ( size - sizeof(int16_t) + 1 ); icount += sizeof(int16_t) )
1055  {
1056  if ( dPtr >= ePtr ) break;
1057  *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++) / 32767.f, 0.f, 0.f, 1.f );
1058  }
1059  return true;
1060  }
1061  return false;
1062 
1063  case DXGI_FORMAT_R16_SINT:
1064  if ( size >= sizeof(int16_t) )
1065  {
1066  const int16_t * __restrict sPtr = reinterpret_cast<const int16_t*>(pSource);
1067  for( size_t icount = 0; icount < ( size - sizeof(int16_t) + 1 ); icount += sizeof(int16_t) )
1068  {
1069  if ( dPtr >= ePtr ) break;
1070  *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++), 0.f, 0.f, 1.f );
1071  }
1072  return true;
1073  }
1074  return false;
1075 
1076  case DXGI_FORMAT_R8_UNORM:
1077  if ( size >= sizeof(uint8_t) )
1078  {
1079  const uint8_t * __restrict sPtr = reinterpret_cast<const uint8_t*>(pSource);
1080  for( size_t icount = 0; icount < size; icount += sizeof(uint8_t) )
1081  {
1082  if ( dPtr >= ePtr ) break;
1083  *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++) / 255.f, 0.f, 0.f, 1.f );
1084  }
1085  return true;
1086  }
1087  return false;
1088 
1089  case DXGI_FORMAT_R8_UINT:
1090  if ( size >= sizeof(uint8_t) )
1091  {
1092  const uint8_t * __restrict sPtr = reinterpret_cast<const uint8_t*>(pSource);
1093  for( size_t icount = 0; icount < size; icount += sizeof(uint8_t) )
1094  {
1095  if ( dPtr >= ePtr ) break;
1096  *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++), 0.f, 0.f, 1.f );
1097  }
1098  return true;
1099  }
1100  return false;
1101 
1102  case DXGI_FORMAT_R8_SNORM:
1103  if ( size >= sizeof(int8_t) )
1104  {
1105  const int8_t * __restrict sPtr = reinterpret_cast<const int8_t*>(pSource);
1106  for( size_t icount = 0; icount < size; icount += sizeof(int8_t) )
1107  {
1108  if ( dPtr >= ePtr ) break;
1109  *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++) / 127.f, 0.f, 0.f, 1.f );
1110  }
1111  return true;
1112  }
1113  return false;
1114 
1115  case DXGI_FORMAT_R8_SINT:
1116  if ( size >= sizeof(int8_t) )
1117  {
1118  const int8_t * __restrict sPtr = reinterpret_cast<const int8_t*>(pSource);
1119  for( size_t icount = 0; icount < size; icount += sizeof(int8_t) )
1120  {
1121  if ( dPtr >= ePtr ) break;
1122  *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++), 0.f, 0.f, 1.f );
1123  }
1124  return true;
1125  }
1126  return false;
1127 
1128  case DXGI_FORMAT_A8_UNORM:
1129  if ( size >= sizeof(uint8_t) )
1130  {
1131  const uint8_t * __restrict sPtr = reinterpret_cast<const uint8_t*>(pSource);
1132  for( size_t icount = 0; icount < size; icount += sizeof(uint8_t) )
1133  {
1134  if ( dPtr >= ePtr ) break;
1135  *(dPtr++) = XMVectorSet( 0.f, 0.f, 0.f, static_cast<float>(*sPtr++) / 255.f );
1136  }
1137  return true;
1138  }
1139  return false;
1140 
1141  case DXGI_FORMAT_R1_UNORM:
1142  if ( size >= sizeof(uint8_t) )
1143  {
1144  const uint8_t * __restrict sPtr = reinterpret_cast<const uint8_t*>(pSource);
1145  for( size_t icount = 0; icount < size; icount += sizeof(uint8_t) )
1146  {
1147  for( size_t bcount = 8; bcount > 0; --bcount )
1148  {
1149  if ( dPtr >= ePtr ) break;
1150  *(dPtr++) = XMVectorSet( (((*sPtr >> (bcount-1)) & 0x1) ? 1.f : 0.f), 0.f, 0.f, 1.f );
1151  }
1152 
1153  ++sPtr;
1154  }
1155  return true;
1156  }
1157  return false;
1158 
1159  case DXGI_FORMAT_R9G9B9E5_SHAREDEXP:
1160 #if DIRECTX_MATH_VERSION >= 306
1161  LOAD_SCANLINE3( XMFLOAT3SE, XMLoadFloat3SE, g_XMIdentityR3 )
1162 #else
1163  if ( size >= sizeof(XMFLOAT3SE) )
1164  {
1165  const XMFLOAT3SE * __restrict sPtr = reinterpret_cast<const XMFLOAT3SE*>(pSource);
1166  for( size_t icount = 0; icount < ( size - sizeof(XMFLOAT3SE) + 1 ); icount += sizeof(XMFLOAT3SE) )
1167  {
1168  union { float f; int32_t i; } fi;
1169  fi.i = 0x33800000 + (sPtr->e << 23);
1170  float Scale = fi.f;
1171 
1172  XMVECTORF32 v = {
1173  Scale * float( sPtr->xm ),
1174  Scale * float( sPtr->ym ),
1175  Scale * float( sPtr->zm ),
1176  1.0f };
1177 
1178  if ( dPtr >= ePtr ) break;
1179  *(dPtr++) = v;
1180  }
1181  return true;
1182  }
1183  return false;
1184 #endif
1185 
1186  case DXGI_FORMAT_R8G8_B8G8_UNORM:
1187  if ( size >= sizeof(XMUBYTEN4) )
1188  {
1189  const XMUBYTEN4 * __restrict sPtr = reinterpret_cast<const XMUBYTEN4*>(pSource);
1190  for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) )
1191  {
1192  XMVECTOR v = XMLoadUByteN4( sPtr++ );
1193  XMVECTOR v1 = XMVectorSwizzle<0, 3, 2, 1>( v );
1194  if ( dPtr >= ePtr ) break;
1195  *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v, g_XMSelect1110 );
1196  if ( dPtr >= ePtr ) break;
1197  *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v1, g_XMSelect1110 );
1198  }
1199  return true;
1200  }
1201  return false;
1202 
1203  case DXGI_FORMAT_G8R8_G8B8_UNORM:
1204  if ( size >= sizeof(XMUBYTEN4) )
1205  {
1206  const XMUBYTEN4 * __restrict sPtr = reinterpret_cast<const XMUBYTEN4*>(pSource);
1207  for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) )
1208  {
1209  XMVECTOR v = XMLoadUByteN4( sPtr++ );
1210  XMVECTOR v0 = XMVectorSwizzle<1, 0, 3, 2>( v );
1211  XMVECTOR v1 = XMVectorSwizzle<1, 2, 3, 0>( v );
1212  if ( dPtr >= ePtr ) break;
1213  *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v0, g_XMSelect1110 );
1214  if ( dPtr >= ePtr ) break;
1215  *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v1, g_XMSelect1110 );
1216  }
1217  return true;
1218  }
1219  return false;
1220 
1221  case DXGI_FORMAT_B5G6R5_UNORM:
1222  if ( size >= sizeof(XMU565) )
1223  {
1224  static XMVECTORF32 s_Scale = { 1.f/31.f, 1.f/63.f, 1.f/31.f, 1.f };
1225  const XMU565 * __restrict sPtr = reinterpret_cast<const XMU565*>(pSource);
1226  for( size_t icount = 0; icount < ( size - sizeof(XMU565) + 1 ); icount += sizeof(XMU565) )
1227  {
1228  XMVECTOR v = XMLoadU565( sPtr++ );
1229  v = XMVectorMultiply( v, s_Scale );
1230  v = XMVectorSwizzle<2, 1, 0, 3>( v );
1231  if ( dPtr >= ePtr ) break;
1232  *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v, g_XMSelect1110 );
1233  }
1234  return true;
1235  }
1236  return false;
1237 
1238  case DXGI_FORMAT_B5G5R5A1_UNORM:
1239  if ( size >= sizeof(XMU555) )
1240  {
1241  static XMVECTORF32 s_Scale = { 1.f/31.f, 1.f/31.f, 1.f/31.f, 1.f };
1242  const XMU555 * __restrict sPtr = reinterpret_cast<const XMU555*>(pSource);
1243  for( size_t icount = 0; icount < ( size - sizeof(XMU555) + 1 ); icount += sizeof(XMU555) )
1244  {
1245  XMVECTOR v = XMLoadU555( sPtr++ );
1246  v = XMVectorMultiply( v, s_Scale );
1247  if ( dPtr >= ePtr ) break;
1248  *(dPtr++) = XMVectorSwizzle<2, 1, 0, 3>( v );
1249  }
1250  return true;
1251  }
1252  return false;
1253 
1254  case DXGI_FORMAT_B8G8R8A8_UNORM:
1255  case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
1256  if ( size >= sizeof(XMUBYTEN4) )
1257  {
1258  const XMUBYTEN4 * __restrict sPtr = reinterpret_cast<const XMUBYTEN4*>(pSource);
1259  for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) )
1260  {
1261  XMVECTOR v = XMLoadUByteN4( sPtr++ );
1262  if ( dPtr >= ePtr ) break;
1263  *(dPtr++) = XMVectorSwizzle<2, 1, 0, 3>( v );
1264  }
1265  return true;
1266  }
1267  return false;
1268 
1269  case DXGI_FORMAT_B8G8R8X8_UNORM:
1270  case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB:
1271  if ( size >= sizeof(XMUBYTEN4) )
1272  {
1273  const XMUBYTEN4 * __restrict sPtr = reinterpret_cast<const XMUBYTEN4*>(pSource);
1274  for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) )
1275  {
1276  XMVECTOR v = XMLoadUByteN4( sPtr++ );
1277  v = XMVectorSwizzle<2, 1, 0, 3>( v );
1278  if ( dPtr >= ePtr ) break;
1279  *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v, g_XMSelect1110 );
1280  }
1281  return true;
1282  }
1283  return false;
1284 
1285  case DXGI_FORMAT_AYUV:
1286  if ( size >= sizeof(XMUBYTEN4) )
1287  {
1288  const XMUBYTEN4 * __restrict sPtr = reinterpret_cast<const XMUBYTEN4*>(pSource);
1289  for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) )
1290  {
1291  int v = int(sPtr->x) - 128;
1292  int u = int(sPtr->y) - 128;
1293  int y = int(sPtr->z) - 16;
1294  unsigned int a = sPtr->w;
1295  ++sPtr;
1296 
1297  // http://msdn.microsoft.com/en-us/library/windows/desktop/dd206750.aspx
1298 
1299  // Y’ = Y - 16
1300  // Cb’ = Cb - 128
1301  // Cr’ = Cr - 128
1302 
1303  // R = 1.1644Y’ + 1.5960Cr’
1304  // G = 1.1644Y’ - 0.3917Cb’ - 0.8128Cr’
1305  // B = 1.1644Y’ + 2.0172Cb’
1306 
1307  int r = (298 * y + 409 * v + 128) >> 8;
1308  int g = (298 * y - 100 * u - 208 * v + 128) >> 8;
1309  int b = (298 * y + 516 * u + 128) >> 8;
1310 
1311  if ( dPtr >= ePtr ) break;
1312  *(dPtr++) = XMVectorSet( float( std::min<int>( std::max<int>( r, 0 ), 255 ) ) / 255.f,
1313  float( std::min<int>( std::max<int>( g, 0 ), 255 ) ) / 255.f,
1314  float( std::min<int>( std::max<int>( b, 0 ), 255 ) ) / 255.f,
1315  float( a / 255.f ) );
1316  }
1317  return true;
1318  }
1319  return false;
1320 
1321  case DXGI_FORMAT_Y410:
1322  if ( size >= sizeof(XMUDECN4) )
1323  {
1324  const XMUDECN4 * __restrict sPtr = reinterpret_cast<const XMUDECN4*>(pSource);
1325  for( size_t icount = 0; icount < ( size - sizeof(XMUDECN4) + 1 ); icount += sizeof(XMUDECN4) )
1326  {
1327  int64_t u = int(sPtr->x) - 512;
1328  int64_t y = int(sPtr->y) - 64;
1329  int64_t v = int(sPtr->z) - 512;
1330  unsigned int a = sPtr->w;
1331  ++sPtr;
1332 
1333  // http://msdn.microsoft.com/en-us/library/windows/desktop/bb970578.aspx
1334 
1335  // Y’ = Y - 64
1336  // Cb’ = Cb - 512
1337  // Cr’ = Cr - 512
1338 
1339  // R = 1.1678Y’ + 1.6007Cr’
1340  // G = 1.1678Y’ - 0.3929Cb’ - 0.8152Cr’
1341  // B = 1.1678Y’ + 2.0232Cb’
1342 
1343  int r = static_cast<int>( (76533 * y + 104905 * v + 32768) >> 16 );
1344  int g = static_cast<int>( (76533 * y - 25747 * u - 53425 * v + 32768) >> 16 );
1345  int b = static_cast<int>( (76533 * y + 132590 * u + 32768) >> 16 );
1346 
1347  if ( dPtr >= ePtr ) break;
1348  *(dPtr++) = XMVectorSet( float( std::min<int>( std::max<int>( r, 0 ), 1023 ) ) / 1023.f,
1349  float( std::min<int>( std::max<int>( g, 0 ), 1023 ) ) / 1023.f,
1350  float( std::min<int>( std::max<int>( b, 0 ), 1023 ) ) / 1023.f,
1351  float( a / 3.f ) );
1352  }
1353  return true;
1354  }
1355  return false;
1356 
1357  case DXGI_FORMAT_Y416:
1358  if ( size >= sizeof(XMUSHORTN4) )
1359  {
1360  const XMUSHORTN4 * __restrict sPtr = reinterpret_cast<const XMUSHORTN4*>(pSource);
1361  for( size_t icount = 0; icount < ( size - sizeof(XMUSHORTN4) + 1 ); icount += sizeof(XMUSHORTN4) )
1362  {
1363  int64_t u = int64_t(sPtr->x) - 32768;
1364  int64_t y = int64_t(sPtr->y) - 4096;
1365  int64_t v = int64_t(sPtr->z) - 32768;
1366  unsigned int a = sPtr->w;
1367  ++sPtr;
1368 
1369  // http://msdn.microsoft.com/en-us/library/windows/desktop/bb970578.aspx
1370 
1371  // Y’ = Y - 4096
1372  // Cb’ = Cb - 32768
1373  // Cr’ = Cr - 32768
1374 
1375  // R = 1.1689Y’ + 1.6023Cr’
1376  // G = 1.1689Y’ - 0.3933Cb’ - 0.8160Cr’
1377  // B = 1.1689Y’+ 2.0251Cb’
1378 
1379  int r = static_cast<int>( (76607 * y + 105006 * v + 32768) >> 16 );
1380  int g = static_cast<int>( (76607 * y - 25772 * u - 53477 * v + 32768) >> 16 );
1381  int b = static_cast<int>( (76607 * y + 132718 * u + 32768) >> 16 );
1382 
1383  if ( dPtr >= ePtr ) break;
1384  *(dPtr++) = XMVectorSet( float( std::min<int>( std::max<int>( r, 0 ), 65535 ) ) / 65535.f,
1385  float( std::min<int>( std::max<int>( g, 0 ), 65535 ) ) / 65535.f,
1386  float( std::min<int>( std::max<int>( b, 0 ), 65535 ) ) / 65535.f,
1387  float( std::min<int>( std::max<int>( a, 0 ), 65535 ) ) / 65535.f );
1388  }
1389  return true;
1390  }
1391  return false;
1392 
1393  case DXGI_FORMAT_YUY2:
1394  if ( size >= sizeof(XMUBYTEN4) )
1395  {
1396  const XMUBYTEN4 * __restrict sPtr = reinterpret_cast<const XMUBYTEN4*>(pSource);
1397  for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) )
1398  {
1399  int y0 = int(sPtr->x) - 16;
1400  int u = int(sPtr->y) - 128;
1401  int y1 = int(sPtr->z) - 16;
1402  int v = int(sPtr->w) - 128;
1403  ++sPtr;
1404 
1405  // See AYUV
1406  int r = (298 * y0 + 409 * v + 128) >> 8;
1407  int g = (298 * y0 - 100 * u - 208 * v + 128) >> 8;
1408  int b = (298 * y0 + 516 * u + 128) >> 8;
1409 
1410  if ( dPtr >= ePtr ) break;
1411  *(dPtr++) = XMVectorSet( float( std::min<int>( std::max<int>( r, 0 ), 255 ) ) / 255.f,
1412  float( std::min<int>( std::max<int>( g, 0 ), 255 ) ) / 255.f,
1413  float( std::min<int>( std::max<int>( b, 0 ), 255 ) ) / 255.f,
1414  1.f );
1415 
1416  r = (298 * y1 + 409 * v + 128) >> 8;
1417  g = (298 * y1 - 100 * u - 208 * v + 128) >> 8;
1418  b = (298 * y1 + 516 * u + 128) >> 8;
1419 
1420  if ( dPtr >= ePtr ) break;
1421  *(dPtr++) = XMVectorSet( float( std::min<int>( std::max<int>( r, 0 ), 255 ) ) / 255.f,
1422  float( std::min<int>( std::max<int>( g, 0 ), 255 ) ) / 255.f,
1423  float( std::min<int>( std::max<int>( b, 0 ), 255 ) ) / 255.f,
1424  1.f );
1425  }
1426  return true;
1427  }
1428  return false;
1429 
1430  case DXGI_FORMAT_Y210:
1431  // Same as Y216 with least significant 6 bits set to zero
1432  if ( size >= sizeof(XMUSHORTN4) )
1433  {
1434  const XMUSHORTN4 * __restrict sPtr = reinterpret_cast<const XMUSHORTN4*>(pSource);
1435  for( size_t icount = 0; icount < ( size - sizeof(XMUSHORTN4) + 1 ); icount += sizeof(XMUSHORTN4) )
1436  {
1437  int64_t y0 = int64_t(sPtr->x >> 6) - 64;
1438  int64_t u = int64_t(sPtr->y >> 6) - 512;
1439  int64_t y1 = int64_t(sPtr->z >> 6) - 64;
1440  int64_t v = int64_t(sPtr->w >> 6) - 512;
1441  ++sPtr;
1442 
1443  // See Y410
1444  int r = static_cast<int>( (76533 * y0 + 104905 * v + 32768) >> 16 );
1445  int g = static_cast<int>( (76533 * y0 - 25747 * u - 53425 * v + 32768) >> 16 );
1446  int b = static_cast<int>( (76533 * y0 + 132590 * u + 32768) >> 16 );
1447 
1448  if ( dPtr >= ePtr ) break;
1449  *(dPtr++) = XMVectorSet( float( std::min<int>( std::max<int>( r, 0 ), 1023 ) ) / 1023.f,
1450  float( std::min<int>( std::max<int>( g, 0 ), 1023 ) ) / 1023.f,
1451  float( std::min<int>( std::max<int>( b, 0 ), 1023 ) ) / 1023.f,
1452  1.f );
1453 
1454  r = static_cast<int>( (76533 * y1 + 104905 * v + 32768) >> 16 );
1455  g = static_cast<int>( (76533 * y1 - 25747 * u - 53425 * v + 32768) >> 16 );
1456  b = static_cast<int>( (76533 * y1 + 132590 * u + 32768) >> 16 );
1457 
1458  if ( dPtr >= ePtr ) break;
1459  *(dPtr++) = XMVectorSet( float( std::min<int>( std::max<int>( r, 0 ), 1023 ) ) / 1023.f,
1460  float( std::min<int>( std::max<int>( g, 0 ), 1023 ) ) / 1023.f,
1461  float( std::min<int>( std::max<int>( b, 0 ), 1023 ) ) / 1023.f,
1462  1.f );
1463  }
1464  return true;
1465  }
1466  return false;
1467 
1468  case DXGI_FORMAT_Y216:
1469  if ( size >= sizeof(XMUSHORTN4) )
1470  {
1471  const XMUSHORTN4 * __restrict sPtr = reinterpret_cast<const XMUSHORTN4*>(pSource);
1472  for( size_t icount = 0; icount < ( size - sizeof(XMUSHORTN4) + 1 ); icount += sizeof(XMUSHORTN4) )
1473  {
1474  int64_t y0 = int64_t(sPtr->x) - 4096;
1475  int64_t u = int64_t(sPtr->y) - 32768;
1476  int64_t y1 = int64_t(sPtr->z) - 4096;
1477  int64_t v = int64_t(sPtr->w) - 32768;
1478  ++sPtr;
1479 
1480  // See Y416
1481  int r = static_cast<int>( (76607 * y0 + 105006 * v + 32768) >> 16 );
1482  int g = static_cast<int>( (76607 * y0 - 25772 * u - 53477 * v + 32768) >> 16 );
1483  int b = static_cast<int>( (76607 * y0 + 132718 * u + 32768) >> 16 );
1484 
1485  if ( dPtr >= ePtr ) break;
1486  *(dPtr++) = XMVectorSet( float( std::min<int>( std::max<int>( r, 0 ), 65535 ) ) / 65535.f,
1487  float( std::min<int>( std::max<int>( g, 0 ), 65535 ) ) / 65535.f,
1488  float( std::min<int>( std::max<int>( b, 0 ), 65535 ) ) / 65535.f,
1489  1.f );
1490 
1491  r = static_cast<int>( (76607 * y1 + 105006 * v + 32768) >> 16 );
1492  g = static_cast<int>( (76607 * y1 - 25772 * u - 53477 * v + 32768) >> 16 );
1493  b = static_cast<int>( (76607 * y1 + 132718 * u + 32768) >> 16 );
1494 
1495  if ( dPtr >= ePtr ) break;
1496  *(dPtr++) = XMVectorSet( float( std::min<int>( std::max<int>( r, 0 ), 65535 ) ) / 65535.f,
1497  float( std::min<int>( std::max<int>( g, 0 ), 65535 ) ) / 65535.f,
1498  float( std::min<int>( std::max<int>( b, 0 ), 65535 ) ) / 65535.f,
1499  1.f );
1500  }
1501  return true;
1502  }
1503  return false;
1504 
1505  case DXGI_FORMAT_B4G4R4A4_UNORM:
1506  if ( size >= sizeof(XMUNIBBLE4) )
1507  {
1508  static XMVECTORF32 s_Scale = { 1.f/15.f, 1.f/15.f, 1.f/15.f, 1.f/15.f };
1509  const XMUNIBBLE4 * __restrict sPtr = reinterpret_cast<const XMUNIBBLE4*>(pSource);
1510  for( size_t icount = 0; icount < ( size - sizeof(XMUNIBBLE4) + 1 ); icount += sizeof(XMUNIBBLE4) )
1511  {
1512  XMVECTOR v = XMLoadUNibble4( sPtr++ );
1513  v = XMVectorMultiply( v, s_Scale );
1514  if ( dPtr >= ePtr ) break;
1515  *(dPtr++) = XMVectorSwizzle<2, 1, 0, 3>( v );
1516  }
1517  return true;
1518  }
1519  return false;
1520 
1521  case 116 /* DXGI_FORMAT_R10G10B10_7E3_A2_FLOAT */:
1522  // Xbox One specific 7e3 format
1523  if ( size >= sizeof(XMUDECN4) )
1524  {
1525  const XMUDECN4 * __restrict sPtr = reinterpret_cast<const XMUDECN4*>(pSource);
1526  for( size_t icount = 0; icount < ( size - sizeof(XMUDECN4) + 1 ); icount += sizeof(XMUDECN4) )
1527  {
1528  if ( dPtr >= ePtr ) break;
1529 
1530  XMVECTORF32 vResult = {
1531  FloatFrom7e3(sPtr->x),
1532  FloatFrom7e3(sPtr->y),
1533  FloatFrom7e3(sPtr->z),
1534  (float)(sPtr->v >> 30) / 3.0f
1535  };
1536 
1537  ++sPtr;
1538 
1539  *(dPtr++) = vResult.v;
1540  }
1541  return true;
1542  }
1543  return false;
1544 
1545  case 117 /* DXGI_FORMAT_R10G10B10_6E4_A2_FLOAT */:
1546  // Xbox One specific 6e4 format
1547  if ( size >= sizeof(XMUDECN4) )
1548  {
1549  const XMUDECN4 * __restrict sPtr = reinterpret_cast<const XMUDECN4*>(pSource);
1550  for( size_t icount = 0; icount < ( size - sizeof(XMUDECN4) + 1 ); icount += sizeof(XMUDECN4) )
1551  {
1552  if ( dPtr >= ePtr ) break;
1553 
1554  XMVECTORF32 vResult = {
1555  FloatFrom6e4(sPtr->x),
1556  FloatFrom6e4(sPtr->y),
1557  FloatFrom6e4(sPtr->z),
1558  (float)(sPtr->v >> 30) / 3.0f
1559  };
1560 
1561  ++sPtr;
1562 
1563  *(dPtr++) = vResult.v;
1564  }
1565  return true;
1566  }
1567  return false;
1568 
1569  // We don't support the planar or palettized formats
1570 
1571  default:
1572  return false;
1573  }
1574 }
1575 
// The LOAD_SCANLINE* helper macros are only meaningful inside _LoadScanline;
// undefine them here so they cannot leak into the rest of the translation unit.
#undef LOAD_SCANLINE
#undef LOAD_SCANLINE3
#undef LOAD_SCANLINE2
1579 
1580 
1581 //-------------------------------------------------------------------------------------
1582 // Stores an image row from standard RGBA XMVECTOR (aligned) array
1583 //-------------------------------------------------------------------------------------
// Expands to the body of one _StoreScanline case: packs successive source
// XMVECTORs into 'type' elements via 'func', stopping at whichever runs out
// first — the destination row ('size' bytes at 'pDestination') or the source
// run ('sPtr' up to 'ePtr'). All four of those names must be in scope at the
// expansion site. Yields true if at least one element fits, false otherwise.
#define STORE_SCANLINE( type, func )\
    if ( size >= sizeof(type) )\
    {\
        type * __restrict outPtr = reinterpret_cast<type*>(pDestination);\
        for( size_t byteOfs = 0; byteOfs < ( size - sizeof(type) + 1 ); byteOfs += sizeof(type) )\
        {\
            if ( sPtr >= ePtr ) break;\
            func( outPtr++, *sPtr++ );\
        }\
        return true; \
    }\
    return false;
1596 
1597 _Use_decl_annotations_
1598 bool _StoreScanline( LPVOID pDestination, size_t size, DXGI_FORMAT format,
1599  const XMVECTOR* pSource, size_t count, float threshold )
1600 {
1601  assert( pDestination && size > 0 );
1602  assert( pSource && count > 0 && (((uintptr_t)pSource & 0xF) == 0) );
1603  assert( IsValid(format) && !IsTypeless(format) && !IsCompressed(format) && !IsPlanar(format) && !IsPalettized(format) );
1604 
1605  const XMVECTOR* __restrict sPtr = pSource;
1606  if ( !sPtr )
1607  return false;
1608 
1609  const XMVECTOR* ePtr = pSource + count;
1610 
1611  switch( static_cast<int>(format) )
1612  {
1613  case DXGI_FORMAT_R32G32B32A32_FLOAT:
1614  STORE_SCANLINE( XMFLOAT4, XMStoreFloat4 )
1615 
1616  case DXGI_FORMAT_R32G32B32A32_UINT:
1617  STORE_SCANLINE( XMUINT4, XMStoreUInt4 )
1618 
1619  case DXGI_FORMAT_R32G32B32A32_SINT:
1620  STORE_SCANLINE( XMINT4, XMStoreSInt4 )
1621 
1622  case DXGI_FORMAT_R32G32B32_FLOAT:
1623  STORE_SCANLINE( XMFLOAT3, XMStoreFloat3 )
1624 
1625  case DXGI_FORMAT_R32G32B32_UINT:
1626  STORE_SCANLINE( XMUINT3, XMStoreUInt3 )
1627 
1628  case DXGI_FORMAT_R32G32B32_SINT:
1629  STORE_SCANLINE( XMINT3, XMStoreSInt3 )
1630 
1631  case DXGI_FORMAT_R16G16B16A16_FLOAT:
1632  STORE_SCANLINE( XMHALF4, XMStoreHalf4 )
1633 
1634  case DXGI_FORMAT_R16G16B16A16_UNORM:
1635  STORE_SCANLINE( XMUSHORTN4, XMStoreUShortN4 )
1636 
1637  case DXGI_FORMAT_R16G16B16A16_UINT:
1638  STORE_SCANLINE( XMUSHORT4, XMStoreUShort4 )
1639 
1640  case DXGI_FORMAT_R16G16B16A16_SNORM:
1641  STORE_SCANLINE( XMSHORTN4, XMStoreShortN4 )
1642 
1643  case DXGI_FORMAT_R16G16B16A16_SINT:
1644  STORE_SCANLINE( XMSHORT4, XMStoreShort4 )
1645 
1646  case DXGI_FORMAT_R32G32_FLOAT:
1647  STORE_SCANLINE( XMFLOAT2, XMStoreFloat2 )
1648 
1649  case DXGI_FORMAT_R32G32_UINT:
1650  STORE_SCANLINE( XMUINT2, XMStoreUInt2 )
1651 
1652  case DXGI_FORMAT_R32G32_SINT:
1653  STORE_SCANLINE( XMINT2, XMStoreSInt2 )
1654 
1655  case DXGI_FORMAT_D32_FLOAT_S8X24_UINT:
1656  {
1657  const size_t psize = sizeof(float)+sizeof(uint32_t);
1658  if ( size >= psize )
1659  {
1660  float *dPtr = reinterpret_cast<float*>(pDestination);
1661  for( size_t icount = 0; icount < ( size - psize + 1 ); icount += psize )
1662  {
1663  if ( sPtr >= ePtr ) break;
1664  XMFLOAT4 f;
1665  XMStoreFloat4( &f, *sPtr++ );
1666  dPtr[0] = f.x;
1667  uint8_t* ps8 = reinterpret_cast<uint8_t*>( &dPtr[1] );
1668  ps8[0] = static_cast<uint8_t>( std::min<float>( 255.f, std::max<float>( 0.f, f.y ) ) );
1669  ps8[1] = ps8[2] = ps8[3] = 0;
1670  dPtr += 2;
1671  }
1672  return true;
1673  }
1674  }
1675  return false;
1676 
1677  case DXGI_FORMAT_R10G10B10A2_UNORM:
1678  STORE_SCANLINE( XMUDECN4, XMStoreUDecN4 );
1679 
1680  case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM:
1681 #if DIRECTX_MATH_VERSION >= 306
1682  STORE_SCANLINE( XMUDECN4, XMStoreUDecN4_XR );
1683 #else
1684  if ( size >= sizeof(XMUDECN4) )
1685  {
1686  static const XMVECTORF32 Scale = { 510.0f, 510.0f, 510.0f, 3.0f };
1687  static const XMVECTORF32 Bias = { 384.0f, 384.0f, 384.0f, 0.0f };
1688  static const XMVECTORF32 C = { 1023.f, 1023.f, 1023.f, 3.f };
1689 
1690  XMUDECN4 * __restrict dPtr = reinterpret_cast<XMUDECN4*>(pDestination);
1691  for( size_t icount = 0; icount < ( size - sizeof(XMUDECN4) + 1 ); icount += sizeof(XMUDECN4) )
1692  {
1693  if ( sPtr >= ePtr ) break;
1694 
1695  XMVECTOR N = XMVectorMultiplyAdd( *sPtr++, Scale, Bias );
1696  N = XMVectorClamp( N, g_XMZero, C );
1697 
1698  XMFLOAT4A tmp;
1699  XMStoreFloat4A(&tmp, N );
1700 
1701  dPtr->v = ((uint32_t)tmp.w << 30)
1702  | (((uint32_t)tmp.z & 0x3FF) << 20)
1703  | (((uint32_t)tmp.y & 0x3FF) << 10)
1704  | (((uint32_t)tmp.x & 0x3FF));
1705  ++dPtr;
1706  }
1707  return true;
1708  }
1709  return false;
1710 #endif
1711 
1712  case DXGI_FORMAT_R10G10B10A2_UINT:
1713  STORE_SCANLINE( XMUDEC4, XMStoreUDec4 );
1714 
1715  case DXGI_FORMAT_R11G11B10_FLOAT:
1716  STORE_SCANLINE( XMFLOAT3PK, XMStoreFloat3PK );
1717 
1718  case DXGI_FORMAT_R8G8B8A8_UNORM:
1719  case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
1720  STORE_SCANLINE( XMUBYTEN4, XMStoreUByteN4 )
1721 
1722  case DXGI_FORMAT_R8G8B8A8_UINT:
1723  STORE_SCANLINE( XMUBYTE4, XMStoreUByte4 )
1724 
1725  case DXGI_FORMAT_R8G8B8A8_SNORM:
1726  STORE_SCANLINE( XMBYTEN4, XMStoreByteN4 )
1727 
1728  case DXGI_FORMAT_R8G8B8A8_SINT:
1729  STORE_SCANLINE( XMBYTE4, XMStoreByte4 )
1730 
1731  case DXGI_FORMAT_R16G16_FLOAT:
1732  STORE_SCANLINE( XMHALF2, XMStoreHalf2 )
1733 
1734  case DXGI_FORMAT_R16G16_UNORM:
1735  STORE_SCANLINE( XMUSHORTN2, XMStoreUShortN2 )
1736 
1737  case DXGI_FORMAT_R16G16_UINT:
1738  STORE_SCANLINE( XMUSHORT2, XMStoreUShort2 )
1739 
1740  case DXGI_FORMAT_R16G16_SNORM:
1741  STORE_SCANLINE( XMSHORTN2, XMStoreShortN2 )
1742 
1743  case DXGI_FORMAT_R16G16_SINT:
1744  STORE_SCANLINE( XMSHORT2, XMStoreShort2 )
1745 
1746  case DXGI_FORMAT_D32_FLOAT:
1747  case DXGI_FORMAT_R32_FLOAT:
1748  if ( size >= sizeof(float) )
1749  {
1750  float * __restrict dPtr = reinterpret_cast<float*>(pDestination);
1751  for( size_t icount = 0; icount < ( size - sizeof(float) + 1 ); icount += sizeof(float) )
1752  {
1753  if ( sPtr >= ePtr ) break;
1754  XMStoreFloat( dPtr++, *(sPtr++) );
1755  }
1756  return true;
1757  }
1758  return false;
1759 
1760  case DXGI_FORMAT_R32_UINT:
1761  if ( size >= sizeof(uint32_t) )
1762  {
1763  uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination);
1764  for( size_t icount = 0; icount < ( size - sizeof(uint32_t) + 1 ); icount += sizeof(uint32_t) )
1765  {
1766  if ( sPtr >= ePtr ) break;
1767  XMVECTOR v = XMConvertVectorFloatToUInt( *(sPtr++), 0 );
1768  XMStoreInt( dPtr++, v );
1769  }
1770  return true;
1771  }
1772  return false;
1773 
1774  case DXGI_FORMAT_R32_SINT:
1775  if ( size >= sizeof(int32_t) )
1776  {
1777  uint32_t * __restrict dPtr = reinterpret_cast<uint32_t*>(pDestination);
1778  for( size_t icount = 0; icount < ( size - sizeof(int32_t) + 1 ); icount += sizeof(int32_t) )
1779  {
1780  if ( sPtr >= ePtr ) break;
1781  XMVECTOR v = XMConvertVectorFloatToInt( *(sPtr++), 0 );
1782  XMStoreInt( dPtr++, v );
1783  }
1784  return true;
1785  }
1786  return false;
1787 
1788  case DXGI_FORMAT_D24_UNORM_S8_UINT:
1789  if ( size >= sizeof(uint32_t) )
1790  {
1791  static const XMVECTORF32 clamp = { 1.f, 255.f, 0.f, 0.f };
1792  XMVECTOR zero = XMVectorZero();
1793  uint32_t *dPtr = reinterpret_cast<uint32_t*>(pDestination);
1794  for( size_t icount = 0; icount < ( size - sizeof(uint32_t) + 1 ); icount += sizeof(uint32_t) )
1795  {
1796  if ( sPtr >= ePtr ) break;
1797  XMFLOAT4 f;
1798  XMStoreFloat4( &f, XMVectorClamp( *sPtr++, zero, clamp ) );
1799  *dPtr++ = (static_cast<uint32_t>( f.x * 16777215.f ) & 0xFFFFFF)
1800  | ((static_cast<uint32_t>( f.y ) & 0xFF) << 24);
1801  }
1802  return true;
1803  }
1804  return false;
1805 
1806  case DXGI_FORMAT_R8G8_UNORM:
1807  STORE_SCANLINE( XMUBYTEN2, XMStoreUByteN2 )
1808 
1809  case DXGI_FORMAT_R8G8_UINT:
1810  STORE_SCANLINE( XMUBYTE2, XMStoreUByte2 )
1811 
1812  case DXGI_FORMAT_R8G8_SNORM:
1813  STORE_SCANLINE( XMBYTEN2, XMStoreByteN2 )
1814 
1815  case DXGI_FORMAT_R8G8_SINT:
1816  STORE_SCANLINE( XMBYTE2, XMStoreByte2 )
1817 
1818  case DXGI_FORMAT_R16_FLOAT:
1819  if ( size >= sizeof(HALF) )
1820  {
1821  HALF * __restrict dPtr = reinterpret_cast<HALF*>(pDestination);
1822  for( size_t icount = 0; icount < ( size - sizeof(HALF) + 1 ); icount += sizeof(HALF) )
1823  {
1824  if ( sPtr >= ePtr ) break;
1825  float v = XMVectorGetX( *sPtr++ );
1826  *(dPtr++) = XMConvertFloatToHalf(v);
1827  }
1828  return true;
1829  }
1830  return false;
1831 
1832  case DXGI_FORMAT_D16_UNORM:
1833  case DXGI_FORMAT_R16_UNORM:
1834  if ( size >= sizeof(uint16_t) )
1835  {
1836  uint16_t * __restrict dPtr = reinterpret_cast<uint16_t*>(pDestination);
1837  for( size_t icount = 0; icount < ( size - sizeof(uint16_t) + 1 ); icount += sizeof(uint16_t) )
1838  {
1839  if ( sPtr >= ePtr ) break;
1840  float v = XMVectorGetX( *sPtr++ );
1841  v = std::max<float>( std::min<float>( v, 1.f ), 0.f );
1842  *(dPtr++) = static_cast<uint16_t>( v*65535.f + 0.5f );
1843  }
1844  return true;
1845  }
1846  return false;
1847 
1848  case DXGI_FORMAT_R16_UINT:
1849  if ( size >= sizeof(uint16_t) )
1850  {
1851  uint16_t * __restrict dPtr = reinterpret_cast<uint16_t*>(pDestination);
1852  for( size_t icount = 0; icount < ( size - sizeof(uint16_t) + 1 ); icount += sizeof(uint16_t) )
1853  {
1854  if ( sPtr >= ePtr ) break;
1855  float v = XMVectorGetX( *sPtr++ );
1856  v = std::max<float>( std::min<float>( v, 65535.f ), 0.f );
1857  *(dPtr++) = static_cast<uint16_t>(v);
1858  }
1859  return true;
1860  }
1861  return false;
1862 
1863  case DXGI_FORMAT_R16_SNORM:
1864  if ( size >= sizeof(int16_t) )
1865  {
1866  int16_t * __restrict dPtr = reinterpret_cast<int16_t*>(pDestination);
1867  for( size_t icount = 0; icount < ( size - sizeof(int16_t) + 1 ); icount += sizeof(int16_t) )
1868  {
1869  if ( sPtr >= ePtr ) break;
1870  float v = XMVectorGetX( *sPtr++ );
1871  v = std::max<float>( std::min<float>( v, 1.f ), -1.f );
1872  *(dPtr++) = static_cast<uint16_t>( v * 32767.f );
1873  }
1874  return true;
1875  }
1876  return false;
1877 
1878  case DXGI_FORMAT_R16_SINT:
1879  if ( size >= sizeof(int16_t) )
1880  {
1881  int16_t * __restrict dPtr = reinterpret_cast<int16_t*>(pDestination);
1882  for( size_t icount = 0; icount < ( size - sizeof(int16_t) + 1 ); icount += sizeof(int16_t) )
1883  {
1884  if ( sPtr >= ePtr ) break;
1885  float v = XMVectorGetX( *sPtr++ );
1886  v = std::max<float>( std::min<float>( v, 32767.f ), -32767.f );
1887  *(dPtr++) = static_cast<int16_t>(v);
1888  }
1889  return true;
1890  }
1891  return false;
1892 
1893  case DXGI_FORMAT_R8_UNORM:
1894  if ( size >= sizeof(uint8_t) )
1895  {
1896  uint8_t * __restrict dPtr = reinterpret_cast<uint8_t*>(pDestination);
1897  for( size_t icount = 0; icount < size; icount += sizeof(uint8_t) )
1898  {
1899  if ( sPtr >= ePtr ) break;
1900  float v = XMVectorGetX( *sPtr++ );
1901  v = std::max<float>( std::min<float>( v, 1.f ), 0.f );
1902  *(dPtr++) = static_cast<uint8_t>( v * 255.f);
1903  }
1904  return true;
1905  }
1906  return false;
1907 
1908  case DXGI_FORMAT_R8_UINT:
1909  if ( size >= sizeof(uint8_t) )
1910  {
1911  uint8_t * __restrict dPtr = reinterpret_cast<uint8_t*>(pDestination);
1912  for( size_t icount = 0; icount < size; icount += sizeof(uint8_t) )
1913  {
1914  if ( sPtr >= ePtr ) break;
1915  float v = XMVectorGetX( *sPtr++ );
1916  v = std::max<float>( std::min<float>( v, 255.f ), 0.f );
1917  *(dPtr++) = static_cast<uint8_t>(v);
1918  }
1919  return true;
1920  }
1921  return false;
1922 
1923  case DXGI_FORMAT_R8_SNORM:
1924  if ( size >= sizeof(int8_t) )
1925  {
1926  int8_t * __restrict dPtr = reinterpret_cast<int8_t*>(pDestination);
1927  for( size_t icount = 0; icount < size; icount += sizeof(int8_t) )
1928  {
1929  if ( sPtr >= ePtr ) break;
1930  float v = XMVectorGetX( *sPtr++ );
1931  v = std::max<float>( std::min<float>( v, 1.f ), -1.f );
1932  *(dPtr++) = static_cast<int8_t>( v * 127.f );
1933  }
1934  return true;
1935  }
1936  return false;
1937 
1938  case DXGI_FORMAT_R8_SINT:
1939  if ( size >= sizeof(int8_t) )
1940  {
1941  int8_t * __restrict dPtr = reinterpret_cast<int8_t*>(pDestination);
1942  for( size_t icount = 0; icount < size; icount += sizeof(int8_t) )
1943  {
1944  if ( sPtr >= ePtr ) break;
1945  float v = XMVectorGetX( *sPtr++ );
1946  v = std::max<float>( std::min<float>( v, 127.f ), -127.f );
1947  *(dPtr++) = static_cast<int8_t>( v );
1948  }
1949  return true;
1950  }
1951  return false;
1952 
1953  case DXGI_FORMAT_A8_UNORM:
1954  if ( size >= sizeof(uint8_t) )
1955  {
1956  uint8_t * __restrict dPtr = reinterpret_cast<uint8_t*>(pDestination);
1957  for( size_t icount = 0; icount < size; icount += sizeof(uint8_t) )
1958  {
1959  if ( sPtr >= ePtr ) break;
1960  float v = XMVectorGetW( *sPtr++ );
1961  v = std::max<float>( std::min<float>( v, 1.f ), 0.f );
1962  *(dPtr++) = static_cast<uint8_t>( v * 255.f);
1963  }
1964  return true;
1965  }
1966  return false;
1967 
1968  case DXGI_FORMAT_R1_UNORM:
1969  if ( size >= sizeof(uint8_t) )
1970  {
1971  uint8_t * __restrict dPtr = reinterpret_cast<uint8_t*>(pDestination);
1972  for( size_t icount = 0; icount < size; icount += sizeof(uint8_t) )
1973  {
1974  uint8_t pixels = 0;
1975  for( size_t bcount = 8; bcount > 0; --bcount )
1976  {
1977  if ( sPtr >= ePtr ) break;
1978  float v = XMVectorGetX( *sPtr++ );
1979 
1980  // Absolute thresholding generally doesn't give good results for all images
1981  // Picking the 'right' threshold automatically requires whole-image analysis
1982 
1983  if ( v > 0.25f )
1984  pixels |= 1 << (bcount-1);
1985  }
1986  *(dPtr++) = pixels;
1987  }
1988  return true;
1989  }
1990  return false;
1991 
1992  case DXGI_FORMAT_R9G9B9E5_SHAREDEXP:
1993 #if DIRECTX_MATH_VERSION >= 306
1994  STORE_SCANLINE( XMFLOAT3SE, XMStoreFloat3SE )
1995 #else
1996  if ( size >= sizeof(XMFLOAT3SE) )
1997  {
1998  static const float maxf9 = float(0x1FF << 7);
1999  static const float minf9 = float(1.f / (1 << 16));
2000 
2001  XMFLOAT3SE * __restrict dPtr = reinterpret_cast<XMFLOAT3SE*>(pDestination);
2002  for( size_t icount = 0; icount < ( size - sizeof(XMFLOAT3SE) + 1 ); icount += sizeof(XMFLOAT3SE) )
2003  {
2004  if ( sPtr >= ePtr ) break;
2005 
2006  XMFLOAT3 rgb;
2007  XMStoreFloat3( &rgb, *(sPtr++) );
2008 
2009  float r = (rgb.x >= 0.f) ? ( (rgb.x > maxf9) ? maxf9 : rgb.x ) : 0.f;
2010  float g = (rgb.y >= 0.f) ? ( (rgb.y > maxf9) ? maxf9 : rgb.y ) : 0.f;
2011  float b = (rgb.z >= 0.f) ? ( (rgb.z > maxf9) ? maxf9 : rgb.z ) : 0.f;
2012 
2013  const float max_rg = (r > g) ? r : g;
2014  const float max_rgb = (max_rg > b) ? max_rg : b;
2015 
2016  const float maxColor = (max_rgb > minf9) ? max_rgb : minf9;
2017 
2018  union { float f; INT32 i; } fi;
2019  fi.f = maxColor;
2020  fi.i &= 0xFF800000; // cut off fraction
2021 
2022  dPtr->e = (fi.i - 0x37800000) >> 23;
2023 
2024  fi.i = 0x83000000 - fi.i;
2025  float ScaleR = fi.f;
2026 
2027  dPtr->xm = static_cast<uint32_t>( round_to_nearest(r * ScaleR) );
2028  dPtr->ym = static_cast<uint32_t>( round_to_nearest(g * ScaleR) );
2029  dPtr->zm = static_cast<uint32_t>( round_to_nearest(b * ScaleR) );
2030  ++dPtr;
2031  }
2032  return true;
2033  }
2034  return false;
2035 #endif
2036 
2037  case DXGI_FORMAT_R8G8_B8G8_UNORM:
2038  if ( size >= sizeof(XMUBYTEN4) )
2039  {
2040  XMUBYTEN4 * __restrict dPtr = reinterpret_cast<XMUBYTEN4*>(pDestination);
2041  for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) )
2042  {
2043  if ( sPtr >= ePtr ) break;
2044  XMVECTOR v0 = *sPtr++;
2045  XMVECTOR v1 = (sPtr < ePtr) ? XMVectorSplatY( *sPtr++ ) : XMVectorZero();
2046  XMVECTOR v = XMVectorSelect( v1, v0, g_XMSelect1110 );
2047  XMStoreUByteN4( dPtr++, v );
2048  }
2049  return true;
2050  }
2051  return false;
2052 
2053  case DXGI_FORMAT_G8R8_G8B8_UNORM:
2054  if ( size >= sizeof(XMUBYTEN4) )
2055  {
2056  static XMVECTORI32 select1101 = {XM_SELECT_1, XM_SELECT_1, XM_SELECT_0, XM_SELECT_1};
2057 
2058  XMUBYTEN4 * __restrict dPtr = reinterpret_cast<XMUBYTEN4*>(pDestination);
2059  for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) )
2060  {
2061  if ( sPtr >= ePtr ) break;
2062  XMVECTOR v0 = XMVectorSwizzle<1, 0, 3, 2>( *sPtr++ );
2063  XMVECTOR v1 = (sPtr < ePtr) ? XMVectorSplatY( *sPtr++ ) : XMVectorZero();
2064  XMVECTOR v = XMVectorSelect( v1, v0, select1101 );
2065  XMStoreUByteN4( dPtr++, v );
2066  }
2067  return true;
2068  }
2069  return false;
2070 
2071  case DXGI_FORMAT_B5G6R5_UNORM:
2072  if ( size >= sizeof(XMU565) )
2073  {
2074  static XMVECTORF32 s_Scale = { 31.f, 63.f, 31.f, 1.f };
2075  XMU565 * __restrict dPtr = reinterpret_cast<XMU565*>(pDestination);
2076  for( size_t icount = 0; icount < ( size - sizeof(XMU565) + 1 ); icount += sizeof(XMU565) )
2077  {
2078  if ( sPtr >= ePtr ) break;
2079  XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( *sPtr++ );
2080  v = XMVectorMultiply( v, s_Scale );
2081  XMStoreU565( dPtr++, v );
2082  }
2083  return true;
2084  }
2085  return false;
2086 
2087  case DXGI_FORMAT_B5G5R5A1_UNORM:
2088  if ( size >= sizeof(XMU555) )
2089  {
2090  static XMVECTORF32 s_Scale = { 31.f, 31.f, 31.f, 1.f };
2091  XMU555 * __restrict dPtr = reinterpret_cast<XMU555*>(pDestination);
2092  for( size_t icount = 0; icount < ( size - sizeof(XMU555) + 1 ); icount += sizeof(XMU555) )
2093  {
2094  if ( sPtr >= ePtr ) break;
2095  XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( *sPtr++ );
2096  v = XMVectorMultiply( v, s_Scale );
2097  XMStoreU555( dPtr, v );
2098  dPtr->w = ( XMVectorGetW( v ) > threshold ) ? 1 : 0;
2099  ++dPtr;
2100  }
2101  return true;
2102  }
2103  return false;
2104 
2105  case DXGI_FORMAT_B8G8R8A8_UNORM:
2106  case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
2107  if ( size >= sizeof(XMUBYTEN4) )
2108  {
2109  XMUBYTEN4 * __restrict dPtr = reinterpret_cast<XMUBYTEN4*>(pDestination);
2110  for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) )
2111  {
2112  if ( sPtr >= ePtr ) break;
2113  XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( *sPtr++ );
2114  XMStoreUByteN4( dPtr++, v );
2115  }
2116  return true;
2117  }
2118  return false;
2119 
2120  case DXGI_FORMAT_B8G8R8X8_UNORM:
2121  case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB:
2122  if ( size >= sizeof(XMUBYTEN4) )
2123  {
2124  XMUBYTEN4 * __restrict dPtr = reinterpret_cast<XMUBYTEN4*>(pDestination);
2125  for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) )
2126  {
2127  if ( sPtr >= ePtr ) break;
2128  XMVECTOR v = XMVectorPermute<2, 1, 0, 7>( *sPtr++, g_XMIdentityR3 );
2129  XMStoreUByteN4( dPtr++, v );
2130  }
2131  return true;
2132  }
2133  return false;
2134 
2135  case DXGI_FORMAT_AYUV:
2136  if ( size >= sizeof(XMUBYTEN4) )
2137  {
2138  XMUBYTEN4 * __restrict dPtr = reinterpret_cast<XMUBYTEN4*>(pDestination);
2139  for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) )
2140  {
2141  if ( sPtr >= ePtr ) break;
2142 
2143  XMUBYTEN4 rgba;
2144  XMStoreUByteN4( &rgba, *sPtr++ );
2145 
2146  // http://msdn.microsoft.com/en-us/library/windows/desktop/dd206750.aspx
2147 
2148  // Y = 0.2568R + 0.5041G + 0.1001B + 16
2149  // Cb = -0.1482R - 0.2910G + 0.4392B + 128
2150  // Cr = 0.4392R - 0.3678G - 0.0714B + 128
2151 
2152  int y = ( ( 66 * rgba.x + 129 * rgba.y + 25 * rgba.z + 128) >> 8) + 16;
2153  int u = ( ( -38 * rgba.x - 74 * rgba.y + 112 * rgba.z + 128) >> 8) + 128;
2154  int v = ( ( 112 * rgba.x - 94 * rgba.y - 18 * rgba.z + 128) >> 8) + 128;
2155 
2156  dPtr->x = static_cast<uint8_t>( std::min<int>( std::max<int>( v, 0 ), 255 ) );
2157  dPtr->y = static_cast<uint8_t>( std::min<int>( std::max<int>( u, 0 ), 255 ) );
2158  dPtr->z = static_cast<uint8_t>( std::min<int>( std::max<int>( y, 0 ), 255 ) );
2159  dPtr->w = rgba.w;
2160  ++dPtr;
2161  }
2162  return true;
2163  }
2164  return false;
2165 
2166  case DXGI_FORMAT_Y410:
2167  if ( size >= sizeof(XMUDECN4) )
2168  {
2169  XMUDECN4 * __restrict dPtr = reinterpret_cast<XMUDECN4*>(pDestination);
2170  for( size_t icount = 0; icount < ( size - sizeof(XMUDECN4) + 1 ); icount += sizeof(XMUDECN4) )
2171  {
2172  if ( sPtr >= ePtr ) break;
2173 
2174  XMUDECN4 rgba;
2175  XMStoreUDecN4( &rgba, *sPtr++ );
2176 
2177  // http://msdn.microsoft.com/en-us/library/windows/desktop/bb970578.aspx
2178 
2179  // Y = 0.2560R + 0.5027G + 0.0998B + 64
2180  // Cb = -0.1478R - 0.2902G + 0.4379B + 512
2181  // Cr = 0.4379R - 0.3667G - 0.0712B + 512
2182 
2183  int64_t r = rgba.x;
2184  int64_t g = rgba.y;
2185  int64_t b = rgba.z;
2186 
2187  int y = static_cast<int>( ( 16780 * r + 32942 * g + 6544 * b + 32768) >> 16) + 64;
2188  int u = static_cast<int>( ( -9683 * r - 19017 * g + 28700 * b + 32768) >> 16) + 512;
2189  int v = static_cast<int>( ( 28700 * r - 24033 * g - 4667 * b + 32768) >> 16) + 512;
2190 
2191  dPtr->x = static_cast<uint32_t>( std::min<int>( std::max<int>( u, 0 ), 1023 ) );
2192  dPtr->y = static_cast<uint32_t>( std::min<int>( std::max<int>( y, 0 ), 1023 ) );
2193  dPtr->z = static_cast<uint32_t>( std::min<int>( std::max<int>( v, 0 ), 1023 ) );
2194  dPtr->w = rgba.w;
2195  ++dPtr;
2196  }
2197  return true;
2198  }
2199  return false;
2200 
2201  case DXGI_FORMAT_Y416:
2202  if ( size >= sizeof(XMUSHORTN4) )
2203  {
2204  XMUSHORTN4 * __restrict dPtr = reinterpret_cast<XMUSHORTN4*>(pDestination);
2205  for( size_t icount = 0; icount < ( size - sizeof(XMUSHORTN4) + 1 ); icount += sizeof(XMUSHORTN4) )
2206  {
2207  if ( sPtr >= ePtr ) break;
2208 
2209  XMUSHORTN4 rgba;
2210  XMStoreUShortN4( &rgba, *sPtr++ );
2211 
2212  // http://msdn.microsoft.com/en-us/library/windows/desktop/bb970578.aspx
2213 
2214  // Y = 0.2558R + 0.5022G + 0.0998B + 4096
2215  // Cb = -0.1476R - 0.2899G + 0.4375B + 32768
2216  // Cr = 0.4375R - 0.3664G - 0.0711B + 32768
2217 
2218  int64_t r = int64_t(rgba.x);
2219  int64_t g = int64_t(rgba.y);
2220  int64_t b = int64_t(rgba.z);
2221 
2222  int y = static_cast<int>( ( 16763 * r + 32910 * g + 6537 * b + 32768) >> 16) + 4096;
2223  int u = static_cast<int>( ( -9674 * r - 18998 * g + 28672 * b + 32768) >> 16) + 32768;
2224  int v = static_cast<int>( ( 28672 * r - 24010 * g - 4662 * b + 32768) >> 16) + 32768;
2225 
2226  dPtr->x = static_cast<uint16_t>( std::min<int>( std::max<int>( u, 0 ), 65535 ) );
2227  dPtr->y = static_cast<uint16_t>( std::min<int>( std::max<int>( y, 0 ), 65535 ) );
2228  dPtr->z = static_cast<uint16_t>( std::min<int>( std::max<int>( v, 0 ), 65535 ) );
2229  dPtr->w = rgba.w;
2230  ++dPtr;
2231  }
2232  return true;
2233  }
2234  return false;
2235 
2236  case DXGI_FORMAT_YUY2:
2237  if ( size >= sizeof(XMUBYTEN4) )
2238  {
2239  XMUBYTEN4 * __restrict dPtr = reinterpret_cast<XMUBYTEN4*>(pDestination);
2240  for( size_t icount = 0; icount < ( size - sizeof(XMUBYTEN4) + 1 ); icount += sizeof(XMUBYTEN4) )
2241  {
2242  if ( sPtr >= ePtr ) break;
2243 
2244  XMUBYTEN4 rgb1;
2245  XMStoreUByteN4( &rgb1, *sPtr++ );
2246 
2247  // See AYUV
2248  int y0 = ( ( 66 * rgb1.x + 129 * rgb1.y + 25 * rgb1.z + 128) >> 8) + 16;
2249  int u0 = ( ( -38 * rgb1.x - 74 * rgb1.y + 112 * rgb1.z + 128) >> 8) + 128;
2250  int v0 = ( ( 112 * rgb1.x - 94 * rgb1.y - 18 * rgb1.z + 128) >> 8) + 128;
2251 
2252  XMUBYTEN4 rgb2;
2253  if(sPtr < ePtr)
2254  {
2255  XMStoreUByteN4( &rgb2, *sPtr++ );
2256  }
2257  else
2258  {
2259  rgb2.x = rgb2.y = rgb2.z = rgb2.w = 0;
2260  }
2261 
2262  int y1 = ( ( 66 * rgb2.x + 129 * rgb2.y + 25 * rgb2.z + 128) >> 8) + 16;
2263  int u1 = ( ( -38 * rgb2.x - 74 * rgb2.y + 112 * rgb2.z + 128) >> 8) + 128;
2264  int v1 = ( ( 112 * rgb2.x - 94 * rgb2.y - 18 * rgb2.z + 128) >> 8) + 128;
2265 
2266  dPtr->x = static_cast<uint8_t>( std::min<int>( std::max<int>( y0, 0 ), 255 ) );
2267  dPtr->y = static_cast<uint8_t>( std::min<int>( std::max<int>( (u0 + u1) >> 1, 0 ), 255 ) );
2268  dPtr->z = static_cast<uint8_t>( std::min<int>( std::max<int>( y1, 0 ), 255 ) );
2269  dPtr->w = static_cast<uint8_t>( std::min<int>( std::max<int>( (v0 + v1) >> 1, 0 ), 255 ) );
2270  ++dPtr;
2271  }
2272  return true;
2273  }
2274  return false;
2275 
2276  case DXGI_FORMAT_Y210:
2277  // Same as Y216 with least significant 6 bits set to zero
2278  if ( size >= sizeof(XMUSHORTN4) )
2279  {
2280  XMUSHORTN4 * __restrict dPtr = reinterpret_cast<XMUSHORTN4*>(pDestination);
2281  for( size_t icount = 0; icount < ( size - sizeof(XMUSHORTN4) + 1 ); icount += sizeof(XMUSHORTN4) )
2282  {
2283  if ( sPtr >= ePtr ) break;
2284 
2285  XMUDECN4 rgb1;
2286  XMStoreUDecN4( &rgb1, *sPtr++ );
2287 
2288  // See Y410
2289  int64_t r = rgb1.x;
2290  int64_t g = rgb1.y;
2291  int64_t b = rgb1.z;
2292 
2293  int y0 = static_cast<int>( ( 16780 * r + 32942 * g + 6544 * b + 32768) >> 16) + 64;
2294  int u0 = static_cast<int>( ( -9683 * r - 19017 * g + 28700 * b + 32768) >> 16) + 512;
2295  int v0 = static_cast<int>( ( 28700 * r - 24033 * g - 4667 * b + 32768) >> 16) + 512;
2296 
2297  XMUDECN4 rgb2;
2298  if(sPtr < ePtr)
2299  {
2300  XMStoreUDecN4( &rgb2, *sPtr++ );
2301  }
2302  else
2303  {
2304  rgb2.x = rgb2.y = rgb2.z = rgb2.w = 0;
2305  }
2306 
2307  r = rgb2.x;
2308  g = rgb2.y;
2309  b = rgb2.z;
2310 
2311  int y1 = static_cast<int>( ( 16780 * r + 32942 * g + 6544 * b + 32768) >> 16) + 64;
2312  int u1 = static_cast<int>( ( -9683 * r - 19017 * g + 28700 * b + 32768) >> 16) + 512;
2313  int v1 = static_cast<int>( ( 28700 * r - 24033 * g - 4667 * b + 32768) >> 16) + 512;
2314 
2315  dPtr->x = static_cast<uint16_t>( std::min<int>( std::max<int>( y0, 0 ), 1023 ) << 6 );
2316  dPtr->y = static_cast<uint16_t>( std::min<int>( std::max<int>( (u0 + u1) >> 1, 0 ), 1023 ) << 6 );
2317  dPtr->z = static_cast<uint16_t>( std::min<int>( std::max<int>( y1, 0 ), 1023 ) << 6 );
2318  dPtr->w = static_cast<uint16_t>( std::min<int>( std::max<int>( (v0 + v1) >> 1, 0 ), 1023 ) << 6 );
2319  ++dPtr;
2320  }
2321  return true;
2322  }
2323  return false;
2324 
2325  case DXGI_FORMAT_Y216:
2326  if ( size >= sizeof(XMUSHORTN4) )
2327  {
2328  XMUSHORTN4 * __restrict dPtr = reinterpret_cast<XMUSHORTN4*>(pDestination);
2329  for( size_t icount = 0; icount < ( size - sizeof(XMUSHORTN4) + 1 ); icount += sizeof(XMUSHORTN4) )
2330  {
2331  if ( sPtr >= ePtr ) break;
2332 
2333  XMUSHORTN4 rgb1;
2334  XMStoreUShortN4( &rgb1, *sPtr++ );
2335 
2336  // See Y416
2337  int64_t r = int64_t(rgb1.x);
2338  int64_t g = int64_t(rgb1.y);
2339  int64_t b = int64_t(rgb1.z);
2340 
2341  int y0 = static_cast<int>( ( 16763 * r + 32910 * g + 6537 * b + 32768) >> 16) + 4096;
2342  int u0 = static_cast<int>( (-9674 * r - 18998 * g + 28672 * b + 32768) >> 16) + 32768;
2343  int v0 = static_cast<int>( ( 28672 * r - 24010 * g - 4662 * b + 32768) >> 16) + 32768;
2344 
2345  XMUSHORTN4 rgb2;
2346  if(sPtr < ePtr)
2347  {
2348  XMStoreUShortN4( &rgb2, *sPtr++ );
2349  }
2350  else
2351  {
2352  rgb2.x = rgb2.y = rgb2.z = rgb2.w = 0;
2353  }
2354 
2355  r = int64_t(rgb2.x);
2356  g = int64_t(rgb2.y);
2357  b = int64_t(rgb2.z);
2358 
2359  int y1 = static_cast<int>( ( 16763 * r + 32910 * g + 6537 * b + 32768) >> 16) + 4096;
2360  int u1 = static_cast<int>( (-9674 * r - 18998 * g + 28672 * b + 32768) >> 16) + 32768;
2361  int v1 = static_cast<int>( ( 28672 * r - 24010 * g - 4662 * b + 32768) >> 16) + 32768;
2362 
2363  dPtr->x = static_cast<uint16_t>( std::min<int>( std::max<int>( y0, 0 ), 65535 ) );
2364  dPtr->y = static_cast<uint16_t>( std::min<int>( std::max<int>( (u0 + u1) >> 1, 0 ), 65535 ) );
2365  dPtr->z = static_cast<uint16_t>( std::min<int>( std::max<int>( y1, 0 ), 65535 ) );
2366  dPtr->w = static_cast<uint16_t>( std::min<int>( std::max<int>( (v0 + v1) >> 1, 0 ), 65535 ) );
2367  ++dPtr;
2368  }
2369  return true;
2370  }
2371  return false;
2372 
2373  case DXGI_FORMAT_B4G4R4A4_UNORM:
2374  if ( size >= sizeof(XMUNIBBLE4) )
2375  {
2376  static XMVECTORF32 s_Scale = { 15.f, 15.f, 15.f, 15.f };
2377  XMUNIBBLE4 * __restrict dPtr = reinterpret_cast<XMUNIBBLE4*>(pDestination);
2378  for( size_t icount = 0; icount < ( size - sizeof(XMUNIBBLE4) + 1 ); icount += sizeof(XMUNIBBLE4) )
2379  {
2380  if ( sPtr >= ePtr ) break;
2381  XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( *sPtr++ );
2382  v = XMVectorMultiply( v, s_Scale );
2383  XMStoreUNibble4( dPtr++, v );
2384  }
2385  return true;
2386  }
2387  return false;
2388 
2389  case 116 /* DXGI_FORMAT_R10G10B10_7E3_A2_FLOAT */:
2390  // Xbox One specific 7e3 format with alpha
2391  if ( size >= sizeof(XMUDECN4) )
2392  {
2393  static const XMVECTORF32 Scale = { 1.0f, 1.0f, 1.0f, 3.0f };
2394  static const XMVECTORF32 C = { 31.875f, 31.875f, 31.875f, 3.f };
2395 
2396  XMUDECN4 * __restrict dPtr = reinterpret_cast<XMUDECN4*>(pDestination);
2397  for( size_t icount = 0; icount < ( size - sizeof(XMUDECN4) + 1 ); icount += sizeof(XMUDECN4) )
2398  {
2399  if ( sPtr >= ePtr ) break;
2400 
2401  XMVECTOR V = XMVectorMultiply( *sPtr++, Scale );
2402  V = XMVectorClamp( V, g_XMZero, C );
2403 
2404  XMFLOAT4A tmp;
2405  XMStoreFloat4A( &tmp, V );
2406 
2407  dPtr->x = FloatTo7e3( tmp.x );
2408  dPtr->y = FloatTo7e3( tmp.y );
2409  dPtr->z = FloatTo7e3( tmp.z );
2410  dPtr->w = (uint32_t)tmp.w;
2411  ++dPtr;
2412  }
2413  return true;
2414  }
2415  return false;
2416 
2417  case 117 /* DXGI_FORMAT_R10G10B10_6E4_A2_FLOAT */:
2418  // Xbox One specific 6e4 format with alpha
2419  if ( size >= sizeof(XMUDECN4) )
2420  {
2421  static const XMVECTORF32 Scale = { 1.0f, 1.0f, 1.0f, 3.0f };
2422  static const XMVECTORF32 C = { 508.f, 508.f, 508.f, 3.f };
2423 
2424  XMUDECN4 * __restrict dPtr = reinterpret_cast<XMUDECN4*>(pDestination);
2425  for( size_t icount = 0; icount < ( size - sizeof(XMUDECN4) + 1 ); icount += sizeof(XMUDECN4) )
2426  {
2427  if ( sPtr >= ePtr ) break;
2428 
2429  XMVECTOR V = XMVectorMultiply( *sPtr++, Scale );
2430  V = XMVectorClamp( V, g_XMZero, C );
2431 
2432  XMFLOAT4A tmp;
2433  XMStoreFloat4A( &tmp, V );
2434 
2435  dPtr->x = FloatTo6e4( tmp.x );
2436  dPtr->y = FloatTo6e4( tmp.y );
2437  dPtr->z = FloatTo6e4( tmp.z );
2438  dPtr->w = (uint32_t)tmp.w;
2439  ++dPtr;
2440  }
2441  return true;
2442  }
2443  return false;
2444 
2445  // We don't support the planar or palettized formats
2446 
2447  default:
2448  return false;
2449  }
2450 }
2451 
2452 #undef STORE_SCANLINE
2453 
2454 
2455 //-------------------------------------------------------------------------------------
2456 // Convert DXGI image to/from GUID_WICPixelFormat128bppRGBAFloat (no range conversions)
2457 //-------------------------------------------------------------------------------------
2458 _Use_decl_annotations_
2459 HRESULT _ConvertToR32G32B32A32( const Image& srcImage, ScratchImage& image )
2460 {
2461  if ( !srcImage.pixels )
2462  return E_POINTER;
2463 
2464  HRESULT hr = image.Initialize2D( DXGI_FORMAT_R32G32B32A32_FLOAT, srcImage.width, srcImage.height, 1, 1 );
2465  if ( FAILED(hr) )
2466  return hr;
2467 
2468  const Image *img = image.GetImage( 0, 0, 0 );
2469  if ( !img )
2470  {
2471  image.Release();
2472  return E_POINTER;
2473  }
2474 
2475  uint8_t* pDest = img->pixels;
2476  if ( !pDest )
2477  {
2478  image.Release();
2479  return E_POINTER;
2480  }
2481 
2482  const uint8_t *pSrc = srcImage.pixels;
2483  for( size_t h = 0; h < srcImage.height; ++h )
2484  {
2485  if ( !_LoadScanline( reinterpret_cast<XMVECTOR*>(pDest), srcImage.width, pSrc, srcImage.rowPitch, srcImage.format ) )
2486  {
2487  image.Release();
2488  return E_FAIL;
2489  }
2490 
2491  pSrc += srcImage.rowPitch;
2492  pDest += img->rowPitch;
2493  }
2494 
2495  return S_OK;
2496 }
2497 
2498 _Use_decl_annotations_
2499 HRESULT _ConvertFromR32G32B32A32( const Image& srcImage, const Image& destImage )
2500 {
2501  assert( srcImage.format == DXGI_FORMAT_R32G32B32A32_FLOAT );
2502 
2503  if ( !srcImage.pixels || !destImage.pixels )
2504  return E_POINTER;
2505 
2506  if ( srcImage.width != destImage.width || srcImage.height != destImage.height )
2507  return E_FAIL;
2508 
2509  const uint8_t *pSrc = srcImage.pixels;
2510  uint8_t* pDest = destImage.pixels;
2511 
2512  for( size_t h = 0; h < srcImage.height; ++h )
2513  {
2514  if ( !_StoreScanline( pDest, destImage.rowPitch, destImage.format, reinterpret_cast<const XMVECTOR*>(pSrc), srcImage.width ) )
2515  return E_FAIL;
2516 
2517  pSrc += srcImage.rowPitch;
2518  pDest += destImage.rowPitch;
2519  }
2520 
2521  return S_OK;
2522 }
2523 
2524 _Use_decl_annotations_
2525 HRESULT _ConvertFromR32G32B32A32( const Image& srcImage, DXGI_FORMAT format, ScratchImage& image )
2526 {
2527  if ( !srcImage.pixels )
2528  return E_POINTER;
2529 
2530  HRESULT hr = image.Initialize2D( format, srcImage.width, srcImage.height, 1, 1 );
2531  if ( FAILED(hr) )
2532  return hr;
2533 
2534  const Image *img = image.GetImage( 0, 0, 0 );
2535  if ( !img )
2536  {
2537  image.Release();
2538  return E_POINTER;
2539  }
2540 
2541  hr = _ConvertFromR32G32B32A32( srcImage, *img );
2542  if ( FAILED(hr) )
2543  {
2544  image.Release();
2545  return hr;
2546  }
2547 
2548  return S_OK;
2549 }
2550 
2551 _Use_decl_annotations_
2552 HRESULT _ConvertFromR32G32B32A32( const Image* srcImages, size_t nimages, const TexMetadata& metadata, DXGI_FORMAT format, ScratchImage& result )
2553 {
2554  if ( !srcImages )
2555  return E_POINTER;
2556 
2557  result.Release();
2558 
2559  assert( metadata.format == DXGI_FORMAT_R32G32B32A32_FLOAT );
2560 
2561  TexMetadata mdata2 = metadata;
2562  mdata2.format = format;
2563  HRESULT hr = result.Initialize( mdata2 );
2564  if ( FAILED(hr) )
2565  return hr;
2566 
2567  if ( nimages != result.GetImageCount() )
2568  {
2569  result.Release();
2570  return E_FAIL;
2571  }
2572 
2573  const Image* dest = result.GetImages();
2574  if ( !dest )
2575  {
2576  result.Release();
2577  return E_POINTER;
2578  }
2579 
2580  for( size_t index=0; index < nimages; ++index )
2581  {
2582  const Image& src = srcImages[ index ];
2583  const Image& dst = dest[ index ];
2584 
2585  assert( src.format == DXGI_FORMAT_R32G32B32A32_FLOAT );
2586  assert( dst.format == format );
2587 
2588  if ( src.width != dst.width || src.height != dst.height )
2589  {
2590  result.Release();
2591  return E_FAIL;
2592  }
2593 
2594  const uint8_t* pSrc = src.pixels;
2595  uint8_t* pDest = dst.pixels;
2596  if ( !pSrc || !pDest )
2597  {
2598  result.Release();
2599  return E_POINTER;
2600  }
2601 
2602  for( size_t h=0; h < src.height; ++h )
2603  {
2604  if ( !_StoreScanline( pDest, dst.rowPitch, format, reinterpret_cast<const XMVECTOR*>(pSrc), src.width ) )
2605  {
2606  result.Release();
2607  return E_FAIL;
2608  }
2609 
2610  pSrc += src.rowPitch;
2611  pDest += dst.rowPitch;
2612  }
2613  }
2614 
2615  return S_OK;
2616 }
2617 
2618 
2619 //-------------------------------------------------------------------------------------
2620 // Convert from Linear RGB to sRGB
2621 //
2622 // if C_linear <= 0.0031308 -> C_srgb = 12.92 * C_linear
2623 // if C_linear > 0.0031308 -> C_srgb = ( 1 + a ) * pow( C_Linear, 1 / 2.4 ) - a
2624 // where a = 0.055
2625 //-------------------------------------------------------------------------------------
2626 #if DIRECTX_MATH_VERSION < 306
2627 static inline XMVECTOR XMColorRGBToSRGB( FXMVECTOR rgb )
2628 {
2629  static const XMVECTORF32 Cutoff = { 0.0031308f, 0.0031308f, 0.0031308f, 1.f };
2630  static const XMVECTORF32 Linear = { 12.92f, 12.92f, 12.92f, 1.f };
2631  static const XMVECTORF32 Scale = { 1.055f, 1.055f, 1.055f, 1.f };
2632  static const XMVECTORF32 Bias = { 0.055f, 0.055f, 0.055f, 0.f };
2633  static const XMVECTORF32 InvGamma = { 1.0f/2.4f, 1.0f/2.4f, 1.0f/2.4f, 1.f };
2634 
2635  XMVECTOR V = XMVectorSaturate(rgb);
2636  XMVECTOR V0 = XMVectorMultiply( V, Linear );
2637  XMVECTOR V1 = Scale * XMVectorPow( V, InvGamma ) - Bias;
2638  XMVECTOR select = XMVectorLess( V, Cutoff );
2639  V = XMVectorSelect( V1, V0, select );
2640  return XMVectorSelect( rgb, V, g_XMSelect1110 );
2641 }
2642 #endif
2643 
// Stores a scanline to 'format', optionally applying a Linear RGB -> sRGB
// conversion first. The conversion is only honored for formats that can
// legitimately carry sRGB-encoded data; for everything else the sRGB filter
// flags are stripped before use. NOTE: the source buffer is modified in-place
// when the conversion runs (see comment below).
_Use_decl_annotations_
bool _StoreScanlineLinear( LPVOID pDestination, size_t size, DXGI_FORMAT format,
                           XMVECTOR* pSource, size_t count, DWORD flags )
{
    assert( pDestination && size > 0 );
    // Source must be 16-byte aligned for the SIMD loads below.
    assert( pSource && count > 0 && (((uintptr_t)pSource & 0xF) == 0) );
    assert( IsValid(format) && !IsTypeless(format) && !IsCompressed(format) && !IsPlanar(format) && !IsPalettized(format) );

    switch ( format )
    {
    // Explicit *_SRGB formats always get the conversion regardless of flags.
    case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
    case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
    case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB:
        flags |= TEX_FILTER_SRGB;
        break;

    // Formats that can reasonably hold sRGB-encoded color: leave flags as the
    // caller requested.
    case DXGI_FORMAT_R32G32B32A32_FLOAT:
    case DXGI_FORMAT_R32G32B32_FLOAT:
    case DXGI_FORMAT_R16G16B16A16_FLOAT:
    case DXGI_FORMAT_R16G16B16A16_UNORM:
    case DXGI_FORMAT_R32G32_FLOAT:
    case DXGI_FORMAT_R10G10B10A2_UNORM:
    case DXGI_FORMAT_R11G11B10_FLOAT:
    case DXGI_FORMAT_R8G8B8A8_UNORM:
    case DXGI_FORMAT_R16G16_FLOAT:
    case DXGI_FORMAT_R16G16_UNORM:
    case DXGI_FORMAT_R32_FLOAT:
    case DXGI_FORMAT_R8G8_UNORM:
    case DXGI_FORMAT_R16_FLOAT:
    case DXGI_FORMAT_R16_UNORM:
    case DXGI_FORMAT_R8_UNORM:
    case DXGI_FORMAT_R9G9B9E5_SHAREDEXP:
    case DXGI_FORMAT_R8G8_B8G8_UNORM:
    case DXGI_FORMAT_G8R8_G8B8_UNORM:
    case DXGI_FORMAT_B5G6R5_UNORM:
    case DXGI_FORMAT_B5G5R5A1_UNORM:
    case DXGI_FORMAT_B8G8R8A8_UNORM:
    case DXGI_FORMAT_B8G8R8X8_UNORM:
    case DXGI_FORMAT_B4G4R4A4_UNORM:
        break;

    default:
        // can't treat A8, XR, Depth, SNORM, UINT, or SINT as sRGB
        flags &= ~TEX_FILTER_SRGB;
        break;
    }

    // sRGB output processing (Linear RGB -> sRGB)
    if ( flags & TEX_FILTER_SRGB_OUT )
    {
        // To avoid the need for another temporary scanline buffer, we allow this function to overwrite the source buffer in-place
        // Given the intended usage in the filtering routines, this is not a problem.
        XMVECTOR* ptr = pSource;
        for( size_t i=0; i < count; ++i, ++ptr )
        {
            *ptr = XMColorRGBToSRGB( *ptr );
        }
    }

    // Final packing into the destination format is shared with the non-linear path.
    return _StoreScanline( pDestination, size, format, pSource, count );
}
2705 
2706 
2707 //-------------------------------------------------------------------------------------
2708 // Convert from sRGB to Linear RGB
2709 //
2710 // if C_srgb <= 0.04045 -> C_linear = C_srgb / 12.92
2711 // if C_srgb > 0.04045 -> C_linear = pow( ( C_srgb + a ) / ( 1 + a ), 2.4 )
2712 // where a = 0.055
2713 //-------------------------------------------------------------------------------------
2714 #if DIRECTX_MATH_VERSION < 306
2715 static inline XMVECTOR XMColorSRGBToRGB( FXMVECTOR srgb )
2716 {
2717  static const XMVECTORF32 Cutoff = { 0.04045f, 0.04045f, 0.04045f, 1.f };
2718  static const XMVECTORF32 ILinear = { 1.f/12.92f, 1.f/12.92f, 1.f/12.92f, 1.f };
2719  static const XMVECTORF32 Scale = { 1.f/1.055f, 1.f/1.055f, 1.f/1.055f, 1.f };
2720  static const XMVECTORF32 Bias = { 0.055f, 0.055f, 0.055f, 0.f };
2721  static const XMVECTORF32 Gamma = { 2.4f, 2.4f, 2.4f, 1.f };
2722 
2723  XMVECTOR V = XMVectorSaturate(srgb);
2724  XMVECTOR V0 = XMVectorMultiply( V, ILinear );
2725  XMVECTOR V1 = XMVectorPow( (V + Bias) * Scale, Gamma );
2726  XMVECTOR select = XMVectorGreater( V, Cutoff );
2727  V = XMVectorSelect( V0, V1, select );
2728  return XMVectorSelect( srgb, V, g_XMSelect1110 );
2729 }
2730 #endif
2731 
// Loads a scanline from 'format' into float RGBA, optionally applying an
// sRGB -> Linear RGB conversion afterwards. The conversion is only honored
// for formats that can legitimately carry sRGB-encoded data; for everything
// else the sRGB filter flags are stripped before use.
_Use_decl_annotations_
bool _LoadScanlineLinear( XMVECTOR* pDestination, size_t count,
                          LPCVOID pSource, size_t size, DXGI_FORMAT format, DWORD flags )
{
    // Destination must be 16-byte aligned for the SIMD stores below.
    assert( pDestination && count > 0 && (((uintptr_t)pDestination & 0xF) == 0) );
    assert( pSource && size > 0 );
    assert( IsValid(format) && !IsTypeless(format,false) && !IsCompressed(format) && !IsPlanar(format) && !IsPalettized(format) );

    switch ( format )
    {
    // Explicit *_SRGB formats always get the conversion regardless of flags.
    case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
    case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
    case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB:
        flags |= TEX_FILTER_SRGB;
        break;

    // Formats that can reasonably hold sRGB-encoded color: leave flags as the
    // caller requested.
    case DXGI_FORMAT_R32G32B32A32_FLOAT:
    case DXGI_FORMAT_R32G32B32_FLOAT:
    case DXGI_FORMAT_R16G16B16A16_FLOAT:
    case DXGI_FORMAT_R16G16B16A16_UNORM:
    case DXGI_FORMAT_R32G32_FLOAT:
    case DXGI_FORMAT_R10G10B10A2_UNORM:
    case DXGI_FORMAT_R11G11B10_FLOAT:
    case DXGI_FORMAT_R8G8B8A8_UNORM:
    case DXGI_FORMAT_R16G16_FLOAT:
    case DXGI_FORMAT_R16G16_UNORM:
    case DXGI_FORMAT_R32_FLOAT:
    case DXGI_FORMAT_R8G8_UNORM:
    case DXGI_FORMAT_R16_FLOAT:
    case DXGI_FORMAT_R16_UNORM:
    case DXGI_FORMAT_R8_UNORM:
    case DXGI_FORMAT_R9G9B9E5_SHAREDEXP:
    case DXGI_FORMAT_R8G8_B8G8_UNORM:
    case DXGI_FORMAT_G8R8_G8B8_UNORM:
    case DXGI_FORMAT_B5G6R5_UNORM:
    case DXGI_FORMAT_B5G5R5A1_UNORM:
    case DXGI_FORMAT_B8G8R8A8_UNORM:
    case DXGI_FORMAT_B8G8R8X8_UNORM:
    case DXGI_FORMAT_B4G4R4A4_UNORM:
        break;

    default:
        // can't treat A8, XR, Depth, SNORM, UINT, or SINT as sRGB
        flags &= ~TEX_FILTER_SRGB;
        break;
    }

    // Unpacking from the source format is shared with the non-linear path.
    if ( _LoadScanline( pDestination, count, pSource, size, format ) )
    {
        // sRGB input processing (sRGB -> Linear RGB)
        if ( flags & TEX_FILTER_SRGB_IN )
        {
            XMVECTOR* ptr = pDestination;
            for( size_t i=0; i < count; ++i, ++ptr )
            {
                *ptr = XMColorSRGBToRGB( *ptr );
            }
        }

        return true;
    }

    return false;
}
2796 
2797 
2798 //-------------------------------------------------------------------------------------
2799 // Convert scanline based on source/target formats
2800 //-------------------------------------------------------------------------------------
2802 {
2803  DXGI_FORMAT format;
2804  size_t datasize;
2805  DWORD flags;
2806 };
2807 
// Conversion-property table consumed by _GetConvertFlags and _ConvertScanline.
// Entries MUST stay sorted ascending by DXGI_FORMAT value: both consumers use
// bsearch_s with _ConvertCompare, and debug builds assert the ordering.
// The last two entries are Xbox-specific formats not named in the standard
// DXGI headers, hence the numeric DXGI_FORMAT(...) casts with name comments.
static const ConvertData g_ConvertTable[] = {
 { DXGI_FORMAT_R32G32B32A32_FLOAT, 32, CONVF_FLOAT | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_R32G32B32A32_UINT, 32, CONVF_UINT | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_R32G32B32A32_SINT, 32, CONVF_SINT | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_R32G32B32_FLOAT, 32, CONVF_FLOAT | CONVF_R | CONVF_G | CONVF_B },
 { DXGI_FORMAT_R32G32B32_UINT, 32, CONVF_UINT | CONVF_R | CONVF_G | CONVF_B },
 { DXGI_FORMAT_R32G32B32_SINT, 32, CONVF_SINT | CONVF_R | CONVF_G | CONVF_B },
 { DXGI_FORMAT_R16G16B16A16_FLOAT, 16, CONVF_FLOAT | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_R16G16B16A16_UNORM, 16, CONVF_UNORM | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_R16G16B16A16_UINT, 16, CONVF_UINT | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_R16G16B16A16_SNORM, 16, CONVF_SNORM | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_R16G16B16A16_SINT, 16, CONVF_SINT | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_R32G32_FLOAT, 32, CONVF_FLOAT | CONVF_R | CONVF_G },
 { DXGI_FORMAT_R32G32_UINT, 32, CONVF_UINT | CONVF_R | CONVF_G },
 { DXGI_FORMAT_R32G32_SINT, 32, CONVF_SINT | CONVF_R | CONVF_G },
 { DXGI_FORMAT_D32_FLOAT_S8X24_UINT, 32, CONVF_FLOAT | CONVF_DEPTH | CONVF_STENCIL },
 { DXGI_FORMAT_R10G10B10A2_UNORM, 10, CONVF_UNORM | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_R10G10B10A2_UINT, 10, CONVF_UINT | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_R11G11B10_FLOAT, 10, CONVF_FLOAT | CONVF_R | CONVF_G | CONVF_B },
 { DXGI_FORMAT_R8G8B8A8_UNORM, 8, CONVF_UNORM | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, 8, CONVF_UNORM | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_R8G8B8A8_UINT, 8, CONVF_UINT | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_R8G8B8A8_SNORM, 8, CONVF_SNORM | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_R8G8B8A8_SINT, 8, CONVF_SINT | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_R16G16_FLOAT, 16, CONVF_FLOAT | CONVF_R | CONVF_G },
 { DXGI_FORMAT_R16G16_UNORM, 16, CONVF_UNORM | CONVF_R | CONVF_G },
 { DXGI_FORMAT_R16G16_UINT, 16, CONVF_UINT | CONVF_R | CONVF_G },
 { DXGI_FORMAT_R16G16_SNORM, 16, CONVF_SNORM | CONVF_R | CONVF_G },
 { DXGI_FORMAT_R16G16_SINT, 16, CONVF_SINT | CONVF_R | CONVF_G },
 { DXGI_FORMAT_D32_FLOAT, 32, CONVF_FLOAT | CONVF_DEPTH },
 { DXGI_FORMAT_R32_FLOAT, 32, CONVF_FLOAT | CONVF_R },
 { DXGI_FORMAT_R32_UINT, 32, CONVF_UINT | CONVF_R },
 { DXGI_FORMAT_R32_SINT, 32, CONVF_SINT | CONVF_R },
 { DXGI_FORMAT_D24_UNORM_S8_UINT, 32, CONVF_UNORM | CONVF_DEPTH | CONVF_STENCIL },
 { DXGI_FORMAT_R8G8_UNORM, 8, CONVF_UNORM | CONVF_R | CONVF_G },
 { DXGI_FORMAT_R8G8_UINT, 8, CONVF_UINT | CONVF_R | CONVF_G },
 { DXGI_FORMAT_R8G8_SNORM, 8, CONVF_SNORM | CONVF_R | CONVF_G },
 { DXGI_FORMAT_R8G8_SINT, 8, CONVF_SINT | CONVF_R | CONVF_G },
 { DXGI_FORMAT_R16_FLOAT, 16, CONVF_FLOAT | CONVF_R },
 { DXGI_FORMAT_D16_UNORM, 16, CONVF_UNORM | CONVF_DEPTH },
 { DXGI_FORMAT_R16_UNORM, 16, CONVF_UNORM | CONVF_R },
 { DXGI_FORMAT_R16_UINT, 16, CONVF_UINT | CONVF_R },
 { DXGI_FORMAT_R16_SNORM, 16, CONVF_SNORM | CONVF_R },
 { DXGI_FORMAT_R16_SINT, 16, CONVF_SINT | CONVF_R },
 { DXGI_FORMAT_R8_UNORM, 8, CONVF_UNORM | CONVF_R },
 { DXGI_FORMAT_R8_UINT, 8, CONVF_UINT | CONVF_R },
 { DXGI_FORMAT_R8_SNORM, 8, CONVF_SNORM | CONVF_R },
 { DXGI_FORMAT_R8_SINT, 8, CONVF_SINT | CONVF_R },
 { DXGI_FORMAT_A8_UNORM, 8, CONVF_UNORM | CONVF_A },
 { DXGI_FORMAT_R1_UNORM, 1, CONVF_UNORM | CONVF_R },
 { DXGI_FORMAT_R9G9B9E5_SHAREDEXP, 9, CONVF_SHAREDEXP | CONVF_R | CONVF_G | CONVF_B },
 { DXGI_FORMAT_R8G8_B8G8_UNORM, 8, CONVF_UNORM | CONVF_PACKED | CONVF_R | CONVF_G | CONVF_B },
 { DXGI_FORMAT_G8R8_G8B8_UNORM, 8, CONVF_UNORM | CONVF_PACKED | CONVF_R | CONVF_G | CONVF_B },
 { DXGI_FORMAT_BC1_UNORM, 8, CONVF_UNORM | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_BC1_UNORM_SRGB, 8, CONVF_UNORM | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_BC2_UNORM, 8, CONVF_UNORM | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_BC2_UNORM_SRGB, 8, CONVF_UNORM | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_BC3_UNORM, 8, CONVF_UNORM | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_BC3_UNORM_SRGB, 8, CONVF_UNORM | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_BC4_UNORM, 8, CONVF_UNORM | CONVF_BC | CONVF_R },
 { DXGI_FORMAT_BC4_SNORM, 8, CONVF_SNORM | CONVF_BC | CONVF_R },
 { DXGI_FORMAT_BC5_UNORM, 8, CONVF_UNORM | CONVF_BC | CONVF_R | CONVF_G },
 { DXGI_FORMAT_BC5_SNORM, 8, CONVF_SNORM | CONVF_BC | CONVF_R | CONVF_G },
 { DXGI_FORMAT_B5G6R5_UNORM, 5, CONVF_UNORM | CONVF_R | CONVF_G | CONVF_B },
 { DXGI_FORMAT_B5G5R5A1_UNORM, 5, CONVF_UNORM | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_B8G8R8A8_UNORM, 8, CONVF_UNORM | CONVF_BGR | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_B8G8R8X8_UNORM, 8, CONVF_UNORM | CONVF_BGR | CONVF_R | CONVF_G | CONVF_B },
 { DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM, 10, CONVF_UNORM | CONVF_XR | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, 8, CONVF_UNORM | CONVF_BGR | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_B8G8R8X8_UNORM_SRGB, 8, CONVF_UNORM | CONVF_BGR | CONVF_R | CONVF_G | CONVF_B },
 { DXGI_FORMAT_BC6H_UF16, 16, CONVF_FLOAT | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_BC6H_SF16, 16, CONVF_FLOAT | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_BC7_UNORM, 8, CONVF_UNORM | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_BC7_UNORM_SRGB, 8, CONVF_UNORM | CONVF_BC | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_AYUV, 8, CONVF_UNORM | CONVF_YUV | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_Y410, 10, CONVF_UNORM | CONVF_YUV | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_Y416, 16, CONVF_UNORM | CONVF_YUV | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT_YUY2, 8, CONVF_UNORM | CONVF_YUV | CONVF_PACKED | CONVF_R | CONVF_G | CONVF_B },
 { DXGI_FORMAT_Y210, 10, CONVF_UNORM | CONVF_YUV | CONVF_PACKED | CONVF_R | CONVF_G | CONVF_B },
 { DXGI_FORMAT_Y216, 16, CONVF_UNORM | CONVF_YUV | CONVF_PACKED | CONVF_R | CONVF_G | CONVF_B },
 { DXGI_FORMAT_B4G4R4A4_UNORM, 4, CONVF_UNORM | CONVF_BGR | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT(116)
   /* DXGI_FORMAT_R10G10B10_7E3_A2_FLOAT */, 10, CONVF_FLOAT | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
 { DXGI_FORMAT(117)
   /* DXGI_FORMAT_R10G10B10_6E4_A2_FLOAT */, 10, CONVF_FLOAT | CONVF_R | CONVF_G | CONVF_B | CONVF_A },
};
2894 
2895 #pragma prefast( suppress : 25004, "Signature must match bsearch_s" );
2896 static int __cdecl _ConvertCompare( void *context, const void* ptr1, const void *ptr2 )
2897 {
2898  UNREFERENCED_PARAMETER(context);
2899  const ConvertData *p1 = reinterpret_cast<const ConvertData*>(ptr1);
2900  const ConvertData *p2 = reinterpret_cast<const ConvertData*>(ptr2);
2901  if ( p1->format == p2->format ) return 0;
2902  else return (p1->format < p2->format ) ? -1 : 1;
2903 }
2904 
2905 _Use_decl_annotations_
2906 DWORD _GetConvertFlags( DXGI_FORMAT format )
2907 {
2908 #ifdef _DEBUG
2909  // Ensure conversion table is in ascending order
2910  assert( _countof(g_ConvertTable) > 0 );
2911  DXGI_FORMAT lastvalue = g_ConvertTable[0].format;
2912  for( size_t index=1; index < _countof(g_ConvertTable); ++index )
2913  {
2914  assert( g_ConvertTable[index].format > lastvalue );
2915  lastvalue = g_ConvertTable[index].format;
2916  }
2917 #endif
2918 
2919  ConvertData key = { format, 0 };
2920  const ConvertData* in = (const ConvertData*) bsearch_s( &key, g_ConvertTable, _countof(g_ConvertTable), sizeof(ConvertData),
2921  _ConvertCompare, 0 );
2922  return (in) ? in->flags : 0;
2923 }
2924 
2925 _Use_decl_annotations_
2926 void _ConvertScanline( XMVECTOR* pBuffer, size_t count, DXGI_FORMAT outFormat, DXGI_FORMAT inFormat, DWORD flags )
2927 {
2928  assert( pBuffer && count > 0 && (((uintptr_t)pBuffer & 0xF) == 0) );
2929  assert( IsValid(outFormat) && !IsTypeless(outFormat) && !IsPlanar(outFormat) && !IsPalettized(outFormat) );
2930  assert( IsValid(inFormat) && !IsTypeless(inFormat) && !IsPlanar(inFormat) && !IsPalettized(inFormat) );
2931 
2932  if ( !pBuffer )
2933  return;
2934 
2935 #ifdef _DEBUG
2936  // Ensure conversion table is in ascending order
2937  assert( _countof(g_ConvertTable) > 0 );
2938  DXGI_FORMAT lastvalue = g_ConvertTable[0].format;
2939  for( size_t index=1; index < _countof(g_ConvertTable); ++index )
2940  {
2941  assert( g_ConvertTable[index].format > lastvalue );
2942  lastvalue = g_ConvertTable[index].format;
2943  }
2944 #endif
2945 
2946  // Determine conversion details about source and dest formats
2947  ConvertData key = { inFormat, 0 };
2948  const ConvertData* in = (const ConvertData*) bsearch_s( &key, g_ConvertTable, _countof(g_ConvertTable), sizeof(ConvertData),
2949  _ConvertCompare, 0 );
2950  key.format = outFormat;
2951  const ConvertData* out = (const ConvertData*) bsearch_s( &key, g_ConvertTable, _countof(g_ConvertTable), sizeof(ConvertData),
2952  _ConvertCompare, 0 );
2953  if ( !in || !out )
2954  {
2955  assert(false);
2956  return;
2957  }
2958 
2959  assert( _GetConvertFlags( inFormat ) == in->flags );
2960  assert( _GetConvertFlags( outFormat ) == out->flags );
2961 
2962  // Handle SRGB filtering modes
2963  switch ( inFormat )
2964  {
2965  case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
2966  case DXGI_FORMAT_BC1_UNORM_SRGB:
2967  case DXGI_FORMAT_BC2_UNORM_SRGB:
2968  case DXGI_FORMAT_BC3_UNORM_SRGB:
2969  case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
2970  case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB:
2971  case DXGI_FORMAT_BC7_UNORM_SRGB:
2972  flags |= TEX_FILTER_SRGB_IN;
2973  break;
2974 
2975  case DXGI_FORMAT_A8_UNORM:
2976  case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM:
2977  flags &= ~TEX_FILTER_SRGB_IN;
2978  break;
2979  }
2980 
2981  switch ( outFormat )
2982  {
2983  case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
2984  case DXGI_FORMAT_BC1_UNORM_SRGB:
2985  case DXGI_FORMAT_BC2_UNORM_SRGB:
2986  case DXGI_FORMAT_BC3_UNORM_SRGB:
2987  case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
2988  case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB:
2989  case DXGI_FORMAT_BC7_UNORM_SRGB:
2990  flags |= TEX_FILTER_SRGB_OUT;
2991  break;
2992 
2993  case DXGI_FORMAT_A8_UNORM:
2994  case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM:
2995  flags &= ~TEX_FILTER_SRGB_OUT;
2996  break;
2997  }
2998 
3000  {
3002  }
3003 
3004  // sRGB input processing (sRGB -> Linear RGB)
3005  if ( flags & TEX_FILTER_SRGB_IN )
3006  {
3007  if ( !(in->flags & CONVF_DEPTH) && ( (in->flags & CONVF_FLOAT) || (in->flags & CONVF_UNORM) ) )
3008  {
3009  XMVECTOR* ptr = pBuffer;
3010  for( size_t i=0; i < count; ++i, ++ptr )
3011  {
3012  *ptr = XMColorSRGBToRGB( *ptr );
3013  }
3014  }
3015  }
3016 
3017  // Handle conversion special cases
3018  DWORD diffFlags = in->flags ^ out->flags;
3019  if ( diffFlags != 0)
3020  {
3021  if ( out->flags & CONVF_UNORM )
3022  {
3023  if ( in->flags & CONVF_SNORM )
3024  {
3025  // SNORM -> UNORM
3026  XMVECTOR* ptr = pBuffer;
3027  for( size_t i=0; i < count; ++i )
3028  {
3029  XMVECTOR v = *ptr;
3030  *ptr++ = XMVectorMultiplyAdd( v, g_XMOneHalf, g_XMOneHalf );
3031  }
3032  }
3033  else if ( in->flags & CONVF_FLOAT )
3034  {
3035  // FLOAT -> UNORM
3036  XMVECTOR* ptr = pBuffer;
3037  for( size_t i=0; i < count; ++i )
3038  {
3039  XMVECTOR v = *ptr;
3040  *ptr++ = XMVectorSaturate( v );
3041  }
3042  }
3043  }
3044  else if ( out->flags & CONVF_SNORM )
3045  {
3046  if ( in->flags & CONVF_UNORM )
3047  {
3048  // UNORM -> SNORM
3049  static XMVECTORF32 two = { 2.0f, 2.0f, 2.0f, 2.0f };
3050  XMVECTOR* ptr = pBuffer;
3051  for( size_t i=0; i < count; ++i )
3052  {
3053  XMVECTOR v = *ptr;
3054  *ptr++ = XMVectorMultiplyAdd( v, two, g_XMNegativeOne );
3055  }
3056  }
3057  else if ( in->flags & CONVF_FLOAT )
3058  {
3059  // FLOAT -> SNORM
3060  XMVECTOR* ptr = pBuffer;
3061  for( size_t i=0; i < count; ++i )
3062  {
3063  XMVECTOR v = *ptr;
3064  *ptr++ = XMVectorClamp( v, g_XMNegativeOne, g_XMOne );
3065  }
3066  }
3067  }
3068 
3069  // !CONVF_A -> CONVF_A is handled because LoadScanline ensures alpha defaults to 1.0 for no-alpha formats
3070 
3071  // CONVF_PACKED cases are handled because LoadScanline/StoreScanline handles packing/unpacking
3072 
3073  if ( ((out->flags & CONVF_RGBA_MASK) == CONVF_A) && !(in->flags & CONVF_A) )
3074  {
3075  // !CONVF_A -> A format
3076  XMVECTOR* ptr = pBuffer;
3077  for( size_t i=0; i < count; ++i )
3078  {
3079  XMVECTOR v = *ptr;
3080  *ptr++ = XMVectorSplatX( v );
3081  }
3082  }
3083  else if ( ((in->flags & CONVF_RGBA_MASK) == CONVF_A) && !(out->flags & CONVF_A) )
3084  {
3085  // A format -> !CONVF_A
3086  XMVECTOR* ptr = pBuffer;
3087  for( size_t i=0; i < count; ++i )
3088  {
3089  XMVECTOR v = *ptr;
3090  *ptr++ = XMVectorSplatW( v );
3091  }
3092  }
3093  else if ( (in->flags & CONVF_RGB_MASK) == CONVF_R )
3094  {
3095  if ( (out->flags & CONVF_RGB_MASK) == (CONVF_R|CONVF_G|CONVF_B) )
3096  {
3097  // R format -> RGB format
3098  XMVECTOR* ptr = pBuffer;
3099  for( size_t i=0; i < count; ++i )
3100  {
3101  XMVECTOR v = *ptr;
3102  XMVECTOR v1 = XMVectorSplatX( v );
3103  *ptr++ = XMVectorSelect( v, v1, g_XMSelect1110 );
3104  }
3105  }
3106  else if ( (out->flags & CONVF_RGB_MASK) == (CONVF_R|CONVF_G) )
3107  {
3108  // R format -> RG format
3109  XMVECTOR* ptr = pBuffer;
3110  for( size_t i=0; i < count; ++i )
3111  {
3112  XMVECTOR v = *ptr;
3113  XMVECTOR v1 = XMVectorSplatX( v );
3114  *ptr++ = XMVectorSelect( v, v1, g_XMSelect1100 );
3115  }
3116  }
3117  }
3118  else if ( (in->flags & CONVF_RGB_MASK) == (CONVF_R|CONVF_G|CONVF_B) )
3119  {
3120  if ( (out->flags & CONVF_RGB_MASK) == CONVF_R )
3121  {
3122  // RGB format -> R format
3124  {
3126  // Leave data unchanged and the store will handle this...
3127  break;
3128 
3130  {
3131  XMVECTOR* ptr = pBuffer;
3132  for( size_t i=0; i < count; ++i )
3133  {
3134  XMVECTOR v = *ptr;
3135  XMVECTOR v1 = XMVectorSplatY( v );
3136  *ptr++ = XMVectorSelect( v, v1, g_XMSelect1110 );
3137  }
3138  }
3139  break;
3140 
3142  {
3143  XMVECTOR* ptr = pBuffer;
3144  for( size_t i=0; i < count; ++i )
3145  {
3146  XMVECTOR v = *ptr;
3147  XMVECTOR v1 = XMVectorSplatZ( v );
3148  *ptr++ = XMVectorSelect( v, v1, g_XMSelect1110 );
3149  }
3150  }
3151  break;
3152 
3153  default:
3154  {
3155  XMVECTOR* ptr = pBuffer;
3156  for( size_t i=0; i < count; ++i )
3157  {
3158  XMVECTOR v = *ptr;
3159  XMVECTOR v1 = XMVector3Dot( v, g_Grayscale );
3160  *ptr++ = XMVectorSelect( v, v1, g_XMSelect1110 );
3161  }
3162  }
3163  break;
3164  }
3165  }
3166  else if ( (out->flags & CONVF_RGB_MASK) == (CONVF_R|CONVF_G) )
3167  {
3168  // RGB format -> RG format
3170  {
3172  {
3173  XMVECTOR* ptr = pBuffer;
3174  for( size_t i=0; i < count; ++i )
3175  {
3176  XMVECTOR v = *ptr;
3177  XMVECTOR v1 = XMVectorSwizzle<0,2,0,2>( v );
3178  *ptr++ = XMVectorSelect( v, v1, g_XMSelect1100 );
3179  }
3180  }
3181  break;
3182 
3184  {
3185  XMVECTOR* ptr = pBuffer;
3186  for( size_t i=0; i < count; ++i )
3187  {
3188  XMVECTOR v = *ptr;
3189  XMVECTOR v1 = XMVectorSwizzle<1,2,3,0>( v );
3190  *ptr++ = XMVectorSelect( v, v1, g_XMSelect1100 );
3191  }
3192  }
3193  break;
3194 
3196  default:
3197  // Leave data unchanged and the store will handle this...
3198  break;
3199  }
3200  }
3201  }
3202  }
3203 
3204  // sRGB output processing (Linear RGB -> sRGB)
3205  if ( flags & TEX_FILTER_SRGB_OUT )
3206  {
3207  if ( !(out->flags & CONVF_DEPTH) && ( (out->flags & CONVF_FLOAT) || (out->flags & CONVF_UNORM) ) )
3208  {
3209  XMVECTOR* ptr = pBuffer;
3210  for( size_t i=0; i < count; ++i, ++ptr )
3211  {
3212  *ptr = XMColorRGBToSRGB( *ptr );
3213  }
3214  }
3215  }
3216 }
3217 
3218 
3219 //-------------------------------------------------------------------------------------
3220 // Dithering
3221 //-------------------------------------------------------------------------------------
3222 
// 4X4X4 ordered dithering matrix
// Threshold offsets in units of one quantization step, indexed by the
// expression below. Each 8-float row holds the same 4 values twice so that a
// 4-wide XMLoadFloat4 starting at any (z & 3) offset stays inside the row
// (see the load in _StoreScanlineDither).
static const float g_Dither[] =
{
    // (z & 3) + ( (y & 3) * 8) + (x & 3)
    0.468750f, -0.031250f, 0.343750f, -0.156250f, 0.468750f, -0.031250f, 0.343750f, -0.156250f,
    -0.281250f, 0.218750f, -0.406250f, 0.093750f, -0.281250f, 0.218750f, -0.406250f, 0.093750f,
    0.281250f, -0.218750f, 0.406250f, -0.093750f, 0.281250f, -0.218750f, 0.406250f, -0.093750f,
    -0.468750f, 0.031250f, -0.343750f, 0.156250f, -0.468750f, 0.031250f, -0.343750f, 0.156250f,
};

// Per-channel maximum integer values used to scale normalized values to the
// target bit depth (e.g. 16pc = 16 bits/channel, 565pc = 5:6:5 packing,
// 10pc keeps only 2 bits of alpha).
static const XMVECTORF32 g_Scale16pc = { 65535.f, 65535.f, 65535.f, 65535.f };
static const XMVECTORF32 g_Scale15pc = { 32767.f, 32767.f, 32767.f, 32767.f };
static const XMVECTORF32 g_Scale10pc = { 1023.f, 1023.f, 1023.f, 3.f };
static const XMVECTORF32 g_Scale8pc = { 255.f, 255.f, 255.f, 255.f };
static const XMVECTORF32 g_Scale7pc = { 127.f, 127.f, 127.f, 127.f };
static const XMVECTORF32 g_Scale565pc = { 31.f, 63.f, 31.f, 1.f };
static const XMVECTORF32 g_Scale5551pc = { 31.f, 31.f, 31.f, 1.f };
static const XMVECTORF32 g_Scale4pc = { 15.f, 15.f, 15.f, 15.f };

// Floyd-Steinberg error-diffusion coefficients (sixteenths); 7/16 goes to the
// next pixel, 3/16 + 5/16 + 1/16 go to the three neighbors on the next row.
static const XMVECTORF32 g_ErrorWeight3 = { 3.f/16.f, 3.f/16.f, 3.f/16.f, 3.f/16.f };
static const XMVECTORF32 g_ErrorWeight5 = { 5.f/16.f, 5.f/16.f, 5.f/16.f, 5.f/16.f };
static const XMVECTORF32 g_ErrorWeight1 = { 1.f/16.f, 1.f/16.f, 1.f/16.f, 1.f/16.f };
static const XMVECTORF32 g_ErrorWeight7 = { 7.f/16.f, 7.f/16.f, 7.f/16.f, 7.f/16.f };
3246 
// STORE_SCANLINE: shared body of the 4-component dithered-store cases.
// Expands inside _StoreScanlineDither and relies on its locals: pDestination,
// size, count, sPtr, vError, ordered[] and pDiffusionErrors.
//  - 'row' (the y coordinate) selects serpentine traversal: odd rows walk
//    right-to-left, with 'delta' mirroring the next-row error targets.
//  - With pDiffusionErrors != 0, quantization error is diffused with the
//    Floyd-Steinberg weights; otherwise the precomputed 'ordered' offsets are
//    added before rounding.
//  - 'norm' scales normalized values by 'scalev'; 'clampzero' selects the
//    unsigned [0,max] range vs the signed one; 'bgr' swizzles to BGR first;
//    'mask' trims each component to the destination field width.
// Evaluates to 'return true' on success, 'return false' if the destination
// buffer is smaller than one element.
#define STORE_SCANLINE( type, scalev, clampzero, norm, itype, mask, row, bgr ) \
        if ( size >= sizeof(type) ) \
        { \
            type * __restrict dest = reinterpret_cast<type*>(pDestination); \
            for( size_t i = 0; i < count; ++i ) \
            { \
                ptrdiff_t index = static_cast<ptrdiff_t>( ( row & 1 ) ? ( count - i - 1 ) : i ); \
                ptrdiff_t delta = ( row & 1 ) ? -2 : 0; \
                \
                XMVECTOR v = sPtr[ index ]; \
                if ( bgr ) { v = XMVectorSwizzle<2, 1, 0, 3>( v ); } \
                if ( norm && clampzero ) v = XMVectorSaturate( v ) ; \
                else if ( clampzero ) v = XMVectorClamp( v, g_XMZero, scalev ); \
                else if ( norm ) v = XMVectorClamp( v, g_XMNegativeOne, g_XMOne ); \
                else v = XMVectorClamp( v, -scalev + g_XMOne, scalev ); \
                v = XMVectorAdd( v, vError ); \
                if ( norm ) v = XMVectorMultiply( v, scalev ); \
                \
                XMVECTOR target; \
                if ( pDiffusionErrors ) \
                { \
                    target = XMVectorRound( v ); \
                    vError = XMVectorSubtract( v, target ); \
                    if (norm) vError = XMVectorDivide( vError, scalev ); \
                    \
                    /* Distribute error to next scanline and next pixel */ \
                    pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); \
                    pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); \
                    pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); \
                    vError = XMVectorMultiply( vError, g_ErrorWeight7 ); \
                } \
                else \
                { \
                    /* Applied ordered dither */ \
                    target = XMVectorAdd( v, ordered[ index & 3 ] ); \
                    target = XMVectorRound( target ); \
                } \
                \
                target = XMVectorMin( scalev, target ); \
                target = XMVectorMax( (clampzero) ? g_XMZero : ( -scalev + g_XMOne ), target ); \
                \
                XMFLOAT4A tmp; \
                XMStoreFloat4A( &tmp, target ); \
                \
                auto dPtr = &dest[ index ]; \
                dPtr->x = static_cast<itype>( tmp.x ) & mask; \
                dPtr->y = static_cast<itype>( tmp.y ) & mask; \
                dPtr->z = static_cast<itype>( tmp.z ) & mask; \
                dPtr->w = static_cast<itype>( tmp.w ) & mask; \
            } \
            return true; \
        } \
        return false;
3300 
// STORE_SCANLINE2: two-component variant of STORE_SCANLINE (same serpentine
// traversal, Floyd-Steinberg / ordered dithering and clamping — see the notes
// on STORE_SCANLINE above); only .x and .y are written to the destination.
#define STORE_SCANLINE2( type, scalev, clampzero, norm, itype, mask, row ) \
        /* The 2 component cases are always bgr=false */ \
        if ( size >= sizeof(type) ) \
        { \
            type * __restrict dest = reinterpret_cast<type*>(pDestination); \
            for( size_t i = 0; i < count; ++i ) \
            { \
                ptrdiff_t index = static_cast<ptrdiff_t>( ( row & 1 ) ? ( count - i - 1 ) : i ); \
                ptrdiff_t delta = ( row & 1 ) ? -2 : 0; \
                \
                XMVECTOR v = sPtr[ index ]; \
                if ( norm && clampzero ) v = XMVectorSaturate( v ) ; \
                else if ( clampzero ) v = XMVectorClamp( v, g_XMZero, scalev ); \
                else if ( norm ) v = XMVectorClamp( v, g_XMNegativeOne, g_XMOne ); \
                else v = XMVectorClamp( v, -scalev + g_XMOne, scalev ); \
                v = XMVectorAdd( v, vError ); \
                if ( norm ) v = XMVectorMultiply( v, scalev ); \
                \
                XMVECTOR target; \
                if ( pDiffusionErrors ) \
                { \
                    target = XMVectorRound( v ); \
                    vError = XMVectorSubtract( v, target ); \
                    if (norm) vError = XMVectorDivide( vError, scalev ); \
                    \
                    /* Distribute error to next scanline and next pixel */ \
                    pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); \
                    pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); \
                    pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); \
                    vError = XMVectorMultiply( vError, g_ErrorWeight7 ); \
                } \
                else \
                { \
                    /* Applied ordered dither */ \
                    target = XMVectorAdd( v, ordered[ index & 3 ] ); \
                    target = XMVectorRound( target ); \
                } \
                \
                target = XMVectorMin( scalev, target ); \
                target = XMVectorMax( (clampzero) ? g_XMZero : ( -scalev + g_XMOne ), target ); \
                \
                XMFLOAT4A tmp; \
                XMStoreFloat4A( &tmp, target ); \
                \
                auto dPtr = &dest[ index ]; \
                dPtr->x = static_cast<itype>( tmp.x ) & mask; \
                dPtr->y = static_cast<itype>( tmp.y ) & mask; \
            } \
            return true; \
        } \
        return false;
3352 
// STORE_SCANLINE1: single-component variant of STORE_SCANLINE (same serpentine
// traversal, Floyd-Steinberg / ordered dithering and clamping — see the notes
// on STORE_SCANLINE above). 'selectw' picks the W (alpha) lane instead of X as
// the stored value; used by the A8_UNORM case.
#define STORE_SCANLINE1( type, scalev, clampzero, norm, mask, row, selectw ) \
        /* The 1 component cases are always bgr=false */ \
        if ( size >= sizeof(type) ) \
        { \
            type * __restrict dest = reinterpret_cast<type*>(pDestination); \
            for( size_t i = 0; i < count; ++i ) \
            { \
                ptrdiff_t index = static_cast<ptrdiff_t>( ( row & 1 ) ? ( count - i - 1 ) : i ); \
                ptrdiff_t delta = ( row & 1 ) ? -2 : 0; \
                \
                XMVECTOR v = sPtr[ index ]; \
                if ( norm && clampzero ) v = XMVectorSaturate( v ) ; \
                else if ( clampzero ) v = XMVectorClamp( v, g_XMZero, scalev ); \
                else if ( norm ) v = XMVectorClamp( v, g_XMNegativeOne, g_XMOne ); \
                else v = XMVectorClamp( v, -scalev + g_XMOne, scalev ); \
                v = XMVectorAdd( v, vError ); \
                if ( norm ) v = XMVectorMultiply( v, scalev ); \
                \
                XMVECTOR target; \
                if ( pDiffusionErrors ) \
                { \
                    target = XMVectorRound( v ); \
                    vError = XMVectorSubtract( v, target ); \
                    if (norm) vError = XMVectorDivide( vError, scalev ); \
                    \
                    /* Distribute error to next scanline and next pixel */ \
                    pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); \
                    pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); \
                    pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); \
                    vError = XMVectorMultiply( vError, g_ErrorWeight7 ); \
                } \
                else \
                { \
                    /* Applied ordered dither */ \
                    target = XMVectorAdd( v, ordered[ index & 3 ] ); \
                    target = XMVectorRound( target ); \
                } \
                \
                target = XMVectorMin( scalev, target ); \
                target = XMVectorMax( (clampzero) ? g_XMZero : ( -scalev + g_XMOne ), target ); \
                \
                dest[ index ] = static_cast<type>( (selectw) ? XMVectorGetW( target ) : XMVectorGetX( target ) ) & mask; \
            } \
            return true; \
        } \
        return false;
3399 
3400 #pragma warning(push)
3401 #pragma warning( disable : 4127 )
3402 
3403 _Use_decl_annotations_
3404 bool _StoreScanlineDither( LPVOID pDestination, size_t size, DXGI_FORMAT format,
3405  XMVECTOR* pSource, size_t count, float threshold, size_t y, size_t z, XMVECTOR* pDiffusionErrors )
3406 {
3407  assert( pDestination && size > 0 );
3408  assert( pSource && count > 0 && (((uintptr_t)pSource & 0xF) == 0) );
3409  assert( IsValid(format) && !IsTypeless(format) && !IsCompressed(format) && !IsPlanar(format) && !IsPalettized(format) );
3410 
3411  XMVECTOR ordered[4];
3412  if ( pDiffusionErrors )
3413  {
3414  // If pDiffusionErrors != 0, then this function performs error diffusion dithering (aka Floyd-Steinberg dithering)
3415 
3416  // To avoid the need for another temporary scanline buffer, we allow this function to overwrite the source buffer in-place
3417  // Given the intended usage in the conversion routines, this is not a problem.
3418 
3419  XMVECTOR* ptr = pSource;
3420  const XMVECTOR* err = pDiffusionErrors + 1;
3421  for( size_t i=0; i < count; ++i )
3422  {
3423  // Add contribution from previous scanline
3424  XMVECTOR v = XMVectorAdd( *ptr, *err++ );
3425  *ptr++ = v;
3426  }
3427 
3428  // Reset errors for next scanline
3429  memset( pDiffusionErrors, 0, sizeof(XMVECTOR)*(count+2) );
3430  }
3431  else
3432  {
3433  // If pDiffusionErrors == 0, then this function performs ordered dithering
3434 
3435  XMVECTOR dither = XMLoadFloat4( reinterpret_cast<const XMFLOAT4*>( g_Dither + (z & 3) + ( (y & 3) * 8 ) ) );
3436 
3437  ordered[0] = XMVectorSplatX( dither );
3438  ordered[1] = XMVectorSplatY( dither );
3439  ordered[2] = XMVectorSplatZ( dither );
3440  ordered[3] = XMVectorSplatW( dither );
3441  }
3442 
3443  const XMVECTOR* __restrict sPtr = pSource;
3444  if ( !sPtr )
3445  return false;
3446 
3447  XMVECTOR vError = XMVectorZero();
3448 
3449  switch( format )
3450  {
3451  case DXGI_FORMAT_R16G16B16A16_UNORM:
3452  STORE_SCANLINE( XMUSHORTN4, g_Scale16pc, true, true, uint16_t, 0xFFFF, y, false )
3453 
3454  case DXGI_FORMAT_R16G16B16A16_UINT:
3455  STORE_SCANLINE( XMUSHORT4, g_Scale16pc, true, false, uint16_t, 0xFFFF, y, false )
3456 
3457  case DXGI_FORMAT_R16G16B16A16_SNORM:
3458  STORE_SCANLINE( XMSHORTN4, g_Scale15pc, false, true, int16_t, 0xFFFF, y, false )
3459 
3460  case DXGI_FORMAT_R16G16B16A16_SINT:
3461  STORE_SCANLINE( XMSHORT4, g_Scale15pc, false, false, int16_t, 0xFFFF, y, false )
3462 
3463  case DXGI_FORMAT_R10G10B10A2_UNORM:
3464  STORE_SCANLINE( XMUDECN4, g_Scale10pc, true, true, uint16_t, 0x3FF, y, false )
3465 
3466  case DXGI_FORMAT_R10G10B10A2_UINT:
3467  STORE_SCANLINE( XMUDEC4, g_Scale10pc, true, false, uint16_t, 0x3FF, y, false )
3468 
3469  case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM:
3470  if ( size >= sizeof(XMUDEC4) )
3471  {
3472  static const XMVECTORF32 Scale = { 510.0f, 510.0f, 510.0f, 3.0f };
3473  static const XMVECTORF32 Bias = { 384.0f, 384.0f, 384.0f, 0.0f };
3474  static const XMVECTORF32 MinXR = { -0.7529f, -0.7529f, -0.7529f, 0.f };
3475  static const XMVECTORF32 MaxXR = { 1.2529f, 1.2529f, 1.2529f, 1.0f };
3476 
3477  XMUDEC4 * __restrict dest = reinterpret_cast<XMUDEC4*>(pDestination);
3478  for( size_t i = 0; i < count; ++i )
3479  {
3480  ptrdiff_t index = static_cast<ptrdiff_t>( ( y & 1 ) ? ( count - i - 1 ) : i );
3481  ptrdiff_t delta = ( y & 1 ) ? -2 : 0;
3482 
3483  XMVECTOR v = XMVectorClamp( sPtr[ index ], MinXR, MaxXR );
3484  v = XMVectorMultiplyAdd( v, Scale, vError );
3485 
3486  XMVECTOR target;
3487  if ( pDiffusionErrors )
3488  {
3489  target = XMVectorRound( v );
3490  vError = XMVectorSubtract( v, target );
3491  vError = XMVectorDivide( vError, Scale );
3492 
3493  // Distribute error to next scanline and next pixel
3494  pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError );
3495  pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError );
3496  pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError );
3497  vError = XMVectorMultiply( vError, g_ErrorWeight7 );
3498  }
3499  else
3500  {
3501  // Applied ordered dither
3502  target = XMVectorAdd( v, ordered[ index & 3 ] );
3503  target = XMVectorRound( target );
3504  }
3505 
3506  target = XMVectorAdd( target, Bias );
3507  target = XMVectorClamp( target, g_XMZero, g_Scale10pc );
3508 
3509  XMFLOAT4A tmp;
3510  XMStoreFloat4A( &tmp, target );
3511 
3512  auto dPtr = &dest[ index ];
3513  dPtr->x = static_cast<uint16_t>( tmp.x ) & 0x3FF;
3514  dPtr->y = static_cast<uint16_t>( tmp.y ) & 0x3FF;
3515  dPtr->z = static_cast<uint16_t>( tmp.z ) & 0x3FF;
3516  dPtr->w = static_cast<uint16_t>( tmp.w );
3517  }
3518  return true;
3519  }
3520  return false;
3521 
3522  case DXGI_FORMAT_R8G8B8A8_UNORM:
3523  case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
3524  STORE_SCANLINE( XMUBYTEN4, g_Scale8pc, true, true, uint8_t, 0xFF, y, false )
3525 
3526  case DXGI_FORMAT_R8G8B8A8_UINT:
3527  STORE_SCANLINE( XMUBYTE4, g_Scale8pc, true, false, uint8_t, 0xFF, y, false )
3528 
3529  case DXGI_FORMAT_R8G8B8A8_SNORM:
3530  STORE_SCANLINE( XMBYTEN4, g_Scale7pc, false, true, int8_t, 0xFF, y, false )
3531 
3532  case DXGI_FORMAT_R8G8B8A8_SINT:
3533  STORE_SCANLINE( XMBYTE4, g_Scale7pc, false, false, int8_t, 0xFF, y, false )
3534 
3535  case DXGI_FORMAT_R16G16_UNORM:
3536  STORE_SCANLINE2( XMUSHORTN2, g_Scale16pc, true, true, uint16_t, 0xFFFF, y )
3537 
3538  case DXGI_FORMAT_R16G16_UINT:
3539  STORE_SCANLINE2( XMUSHORT2, g_Scale16pc, true, false, uint16_t, 0xFFFF, y )
3540 
3541  case DXGI_FORMAT_R16G16_SNORM:
3542  STORE_SCANLINE2( XMSHORTN2, g_Scale15pc, false, true, int16_t, 0xFFFF, y )
3543 
3544  case DXGI_FORMAT_R16G16_SINT:
3545  STORE_SCANLINE2( XMSHORT2, g_Scale15pc, false, false, int16_t, 0xFFFF, y )
3546 
3547  case DXGI_FORMAT_D24_UNORM_S8_UINT:
3548  if ( size >= sizeof(uint32_t) )
3549  {
3550  static const XMVECTORF32 Clamp = { 1.f, 255.f, 0.f, 0.f };
3551  static const XMVECTORF32 Scale = { 16777215.f, 1.f, 0.f, 0.f };
3552  static const XMVECTORF32 Scale2 = { 16777215.f, 255.f, 0.f, 0.f };
3553 
3554  uint32_t * __restrict dest = reinterpret_cast<uint32_t*>(pDestination);
3555  for( size_t i = 0; i < count; ++i )
3556  {
3557  ptrdiff_t index = static_cast<ptrdiff_t>( ( y & 1 ) ? ( count - i - 1 ) : i );
3558  ptrdiff_t delta = ( y & 1 ) ? -2 : 0;
3559 
3560  XMVECTOR v = XMVectorClamp( sPtr[ index ], g_XMZero, Clamp );
3561  v = XMVectorAdd( v, vError );
3562  v = XMVectorMultiply( v, Scale );
3563 
3564  XMVECTOR target;
3565  if ( pDiffusionErrors )
3566  {
3567  target = XMVectorRound( v );
3568  vError = XMVectorSubtract( v, target );
3569  vError = XMVectorDivide( vError, Scale );
3570 
3571  // Distribute error to next scanline and next pixel
3572  pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError );
3573  pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError );
3574  pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError );
3575  vError = XMVectorMultiply( vError, g_ErrorWeight7 );
3576  }
3577  else
3578  {
3579  // Applied ordered dither
3580  target = XMVectorAdd( v, ordered[ index & 3 ] );
3581  target = XMVectorRound( target );
3582  }
3583 
3584  target = XMVectorClamp( target, g_XMZero, Scale2 );
3585 
3586  XMFLOAT4A tmp;
3587  XMStoreFloat4A( &tmp, target );
3588 
3589  auto dPtr = &dest[ index ];
3590  *dPtr = (static_cast<uint32_t>( tmp.x ) & 0xFFFFFF)
3591  | ((static_cast<uint32_t>( tmp.y ) & 0xFF) << 24);
3592  }
3593  return true;
3594  }
3595  return false;
3596 
3597  case DXGI_FORMAT_R8G8_UNORM:
3598  STORE_SCANLINE2( XMUBYTEN2, g_Scale8pc, true, true, uint8_t, 0xFF, y )
3599 
3600  case DXGI_FORMAT_R8G8_UINT:
3601  STORE_SCANLINE2( XMUBYTE2, g_Scale8pc, true, false, uint8_t, 0xFF, y )
3602 
3603  case DXGI_FORMAT_R8G8_SNORM:
3604  STORE_SCANLINE2( XMBYTEN2, g_Scale7pc, false, true, int8_t, 0xFF, y )
3605 
3606  case DXGI_FORMAT_R8G8_SINT:
3607  STORE_SCANLINE2( XMBYTE2, g_Scale7pc, false, false, int8_t, 0xFF, y )
3608 
3609  case DXGI_FORMAT_D16_UNORM:
3610  case DXGI_FORMAT_R16_UNORM:
3611  STORE_SCANLINE1( uint16_t, g_Scale16pc, true, true, 0xFFFF, y, false )
3612 
3613  case DXGI_FORMAT_R16_UINT:
3614  STORE_SCANLINE1( uint16_t, g_Scale16pc, true, false, 0xFFFF, y, false )
3615 
3616  case DXGI_FORMAT_R16_SNORM:
3617  STORE_SCANLINE1( int16_t, g_Scale15pc, false, true, 0xFFFF, y, false )
3618 
3619  case DXGI_FORMAT_R16_SINT:
3620  STORE_SCANLINE1( int16_t, g_Scale15pc, false, false, 0xFFFF, y, false )
3621 
3622  case DXGI_FORMAT_R8_UNORM:
3623  STORE_SCANLINE1( uint8_t, g_Scale8pc, true, true, 0xFF, y, false )
3624 
3625  case DXGI_FORMAT_R8_UINT:
3626  STORE_SCANLINE1( uint8_t, g_Scale8pc, true, false, 0xFF, y, false )
3627 
3628  case DXGI_FORMAT_R8_SNORM:
3629  STORE_SCANLINE1( int8_t, g_Scale7pc, false, true, 0xFF, y, false )
3630 
3631  case DXGI_FORMAT_R8_SINT:
3632  STORE_SCANLINE1( int8_t, g_Scale7pc, false, false, 0xFF, y, false )
3633 
3634  case DXGI_FORMAT_A8_UNORM:
3635  STORE_SCANLINE1( uint8_t, g_Scale8pc, true, true, 0xFF, y, true )
3636 
3637  case DXGI_FORMAT_B5G6R5_UNORM:
3638  if ( size >= sizeof(XMU565) )
3639  {
3640  XMU565 * __restrict dest = reinterpret_cast<XMU565*>(pDestination);
3641  for( size_t i = 0; i < count; ++i )
3642  {
3643  ptrdiff_t index = static_cast<ptrdiff_t>( ( y & 1 ) ? ( count - i - 1 ) : i );
3644  ptrdiff_t delta = ( y & 1 ) ? -2 : 0;
3645 
3646  XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( sPtr[ index ] );
3647  v = XMVectorSaturate( v );
3648  v = XMVectorAdd( v, vError );
3649  v = XMVectorMultiply( v, g_Scale565pc );
3650 
3651  XMVECTOR target;
3652  if ( pDiffusionErrors )
3653  {
3654  target = XMVectorRound( v );
3655  vError = XMVectorSubtract( v, target );
3656  vError = XMVectorDivide( vError, g_Scale565pc );
3657 
3658  // Distribute error to next scanline and next pixel
3659  pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError );
3660  pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError );
3661  pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError );
3662  vError = XMVectorMultiply( vError, g_ErrorWeight7 );
3663  }
3664  else
3665  {
3666  // Applied ordered dither
3667  target = XMVectorAdd( v, ordered[ index & 3 ] );
3668  target = XMVectorRound( target );
3669  }
3670 
3671  target = XMVectorClamp( target, g_XMZero, g_Scale565pc );
3672 
3673  XMFLOAT4A tmp;
3674  XMStoreFloat4A( &tmp, target );
3675 
3676  auto dPtr = &dest[ index ];
3677  dPtr->x = static_cast<uint16_t>( tmp.x ) & 0x1F;
3678  dPtr->y = static_cast<uint16_t>( tmp.y ) & 0x3F;
3679  dPtr->z = static_cast<uint16_t>( tmp.z ) & 0x1F;
3680  }
3681  return true;
3682  }
3683  return false;
3684 
3685  case DXGI_FORMAT_B5G5R5A1_UNORM:
3686  if ( size >= sizeof(XMU555) )
3687  {
3688  XMU555 * __restrict dest = reinterpret_cast<XMU555*>(pDestination);
3689  for( size_t i = 0; i < count; ++i )
3690  {
3691  ptrdiff_t index = static_cast<ptrdiff_t>( ( y & 1 ) ? ( count - i - 1 ) : i );
3692  ptrdiff_t delta = ( y & 1 ) ? -2 : 0;
3693 
3694  XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( sPtr[ index ] );
3695  v = XMVectorSaturate( v );
3696  v = XMVectorAdd( v, vError );
3697  v = XMVectorMultiply( v, g_Scale5551pc );
3698 
3699  XMVECTOR target;
3700  if ( pDiffusionErrors )
3701  {
3702  target = XMVectorRound( v );
3703  vError = XMVectorSubtract( v, target );
3704  vError = XMVectorDivide( vError, g_Scale5551pc );
3705 
3706  // Distribute error to next scanline and next pixel
3707  pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError );
3708  pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError );
3709  pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError );
3710  vError = XMVectorMultiply( vError, g_ErrorWeight7 );
3711  }
3712  else
3713  {
3714  // Applied ordered dither
3715  target = XMVectorAdd( v, ordered[ index & 3 ] );
3716  target = XMVectorRound( target );
3717  }
3718 
3719  target = XMVectorClamp( target, g_XMZero, g_Scale5551pc );
3720 
3721  XMFLOAT4A tmp;
3722  XMStoreFloat4A( &tmp, target );
3723 
3724  auto dPtr = &dest[ index ];
3725  dPtr->x = static_cast<uint16_t>( tmp.x ) & 0x1F;
3726  dPtr->y = static_cast<uint16_t>( tmp.y ) & 0x1F;
3727  dPtr->z = static_cast<uint16_t>( tmp.z ) & 0x1F;
3728  dPtr->w = ( XMVectorGetW( target ) > threshold ) ? 1 : 0;
3729  }
3730  return true;
3731  }
3732  return false;
3733 
3734  case DXGI_FORMAT_B8G8R8A8_UNORM:
3735  case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
3736  STORE_SCANLINE( XMUBYTEN4, g_Scale8pc, true, true, uint8_t, 0xFF, y, true )
3737 
3738  case DXGI_FORMAT_B8G8R8X8_UNORM:
3739  case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB:
3740  if ( size >= sizeof(XMUBYTEN4) )
3741  {
3742  XMUBYTEN4 * __restrict dest = reinterpret_cast<XMUBYTEN4*>(pDestination);
3743  for( size_t i = 0; i < count; ++i )
3744  {
3745  ptrdiff_t index = static_cast<ptrdiff_t>( ( y & 1 ) ? ( count - i - 1 ) : i );
3746  ptrdiff_t delta = ( y & 1 ) ? -2 : 0;
3747 
3748  XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( sPtr[ index ] );
3749  v = XMVectorSaturate( v );
3750  v = XMVectorAdd( v, vError );
3751  v = XMVectorMultiply( v, g_Scale8pc );
3752 
3753  XMVECTOR target;
3754  if ( pDiffusionErrors )
3755  {
3756  target = XMVectorRound( v );
3757  vError = XMVectorSubtract( v, target );
3758  vError = XMVectorDivide( vError, g_Scale8pc );
3759 
3760  // Distribute error to next scanline and next pixel
3761  pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError );
3762  pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError );
3763  pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError );
3764  vError = XMVectorMultiply( vError, g_ErrorWeight7 );
3765  }
3766  else
3767  {
3768  // Applied ordered dither
3769  target = XMVectorAdd( v, ordered[ index & 3 ] );
3770  target = XMVectorRound( target );
3771  }
3772 
3773  target = XMVectorClamp( target, g_XMZero, g_Scale8pc );
3774 
3775  XMFLOAT4A tmp;
3776  XMStoreFloat4A( &tmp, target );
3777 
3778  auto dPtr = &dest[ index ];
3779  dPtr->x = static_cast<uint8_t>( tmp.x ) & 0xFF;
3780  dPtr->y = static_cast<uint8_t>( tmp.y ) & 0xFF;
3781  dPtr->z = static_cast<uint8_t>( tmp.z ) & 0xFF;
3782  dPtr->w = 0;
3783  }
3784  return true;
3785  }
3786  return false;
3787 
3788  case DXGI_FORMAT_B4G4R4A4_UNORM:
3789  STORE_SCANLINE( XMUNIBBLE4, g_Scale4pc, true, true, uint8_t, 0xF, y, true )
3790 
3791  default:
3792  return _StoreScanline( pDestination, size, format, pSource, count, threshold );
3793  }
3794 }
3795 
3796 #pragma warning(pop)
3797 
3798 #undef STORE_SCANLINE
3799 #undef STORE_SCANLINE2
3800 #undef STORE_SCANLINE1
3801 
3802 
3803 //-------------------------------------------------------------------------------------
3804 // Selection logic for using WIC vs. our own routines
3805 //-------------------------------------------------------------------------------------
3806 static inline bool _UseWICConversion( _In_ DWORD filter, _In_ DXGI_FORMAT sformat, _In_ DXGI_FORMAT tformat,
3807  _Out_ WICPixelFormatGUID& pfGUID, _Out_ WICPixelFormatGUID& targetGUID )
3808 {
3809  memcpy( &pfGUID, &GUID_NULL, sizeof(GUID) );
3810  memcpy( &targetGUID, &GUID_NULL, sizeof(GUID) );
3811 
3812  if ( filter & TEX_FILTER_FORCE_NON_WIC )
3813  {
3814  // Explicit flag indicates use of non-WIC code paths
3815  return false;
3816  }
3817 
3818  if ( !_DXGIToWIC( sformat, pfGUID ) || !_DXGIToWIC( tformat, targetGUID ) )
3819  {
3820  // Source or target format are not WIC supported native pixel formats
3821  return false;
3822  }
3823 
3824  if ( filter & TEX_FILTER_FORCE_WIC )
3825  {
3826  // Explicit flag to use WIC code paths, skips all the case checks below
3827  return true;
3828  }
3829 
3830  if ( filter & TEX_FILTER_SEPARATE_ALPHA )
3831  {
3832  // Alpha is not premultiplied, so use non-WIC code paths
3833  return false;
3834  }
3835 
3836  // Check for special cases
3837  switch ( sformat )
3838  {
3839  case DXGI_FORMAT_R32G32B32A32_FLOAT:
3840  case DXGI_FORMAT_R32G32B32_FLOAT:
3841  case DXGI_FORMAT_R16G16B16A16_FLOAT:
3842  switch( tformat )
3843  {
3844  case DXGI_FORMAT_R16_FLOAT:
3845  case DXGI_FORMAT_R32_FLOAT:
3846  case DXGI_FORMAT_D32_FLOAT:
3847  // WIC converts via UNORM formats and ends up converting colorspaces for these cases
3848  case DXGI_FORMAT_A8_UNORM:
3849  // Conversion logic for these kinds of textures is unintuitive for WIC code paths
3850  return false;
3851  }
3852  break;
3853 
3854  case DXGI_FORMAT_R16_FLOAT:
3855  switch( tformat )
3856  {
3857  case DXGI_FORMAT_R32_FLOAT:
3858  case DXGI_FORMAT_D32_FLOAT:
3859  // WIC converts via UNORM formats and ends up converting colorspaces for these cases
3860  case DXGI_FORMAT_A8_UNORM:
3861  // Conversion logic for these kinds of textures is unintuitive for WIC code paths
3862  return false;
3863  }
3864  break;
3865 
3866  case DXGI_FORMAT_A8_UNORM:
3867  // Conversion logic for these kinds of textures is unintuitive for WIC code paths
3868  return false;
3869 
3870  default:
3871  switch( tformat )
3872  {
3873  case DXGI_FORMAT_A8_UNORM:
3874  // Conversion logic for these kinds of textures is unintuitive for WIC code paths
3875  return false;
3876  }
3877  }
3878 
3879  // Check for implicit color space changes
3880  if ( IsSRGB( sformat ) )
3881  filter |= TEX_FILTER_SRGB_IN;
3882 
3883  if ( IsSRGB( tformat ) )
3884  filter |= TEX_FILTER_SRGB_OUT;
3885 
3887  {
3889  }
3890 
3891  DWORD wicsrgb = _CheckWICColorSpace( pfGUID, targetGUID );
3892 
3893  if ( wicsrgb != (filter & (TEX_FILTER_SRGB_IN|TEX_FILTER_SRGB_OUT)) )
3894  {
3895  // WIC will perform a colorspace conversion we didn't request
3896  return false;
3897  }
3898 
3899  return true;
3900 }
3901 
3902 
3903 //-------------------------------------------------------------------------------------
3904 // Convert the source image using WIC
3905 //-------------------------------------------------------------------------------------
3906 static HRESULT _ConvertUsingWIC( _In_ const Image& srcImage, _In_ const WICPixelFormatGUID& pfGUID,
3907  _In_ const WICPixelFormatGUID& targetGUID,
3908  _In_ DWORD filter, _In_ float threshold, _In_ const Image& destImage )
3909 {
3910  assert( srcImage.width == destImage.width );
3911  assert( srcImage.height == destImage.height );
3912 
3913  IWICImagingFactory* pWIC = _GetWIC();
3914  if ( !pWIC )
3915  return E_NOINTERFACE;
3916 
3917  ComPtr<IWICFormatConverter> FC;
3918  HRESULT hr = pWIC->CreateFormatConverter( FC.GetAddressOf() );
3919  if ( FAILED(hr) )
3920  return hr;
3921 
3922  // Note that WIC conversion ignores the TEX_FILTER_SRGB_IN and TEX_FILTER_SRGB_OUT flags,
3923  // but also always assumes UNORM <-> FLOAT conversions are changing color spaces sRGB <-> scRGB
3924 
3925  BOOL canConvert = FALSE;
3926  hr = FC->CanConvert( pfGUID, targetGUID, &canConvert );
3927  if ( FAILED(hr) || !canConvert )
3928  {
3929  // This case is not an issue for the subset of WIC formats that map directly to DXGI
3930  return E_UNEXPECTED;
3931  }
3932 
3933  ComPtr<IWICBitmap> source;
3934  hr = pWIC->CreateBitmapFromMemory( static_cast<UINT>( srcImage.width ), static_cast<UINT>( srcImage.height ), pfGUID,
3935  static_cast<UINT>( srcImage.rowPitch ), static_cast<UINT>( srcImage.slicePitch ),
3936  srcImage.pixels, source.GetAddressOf() );
3937  if ( FAILED(hr) )
3938  return hr;
3939 
3940  hr = FC->Initialize( source.Get(), targetGUID, _GetWICDither( filter ), 0, threshold * 100.f, WICBitmapPaletteTypeCustom );
3941  if ( FAILED(hr) )
3942  return hr;
3943 
3944  hr = FC->CopyPixels( 0, static_cast<UINT>( destImage.rowPitch ), static_cast<UINT>( destImage.slicePitch ), destImage.pixels );
3945  if ( FAILED(hr) )
3946  return hr;
3947 
3948  return S_OK;
3949 }
3950 
3951 
//-------------------------------------------------------------------------------------
// Convert the source image (not using WIC)
//-------------------------------------------------------------------------------------
// Converts srcImage into destImage's format one scanline at a time via the
// library's load/convert/store helpers.  'z' is the slice index, forwarded to
// _StoreScanlineDither so the dither pattern can vary per slice.  Three paths:
// error-diffusion dithering, ordered dithering, or plain store, chosen by
// the filter flags.  Returns E_POINTER / E_OUTOFMEMORY / E_FAIL on error.
static HRESULT _Convert( _In_ const Image& srcImage, _In_ DWORD filter, _In_ const Image& destImage, _In_ float threshold, _In_ size_t z )
{
    assert( srcImage.width == destImage.width );
    assert( srcImage.height == destImage.height );

    const uint8_t *pSrc = srcImage.pixels;
    uint8_t *pDest = destImage.pixels;
    if ( !pSrc || !pDest )
        return E_POINTER;

    size_t width = srcImage.width;

    if ( filter & TEX_FILTER_DITHER_DIFFUSION )
    {
        // Error diffusion dithering (aka Floyd-Steinberg dithering)
        // Single 16-byte-aligned allocation holding both the working scanline
        // (first 'width' vectors) and the diffusion-error accumulators
        // ('width + 2' vectors: one pixel of padding on each side so the
        // error spread in _StoreScanlineDither never indexes out of bounds).
        ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( (sizeof(XMVECTOR)*(width*2 + 2)), 16 ) ) );
        if ( !scanline )
            return E_OUTOFMEMORY;

        // Error accumulators start at zero and carry over between scanlines.
        XMVECTOR* pDiffusionErrors = scanline.get() + width;
        memset( pDiffusionErrors, 0, sizeof(XMVECTOR)*(width+2) );

        for( size_t h = 0; h < srcImage.height; ++h )
        {
            if ( !_LoadScanline( scanline.get(), width, pSrc, srcImage.rowPitch, srcImage.format ) )
                return E_FAIL;

            _ConvertScanline( scanline.get(), width, destImage.format, srcImage.format, filter );

            if ( !_StoreScanlineDither( pDest, destImage.rowPitch, destImage.format, scanline.get(), width, threshold, h, z, pDiffusionErrors ) )
                return E_FAIL;

            pSrc += srcImage.rowPitch;
            pDest += destImage.rowPitch;
        }
    }
    else
    {
        // Non-diffusion paths only need the working scanline itself.
        ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( (sizeof(XMVECTOR)*width), 16 ) ) );
        if ( !scanline )
            return E_OUTOFMEMORY;

        if ( filter & TEX_FILTER_DITHER )
        {
            // Ordered dithering (null error buffer selects the ordered pattern)
            for( size_t h = 0; h < srcImage.height; ++h )
            {
                if ( !_LoadScanline( scanline.get(), width, pSrc, srcImage.rowPitch, srcImage.format ) )
                    return E_FAIL;

                _ConvertScanline( scanline.get(), width, destImage.format, srcImage.format, filter );

                if ( !_StoreScanlineDither( pDest, destImage.rowPitch, destImage.format, scanline.get(), width, threshold, h, z, nullptr ) )
                    return E_FAIL;

                pSrc += srcImage.rowPitch;
                pDest += destImage.rowPitch;
            }
        }
        else
        {
            // No dithering
            for( size_t h = 0; h < srcImage.height; ++h )
            {
                if ( !_LoadScanline( scanline.get(), width, pSrc, srcImage.rowPitch, srcImage.format ) )
                    return E_FAIL;

                _ConvertScanline( scanline.get(), width, destImage.format, srcImage.format, filter );

                if ( !_StoreScanline( pDest, destImage.rowPitch, destImage.format, scanline.get(), width, threshold ) )
                    return E_FAIL;

                pSrc += srcImage.rowPitch;
                pDest += destImage.rowPitch;
            }
        }
    }

    return S_OK;
}
4035 
4036 
4037 //-------------------------------------------------------------------------------------
4038 static DXGI_FORMAT _PlanarToSingle( _In_ DXGI_FORMAT format )
4039 {
4040  switch (format)
4041  {
4042  case DXGI_FORMAT_NV12:
4043  case DXGI_FORMAT_NV11:
4044  return DXGI_FORMAT_YUY2;
4045 
4046  case DXGI_FORMAT_P010:
4047  return DXGI_FORMAT_Y210;
4048 
4049  case DXGI_FORMAT_P016:
4050  return DXGI_FORMAT_Y216;
4051 
4052  // We currently do not support conversion for Xbox One specific depth formats
4053 
4054  // We can't do anything with DXGI_FORMAT_420_OPAQUE because it's an opaque blob of bits
4055 
4056  default:
4057  return DXGI_FORMAT_UNKNOWN;
4058  }
4059 }
4060 
4061 
//-------------------------------------------------------------------------------------
// Convert the image from a planar to non-planar image
//-------------------------------------------------------------------------------------
// CONVERT_420_TO_422: expands a 4:2:0 planar image (luma plane followed by an
// interleaved UV plane at srcImage.height * rowPitch) into a packed 4:2:2
// destination of 'destType' (x=Y0, y=U, z=Y1, w=V).  Processes two source rows
// per iteration since each UV pair is shared by a 2x2 block of luma samples.
// The (sPtrUV+1) >= sourceE guard stops before reading past the source buffer.
// Requires locals pSrc, pDest, srcImage, destImage in the invoking scope.
#define CONVERT_420_TO_422( srcType, destType )\
    {\
        size_t rowPitch = srcImage.rowPitch;\
        \
        auto sourceE = reinterpret_cast<const srcType*>( pSrc + srcImage.slicePitch );\
        auto pSrcUV = pSrc + ( srcImage.height * rowPitch );\
        \
        for( size_t y = 0; y < srcImage.height; y+= 2 )\
        {\
            auto sPtrY0 = reinterpret_cast<const srcType*>( pSrc );\
            auto sPtrY2 = reinterpret_cast<const srcType*>( pSrc + rowPitch );\
            auto sPtrUV = reinterpret_cast<const srcType*>( pSrcUV );\
            \
            destType * __restrict dPtr0 = reinterpret_cast<destType*>(pDest);\
            destType * __restrict dPtr1 = reinterpret_cast<destType*>(pDest + destImage.rowPitch);\
            \
            for( size_t x = 0; x < srcImage.width; x+= 2 )\
            {\
                if ( (sPtrUV+1) >= sourceE ) break;\
                \
                srcType u = *(sPtrUV++);\
                srcType v = *(sPtrUV++);\
                \
                dPtr0->x = *(sPtrY0++);\
                dPtr0->y = u;\
                dPtr0->z = *(sPtrY0++);\
                dPtr0->w = v;\
                ++dPtr0;\
                \
                dPtr1->x = *(sPtrY2++);\
                dPtr1->y = u;\
                dPtr1->z = *(sPtrY2++);\
                dPtr1->w = v;\
                ++dPtr1;\
            }\
            \
            pSrc += rowPitch * 2;\
            pSrcUV += rowPitch;\
            \
            pDest += destImage.rowPitch * 2;\
        }\
    }
4107 
// Converts a planar srcImage (NV12/P010/P016/NV11) into the matching packed
// 4:2:2 destImage (YUY2/Y210/Y216).  Caller must have sized destImage via
// _PlanarToSingle.  Returns E_POINTER on null pixels, E_UNEXPECTED for an
// unsupported source format.
static HRESULT _ConvertToSinglePlane( _In_ const Image& srcImage, _In_ const Image& destImage )
{
    assert( srcImage.width == destImage.width );
    assert( srcImage.height == destImage.height );

    const uint8_t *pSrc = srcImage.pixels;
    uint8_t *pDest = destImage.pixels;
    if ( !pSrc || !pDest )
        return E_POINTER;

    switch ( srcImage.format )
    {
    case DXGI_FORMAT_NV12:
        assert( destImage.format == DXGI_FORMAT_YUY2 );
        // 8-bit 4:2:0 -> packed 8-bit 4:2:2
        CONVERT_420_TO_422( uint8_t, XMUBYTEN4 );
        return S_OK;

    case DXGI_FORMAT_P010:
        assert( destImage.format == DXGI_FORMAT_Y210 );
        // 10-bit (in 16-bit containers) 4:2:0 -> packed 4:2:2
        CONVERT_420_TO_422( uint16_t, XMUSHORTN4 );
        return S_OK;

    case DXGI_FORMAT_P016:
        assert( destImage.format == DXGI_FORMAT_Y216 );
        // 16-bit 4:2:0 -> packed 4:2:2
        CONVERT_420_TO_422( uint16_t, XMUSHORTN4 );
        return S_OK;

    case DXGI_FORMAT_NV11:
        assert( destImage.format == DXGI_FORMAT_YUY2 );
        // Convert 4:1:1 to 4:2:2
        // Each UV pair covers 4 horizontal luma samples, so every iteration
        // emits two packed pixels sharing the same chroma.
        {
            size_t rowPitch = srcImage.rowPitch;

            const uint8_t* sourceE = pSrc + srcImage.slicePitch;
            // UV plane immediately follows the luma plane
            const uint8_t* pSrcUV = pSrc + ( srcImage.height * rowPitch );

            for( size_t y = 0; y < srcImage.height; ++y )
            {
                const uint8_t* sPtrY = pSrc;
                const uint8_t* sPtrUV = pSrcUV;

                XMUBYTEN4 * __restrict dPtr = reinterpret_cast<XMUBYTEN4*>(pDest);

                for( size_t x = 0; x < srcImage.width; x+= 4 )
                {
                    // Stop before reading chroma past the end of the source buffer
                    if ( (sPtrUV+1) >= sourceE ) break;

                    uint8_t u = *(sPtrUV++);
                    uint8_t v = *(sPtrUV++);

                    dPtr->x = *(sPtrY++);
                    dPtr->y = u;
                    dPtr->z = *(sPtrY++);
                    dPtr->w = v;
                    ++dPtr;

                    dPtr->x = *(sPtrY++);
                    dPtr->y = u;
                    dPtr->z = *(sPtrY++);
                    dPtr->w = v;
                    ++dPtr;
                }

                pSrc += rowPitch;
                // NV11's UV plane rows are half the luma row pitch
                pSrcUV += (rowPitch >> 1);

                pDest += destImage.rowPitch;
            }
        }
        return S_OK;

    default:
        return E_UNEXPECTED;
    }
}
4183 
4184 #undef CONVERT_420_TO_422
4185 
4186 
4187 //=====================================================================================
4188 // Entry-points
4189 //=====================================================================================
4190 
4191 //-------------------------------------------------------------------------------------
4192 // Convert image
4193 //-------------------------------------------------------------------------------------
4194 _Use_decl_annotations_
4195 HRESULT Convert( const Image& srcImage, DXGI_FORMAT format, DWORD filter, float threshold, ScratchImage& image )
4196 {
4197  if ( (srcImage.format == format) || !IsValid( format ) )
4198  return E_INVALIDARG;
4199 
4200  if ( !srcImage.pixels )
4201  return E_POINTER;
4202 
4203  if ( IsCompressed(srcImage.format) || IsCompressed(format)
4204  || IsPlanar(srcImage.format) || IsPlanar(format)
4205  || IsPalettized(srcImage.format) || IsPalettized(format)
4206  || IsTypeless(srcImage.format) || IsTypeless(format) )
4207  return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );
4208 
4209 #ifdef _M_X64
4210  if ( (srcImage.width > 0xFFFFFFFF) || (srcImage.height > 0xFFFFFFFF) )
4211  return E_INVALIDARG;
4212 #endif
4213 
4214  HRESULT hr = image.Initialize2D( format, srcImage.width, srcImage.height, 1, 1 );
4215  if ( FAILED(hr) )
4216  return hr;
4217 
4218  const Image *rimage = image.GetImage( 0, 0, 0 );
4219  if ( !rimage )
4220  {
4221  image.Release();
4222  return E_POINTER;
4223  }
4224 
4225  WICPixelFormatGUID pfGUID, targetGUID;
4226  if ( _UseWICConversion( filter, srcImage.format, format, pfGUID, targetGUID ) )
4227  {
4228  hr = _ConvertUsingWIC( srcImage, pfGUID, targetGUID, filter, threshold, *rimage );
4229  }
4230  else
4231  {
4232  hr = _Convert( srcImage, filter, *rimage, threshold, 0 );
4233  }
4234 
4235  if ( FAILED(hr) )
4236  {
4237  image.Release();
4238  return hr;
4239  }
4240 
4241  return S_OK;
4242 }
4243 
4244 
4245 //-------------------------------------------------------------------------------------
4246 // Convert image (complex)
4247 //-------------------------------------------------------------------------------------
4248 _Use_decl_annotations_
4249 HRESULT Convert( const Image* srcImages, size_t nimages, const TexMetadata& metadata,
4250  DXGI_FORMAT format, DWORD filter, float threshold, ScratchImage& result )
4251 {
4252  if ( !srcImages || !nimages || (metadata.format == format) || !IsValid(format) )
4253  return E_INVALIDARG;
4254 
4255  if ( IsCompressed(metadata.format) || IsCompressed(format)
4256  || IsPlanar(metadata.format) || IsPlanar(format)
4257  || IsPalettized(metadata.format) || IsPalettized(format)
4258  || IsTypeless(metadata.format) || IsTypeless(format) )
4259  return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );
4260 
4261 #ifdef _M_X64
4262  if ( (metadata.width > 0xFFFFFFFF) || (metadata.height > 0xFFFFFFFF) )
4263  return E_INVALIDARG;
4264 #endif
4265 
4266  TexMetadata mdata2 = metadata;
4267  mdata2.format = format;
4268  HRESULT hr = result.Initialize( mdata2 );
4269  if ( FAILED(hr) )
4270  return hr;
4271 
4272  if ( nimages != result.GetImageCount() )
4273  {
4274  result.Release();
4275  return E_FAIL;
4276  }
4277 
4278  const Image* dest = result.GetImages();
4279  if ( !dest )
4280  {
4281  result.Release();
4282  return E_POINTER;
4283  }
4284 
4285  WICPixelFormatGUID pfGUID, targetGUID;
4286  bool usewic = _UseWICConversion( filter, metadata.format, format, pfGUID, targetGUID );
4287 
4288  switch (metadata.dimension)
4289  {
4292  for( size_t index=0; index < nimages; ++index )
4293  {
4294  const Image& src = srcImages[ index ];
4295  if ( src.format != metadata.format )
4296  {
4297  result.Release();
4298  return E_FAIL;
4299  }
4300 
4301 #ifdef _M_X64
4302  if ( (src.width > 0xFFFFFFFF) || (src.height > 0xFFFFFFFF) )
4303  return E_FAIL;
4304 #endif
4305 
4306  const Image& dst = dest[ index ];
4307  assert( dst.format == format );
4308 
4309  if ( src.width != dst.width || src.height != dst.height )
4310  {
4311  result.Release();
4312  return E_FAIL;
4313  }
4314 
4315  if ( usewic )
4316  {
4317  hr = _ConvertUsingWIC( src, pfGUID, targetGUID, filter, threshold, dst );
4318  }
4319  else
4320  {
4321  hr = _Convert( src, filter, dst, threshold, 0 );
4322  }
4323 
4324  if ( FAILED(hr) )
4325  {
4326  result.Release();
4327  return hr;
4328  }
4329  }
4330  break;
4331 
4333  {
4334  size_t index = 0;
4335  size_t d = metadata.depth;
4336  for( size_t level = 0; level < metadata.mipLevels; ++level )
4337  {
4338  for( size_t slice = 0; slice < d; ++slice, ++index )
4339  {
4340  if ( index >= nimages )
4341  return E_FAIL;
4342 
4343  const Image& src = srcImages[ index ];
4344  if ( src.format != metadata.format )
4345  {
4346  result.Release();
4347  return E_FAIL;
4348  }
4349 
4350 #ifdef _M_X64
4351  if ( (src.width > 0xFFFFFFFF) || (src.height > 0xFFFFFFFF) )
4352  return E_FAIL;
4353 #endif
4354 
4355  const Image& dst = dest[ index ];
4356  assert( dst.format == format );
4357 
4358  if ( src.width != dst.width || src.height != dst.height )
4359  {
4360  result.Release();
4361  return E_FAIL;
4362  }
4363 
4364  if ( usewic )
4365  {
4366  hr = _ConvertUsingWIC( src, pfGUID, targetGUID, filter, threshold, dst );
4367  }
4368  else
4369  {
4370  hr = _Convert( src, filter, dst, threshold, slice );
4371  }
4372 
4373  if ( FAILED(hr) )
4374  {
4375  result.Release();
4376  return hr;
4377  }
4378  }
4379 
4380  if ( d > 1 )
4381  d >>= 1;
4382  }
4383  }
4384  break;
4385 
4386  default:
4387  return E_FAIL;
4388  }
4389 
4390  return S_OK;
4391 }
4392 
4393 
4394 //-------------------------------------------------------------------------------------
4395 // Convert image from planar to single plane (image)
4396 //-------------------------------------------------------------------------------------
4397 _Use_decl_annotations_
4398 HRESULT ConvertToSinglePlane( const Image& srcImage, ScratchImage& image )
4399 {
4400  if ( !IsPlanar(srcImage.format) )
4401  return E_INVALIDARG;
4402 
4403  if ( !srcImage.pixels )
4404  return E_POINTER;
4405 
4406  DXGI_FORMAT format = _PlanarToSingle( srcImage.format );
4407  if ( format == DXGI_FORMAT_UNKNOWN )
4408  return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );
4409 
4410 #ifdef _M_X64
4411  if ( (srcImage.width > 0xFFFFFFFF) || (srcImage.height > 0xFFFFFFFF) )
4412  return E_INVALIDARG;
4413 #endif
4414 
4415  HRESULT hr = image.Initialize2D( format, srcImage.width, srcImage.height, 1, 1 );
4416  if ( FAILED(hr) )
4417  return hr;
4418 
4419  const Image *rimage = image.GetImage( 0, 0, 0 );
4420  if ( !rimage )
4421  {
4422  image.Release();
4423  return E_POINTER;
4424  }
4425 
4426  hr = _ConvertToSinglePlane( srcImage, *rimage );
4427  if ( FAILED(hr) )
4428  {
4429  image.Release();
4430  return hr;
4431  }
4432 
4433  return S_OK;
4434 }
4435 
4436 
4437 //-------------------------------------------------------------------------------------
4438 // Convert image from planar to single plane (complex)
4439 //-------------------------------------------------------------------------------------
4440 _Use_decl_annotations_
4441 HRESULT ConvertToSinglePlane( const Image* srcImages, size_t nimages, const TexMetadata& metadata,
4442  ScratchImage& result )
4443 {
4444  if ( !srcImages || !nimages )
4445  return E_INVALIDARG;
4446 
4447  if ( metadata.IsVolumemap() )
4448  {
4449  // Direct3D does not support any planar formats for Texture3D
4450  return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );
4451  }
4452 
4453  DXGI_FORMAT format = _PlanarToSingle( metadata.format );
4454  if ( format == DXGI_FORMAT_UNKNOWN )
4455  return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );
4456 
4457 #ifdef _M_X64
4458  if ( (metadata.width > 0xFFFFFFFF) || (metadata.height > 0xFFFFFFFF) )
4459  return E_INVALIDARG;
4460 #endif
4461 
4462  TexMetadata mdata2 = metadata;
4463  mdata2.format = format;
4464  HRESULT hr = result.Initialize( mdata2 );
4465  if ( FAILED(hr) )
4466  return hr;
4467 
4468  if ( nimages != result.GetImageCount() )
4469  {
4470  result.Release();
4471  return E_FAIL;
4472  }
4473 
4474  const Image* dest = result.GetImages();
4475  if ( !dest )
4476  {
4477  result.Release();
4478  return E_POINTER;
4479  }
4480 
4481  for( size_t index=0; index < nimages; ++index )
4482  {
4483  const Image& src = srcImages[ index ];
4484  if ( src.format != metadata.format )
4485  {
4486  result.Release();
4487  return E_FAIL;
4488  }
4489 
4490 #ifdef _M_X64
4491  if ( (src.width > 0xFFFFFFFF) || (src.height > 0xFFFFFFFF) )
4492  return E_FAIL;
4493 #endif
4494 
4495  const Image& dst = dest[ index ];
4496  assert( dst.format == format );
4497 
4498  if ( src.width != dst.width || src.height != dst.height )
4499  {
4500  result.Release();
4501  return E_FAIL;
4502  }
4503 
4504  hr = _ConvertToSinglePlane( src, dst );
4505  if ( FAILED(hr) )
4506  {
4507  result.Release();
4508  return hr;
4509  }
4510  }
4511 
4512  return S_OK;
4513 }
4514 
4515 }; // namespace
std::unique_ptr< DirectX::XMVECTOR, aligned_deleter > ScopedAlignedArrayXMVECTOR
Definition: scoped.h:27
_Use_decl_annotations_ bool _ExpandScanline(LPVOID pDestination, size_t outSize, DXGI_FORMAT outFormat, LPCVOID pSource, size_t inSize, DXGI_FORMAT inFormat, DWORD flags)
static const XMVECTORF32 g_Scale7pc
const Image * GetImage(_In_ size_t mip, _In_ size_t item, _In_ size_t slice) const
uint8_t * pixels
Definition: DirectXTex.h:230
bool IsVolumemap() const
Definition: DirectXTex.h:138
function b
bool IsPlanar(_In_ DXGI_FORMAT fmt)
size_t outSize
function a
static XMVECTOR XMColorRGBToSRGB(FXMVECTOR rgb)
static HRESULT _Convert(_In_ const Image &srcImage, _In_ DWORD filter, _In_ const Image &destImage, _In_ float threshold, _In_ size_t z)
static const XMVECTORF32 g_Scale16pc
DXGI_FORMAT format
Definition: DirectXTex.h:125
_In_ size_t _In_ DXGI_FORMAT _In_ size_t _In_ DXGI_FORMAT _In_ DWORD flags
Definition: DirectXTexP.h:170
#define STORE_SCANLINE(type, func)
void _CopyScanline(_When_(pDestination==pSource, _Inout_updates_bytes_(outSize)) _When_(pDestination!=pSource, _Out_writes_bytes_(outSize)) LPVOID pDestination, _In_ size_t outSize, _In_reads_bytes_(inSize) LPCVOID pSource, _In_ size_t inSize, _In_ DXGI_FORMAT format, _In_ DWORD flags)
static const XMVECTORF32 g_ErrorWeight7
_Use_decl_annotations_ HRESULT _ConvertFromR32G32B32A32(const Image &srcImage, const Image &destImage)
#define LOAD_SCANLINE3(type, func, defvec)
_Use_decl_annotations_ HRESULT _ConvertToR32G32B32A32(const Image &srcImage, ScratchImage &image)
size_t _In_ DXGI_FORMAT size_t _In_ TEXP_LEGACY_FORMAT _In_ DWORD flags assert(pDestination &&outSize > 0)
_In_ size_t _In_ DXGI_FORMAT _In_ size_t _In_ float threshold
Definition: DirectXTexP.h:183
_In_ size_t _In_ DXGI_FORMAT _In_ size_t _In_ float size_t y
Definition: DirectXTexP.h:191
HRESULT Initialize(_In_ const TexMetadata &mdata, _In_ DWORD flags=CP_FLAGS_NONE)
HRESULT Initialize2D(_In_ DXGI_FORMAT fmt, _In_ size_t width, _In_ size_t height, _In_ size_t arraySize, _In_ size_t mipLevels, _In_ DWORD flags=CP_FLAGS_NONE)
_Use_decl_annotations_ bool _LoadScanline(XMVECTOR *pDestination, size_t count, LPCVOID pSource, size_t size, DXGI_FORMAT format)
static XMVECTOR XMColorSRGBToRGB(FXMVECTOR srgb)
static const XMVECTORF32 g_ErrorWeight3
IWICImagingFactory * _GetWIC()
static const XMVECTORF32 g_Scale8pc
static const XMVECTORF32 g_Scale565pc
HRESULT ConvertToSinglePlane(_In_ const Image &srcImage, _Out_ ScratchImage &image)
bool IsCompressed(_In_ DXGI_FORMAT fmt)
size_t rowPitch
Definition: DirectXTex.h:228
size_t GetImageCount() const
Definition: DirectXTex.h:264
#define STORE_SCANLINE1(type, scalev, clampzero, norm, mask, row, selectw)
_In_ size_t _In_ const TexMetadata & metadata
Definition: DirectXTexP.h:116
_Use_decl_annotations_ void _ConvertScanline(XMVECTOR *pBuffer, size_t count, DXGI_FORMAT outFormat, DXGI_FORMAT inFormat, DWORD flags)
_In_ size_t count
Definition: DirectXTexP.h:174
_Use_decl_annotations_ void _SwizzleScanline(LPVOID pDestination, size_t outSize, LPCVOID pSource, size_t inSize, DXGI_FORMAT format, DWORD flags)
static int __cdecl _ConvertCompare(void *context, const void *ptr1, const void *ptr2)
static HRESULT _ConvertToSinglePlane(_In_ const Image &srcImage, _In_ const Image &destImage)
_Use_decl_annotations_ DWORD _GetConvertFlags(DXGI_FORMAT format)
bool IsValid(_In_ DXGI_FORMAT fmt)
size_t _In_ DXGI_FORMAT outFormat
size_t _In_ DXGI_FORMAT size_t _In_ TEXP_LEGACY_FORMAT inFormat
static HRESULT _ConvertUsingWIC(_In_ const Image &srcImage, _In_ const WICPixelFormatGUID &pfGUID, _In_ const WICPixelFormatGUID &targetGUID, _In_ DWORD filter, _In_ float threshold, _In_ const Image &destImage)
size_t _In_ DXGI_FORMAT size_t inSize
bool IsPalettized(_In_ DXGI_FORMAT fmt)
function s(a)
static const XMVECTORF32 g_Grayscale
static bool _UseWICConversion(_In_ DWORD filter, _In_ DXGI_FORMAT sformat, _In_ DXGI_FORMAT tformat, _Out_ WICPixelFormatGUID &pfGUID, _Out_ WICPixelFormatGUID &targetGUID)
static DXGI_FORMAT _PlanarToSingle(_In_ DXGI_FORMAT format)
size_t _In_ DXGI_FORMAT _In_reads_bytes_(inSize) LPCVOID pSource
HRESULT Convert(_In_ const Image &srcImage, _In_ DXGI_FORMAT format, _In_ DWORD filter, _In_ float threshold, _Out_ ScratchImage &image)
static const XMVECTORF32 g_Scale15pc
WICBitmapDitherType _GetWICDither(_In_ DWORD flags)
Definition: DirectXTexP.h:64
static const XMVECTORF32 g_Scale5551pc
DWORD _CheckWICColorSpace(_In_ const GUID &sourceGUID, _In_ const GUID &targetGUID)
static const XMVECTORF32 g_ErrorWeight5
_Use_decl_annotations_ bool _LoadScanlineLinear(XMVECTOR *pDestination, size_t count, LPCVOID pSource, size_t size, DXGI_FORMAT format, DWORD flags)
_Use_decl_annotations_ bool _StoreScanlineLinear(LPVOID pDestination, size_t size, DXGI_FORMAT format, XMVECTOR *pSource, size_t count, DWORD flags)
bool IsTypeless(_In_ DXGI_FORMAT fmt, _In_ bool partialTypeless=true)
TEX_DIMENSION dimension
Definition: DirectXTex.h:126
static const XMVECTORF32 g_Scale10pc
#define LOAD_SCANLINE(type, func)
static const ConvertData g_ConvertTable[]
#define CONVERT_420_TO_422(srcType, destType)
char * dest
Definition: lz4.h:61
bool IsSRGB(_In_ DXGI_FORMAT fmt)
DXGI_FORMAT format
Definition: DirectXTex.h:227
_In_ size_t _In_ size_t _In_ DXGI_FORMAT format
Definition: DirectXTexP.h:175
static const float g_Dither[]
const Image * GetImages() const
Definition: DirectXTex.h:263
bool _DXGIToWIC(_In_ DXGI_FORMAT format, _Out_ GUID &guid, _In_ bool ignoreRGBvsBGR=false)
_In_ size_t _In_ size_t size
Definition: DirectXTexP.h:175
static const XMVECTORF32 g_ErrorWeight1
static const XMVECTORF32 g_Scale4pc
#define STORE_SCANLINE2(type, scalev, clampzero, norm, itype, mask, row)
_Use_decl_annotations_ bool _StoreScanline(LPVOID pDestination, size_t size, DXGI_FORMAT format, const XMVECTOR *pSource, size_t count, float threshold)
_Use_decl_annotations_ bool _StoreScanlineDither(LPVOID pDestination, size_t size, DXGI_FORMAT format, XMVECTOR *pSource, size_t count, float threshold, size_t y, size_t z, XMVECTOR *pDiffusionErrors)
_In_ size_t _In_ DXGI_FORMAT _In_ size_t _In_ float size_t size_t z
Definition: DirectXTexP.h:191
#define LOAD_SCANLINE2(type, func, defvec)