18 using namespace DirectX::PackedVector;
19 using Microsoft::WRL::ComPtr;
23 #if DIRECTX_MATH_VERSION < 306
//-------------------------------------------------------------------------------------
// Round-to-nearest with ties-to-even ("banker's rounding") fallback, used when
// the DirectXMath version predates 3.06 (which added this rounding behavior).
//
// x: value to round. Returns the nearest integral float; exact .5 ties go to
// the even neighbor (0.5 -> 0, 1.5 -> 2, 2.5 -> 2).
//-------------------------------------------------------------------------------------
inline float round_to_nearest( float x )
{
    // Round to nearest (even)
    float i = floorf(x);
    x -= i;
    if (x < 0.5f)
        return i;
    if (x > 0.5f)
        return i + 1.f;

    // Exactly halfway: choose the even neighbor. i is even iff i/2 has no
    // fractional part (modff splits off the integer part).
    float int_part;
    modff( i / 2.f, &int_part );
    if ( (2.f*int_part) == i )
    {
        return i;
    }

    return i + 1.f;
}
//-------------------------------------------------------------------------------------
// Converts a float to the Xbox "7e3" packed-HDR channel format:
// 10 bits = 7-bit mantissa + 3-bit exponent, exponent bias 3, unsigned.
// Negative inputs (including -0) clamp to 0; values above the saturation
// threshold (including +Inf and positive NaN bit patterns) return 0x3FF.
// Rounds to nearest, ties to even.
//-------------------------------------------------------------------------------------
inline uint32_t FloatTo7e3(float Value)
{
    // Inspect the raw bits via memcpy (avoids strict-aliasing UB that a
    // reinterpret_cast pointer-pun would incur).
    uint32_t IValue;
    memcpy( &IValue, &Value, sizeof(IValue) );

    if ( IValue & 0x80000000U )
    {
        // Positive only
        return 0;
    }
    else if (IValue > 0x41FF73FFU)
    {
        // The number is too large to be represented as a 7e3. Saturate.
        return 0x3FFU;
    }
    else
    {
        if (IValue < 0x3E800000U)
        {
            // The number is too small to be represented as a normalized 7e3.
            // Convert it to a denormalized value.
            uint32_t Shift = 125U - (IValue >> 23U);

            // Guard the shift count: for very small inputs Shift reaches 32
            // or more, where '>>' is undefined behavior; the correct result
            // for such inputs is 0.
            IValue = (Shift < 32U) ? ((0x800000U | (IValue & 0x7FFFFFU)) >> Shift) : 0U;
        }
        else
        {
            // Rebias the exponent to represent the value as a normalized 7e3.
            IValue += 0xC2000000U;
        }

        // Round to nearest-even and keep the low 10 bits.
        return ((IValue + 0x7FFFU + ((IValue >> 16U) & 1U)) >> 16U) & 0x3FFU;
    }
}
//-------------------------------------------------------------------------------------
// Expands a 10-bit Xbox "7e3" packed value (7-bit mantissa, 3-bit exponent,
// bias 3) into a 32-bit float. Handles normalized, denormalized, and zero
// encodings. Only the low 10 bits of Value are significant.
//-------------------------------------------------------------------------------------
inline float FloatFrom7e3( uint32_t Value )
{
    uint32_t Mantissa = (uint32_t)(Value & 0x7F);

    uint32_t Exponent = (Value & 0x380);
    if ( Exponent != 0 )  // The value is normalized
    {
        Exponent = (uint32_t)((Value >> 7) & 0x7);
    }
    else if (Mantissa != 0)     // The value is denormalized
    {
        // Normalize the value in the resulting float
        Exponent = 1;

        do
        {
            Exponent--;
            Mantissa <<= 1;
        } while ((Mantissa & 0x80) == 0);

        Mantissa &= 0x7F;
    }
    else                        // The value is zero
    {
        Exponent = (uint32_t)-124;  // cancels the +124 rebias below, yielding 0.0f
    }

    uint32_t Result = ((Exponent + 124) << 23) | // Exponent (rebias 3 -> 127)
                      (Mantissa << 16);          // Mantissa (7 -> 23 bits)

    // Reassemble as float via memcpy (avoids strict-aliasing UB of a
    // reinterpret_cast pointer-pun).
    float f;
    memcpy( &f, &Result, sizeof(f) );
    return f;
}
//-------------------------------------------------------------------------------------
// Converts a float to the Xbox "6e4" packed-HDR channel format:
// 10 bits = 6-bit mantissa + 4-bit exponent, exponent bias 7, unsigned.
// Negative inputs (including -0) clamp to 0; values above the saturation
// threshold (including +Inf and positive NaN bit patterns) return 0x3FF.
// Rounds to nearest, ties to even.
//-------------------------------------------------------------------------------------
inline uint32_t FloatTo6e4(float Value)
{
    // Inspect the raw bits via memcpy (avoids strict-aliasing UB that a
    // reinterpret_cast pointer-pun would incur).
    uint32_t IValue;
    memcpy( &IValue, &Value, sizeof(IValue) );

    if ( IValue & 0x80000000U )
    {
        // Positive only
        return 0;
    }
    else if (IValue > 0x43FEFFFFU)
    {
        // The number is too large to be represented as a 6e4. Saturate.
        return 0x3FFU;
    }
    else
    {
        if (IValue < 0x3C800000U)
        {
            // The number is too small to be represented as a normalized 6e4.
            // Convert it to a denormalized value.
            uint32_t Shift = 121U - (IValue >> 23U);

            // Guard the shift count: for very small inputs Shift reaches 32
            // or more, where '>>' is undefined behavior; the correct result
            // for such inputs is 0.
            IValue = (Shift < 32U) ? ((0x800000U | (IValue & 0x7FFFFFU)) >> Shift) : 0U;
        }
        else
        {
            // Rebias the exponent to represent the value as a normalized 6e4.
            IValue += 0xC4000000U;
        }

        // Round to nearest-even and keep the low 10 bits.
        return ((IValue + 0xFFFFU + ((IValue >> 17U) & 1U)) >> 17U) & 0x3FFU;
    }
}
//-------------------------------------------------------------------------------------
// Expands a 10-bit Xbox "6e4" packed value (6-bit mantissa, 4-bit exponent,
// bias 7) into a 32-bit float. Handles normalized, denormalized, and zero
// encodings. Only the low 10 bits of Value are significant.
//-------------------------------------------------------------------------------------
inline float FloatFrom6e4( uint32_t Value )
{
    uint32_t Mantissa = (uint32_t)(Value & 0x3F);

    uint32_t Exponent = (Value & 0x3C0);
    if ( Exponent != 0 )  // The value is normalized
    {
        Exponent = (uint32_t)((Value >> 6) & 0xF);
    }
    else if (Mantissa != 0)     // The value is denormalized
    {
        // Normalize the value in the resulting float
        Exponent = 1;

        do
        {
            Exponent--;
            Mantissa <<= 1;
        } while ((Mantissa & 0x40) == 0);

        Mantissa &= 0x3F;
    }
    else                        // The value is zero
    {
        Exponent = (uint32_t)-120;  // cancels the +120 rebias below, yielding 0.0f
    }

    uint32_t Result = ((Exponent + 120) << 23) | // Exponent (rebias 7 -> 127)
                      (Mantissa << 17);          // Mantissa (6 -> 23 bits)

    // Reassemble as float via memcpy (avoids strict-aliasing UB of a
    // reinterpret_cast pointer-pun).
    float f;
    memcpy( &f, &Result, sizeof(f) );
    return f;
}
180 static const XMVECTORF32
g_Grayscale = { 0.2125f, 0.7154f, 0.0721f, 0.0f };
187 _When_(pDestination != pSource, _Out_writes_bytes_(
outSize))
188 LPVOID pDestination, _In_
size_t outSize,
192 assert( pDestination && outSize > 0 );
193 assert( pSource && inSize > 0 );
198 switch( static_cast<int>(format) )
201 case DXGI_FORMAT_R32G32B32A32_TYPELESS:
202 case DXGI_FORMAT_R32G32B32A32_FLOAT:
203 case DXGI_FORMAT_R32G32B32A32_UINT:
204 case DXGI_FORMAT_R32G32B32A32_SINT:
205 if ( inSize >= 16 && outSize >= 16 )
208 if ( format == DXGI_FORMAT_R32G32B32A32_FLOAT )
210 else if ( format == DXGI_FORMAT_R32G32B32A32_SINT )
215 if ( pDestination == pSource )
217 uint32_t *dPtr =
reinterpret_cast<uint32_t*
> (pDestination);
226 const uint32_t * __restrict sPtr =
reinterpret_cast<const uint32_t*
>(pSource);
227 uint32_t * __restrict dPtr =
reinterpret_cast<uint32_t*
>(pDestination);
231 *(dPtr++) = *(sPtr++);
232 *(dPtr++) = *(sPtr++);
233 *(dPtr++) = *(sPtr++);
242 case DXGI_FORMAT_R16G16B16A16_TYPELESS:
243 case DXGI_FORMAT_R16G16B16A16_FLOAT:
244 case DXGI_FORMAT_R16G16B16A16_UNORM:
245 case DXGI_FORMAT_R16G16B16A16_UINT:
246 case DXGI_FORMAT_R16G16B16A16_SNORM:
247 case DXGI_FORMAT_R16G16B16A16_SINT:
248 case DXGI_FORMAT_Y416:
249 if ( inSize >= 8 && outSize >= 8 )
252 if ( format == DXGI_FORMAT_R16G16B16A16_FLOAT )
254 else if ( format == DXGI_FORMAT_R16G16B16A16_SNORM || format == DXGI_FORMAT_R16G16B16A16_SINT )
259 if ( pDestination == pSource )
261 uint16_t *dPtr =
reinterpret_cast<uint16_t*
>(pDestination);
270 const uint16_t * __restrict sPtr =
reinterpret_cast<const uint16_t*
>(pSource);
271 uint16_t * __restrict dPtr =
reinterpret_cast<uint16_t*
>(pDestination);
275 *(dPtr++) = *(sPtr++);
276 *(dPtr++) = *(sPtr++);
277 *(dPtr++) = *(sPtr++);
286 case DXGI_FORMAT_R10G10B10A2_TYPELESS:
287 case DXGI_FORMAT_R10G10B10A2_UNORM:
288 case DXGI_FORMAT_R10G10B10A2_UINT:
289 case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM:
290 case DXGI_FORMAT_Y410:
293 if ( inSize >= 4 && outSize >= 4 )
295 if ( pDestination == pSource )
297 uint32_t *dPtr =
reinterpret_cast<uint32_t*
>(pDestination);
306 const uint32_t * __restrict sPtr =
reinterpret_cast<const uint32_t*
>(pSource);
307 uint32_t * __restrict dPtr =
reinterpret_cast<uint32_t*
>(pDestination);
311 *(dPtr++) = *(sPtr++) | 0xC0000000;
318 case DXGI_FORMAT_R8G8B8A8_TYPELESS:
319 case DXGI_FORMAT_R8G8B8A8_UNORM:
320 case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
321 case DXGI_FORMAT_R8G8B8A8_UINT:
322 case DXGI_FORMAT_R8G8B8A8_SNORM:
323 case DXGI_FORMAT_R8G8B8A8_SINT:
324 case DXGI_FORMAT_B8G8R8A8_UNORM:
325 case DXGI_FORMAT_B8G8R8A8_TYPELESS:
326 case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
327 case DXGI_FORMAT_AYUV:
328 if ( inSize >= 4 && outSize >= 4 )
330 const uint32_t alpha = ( format == DXGI_FORMAT_R8G8B8A8_SNORM || format == DXGI_FORMAT_R8G8B8A8_SINT ) ? 0x7f000000 : 0xff000000;
332 if ( pDestination == pSource )
334 uint32_t *dPtr =
reinterpret_cast<uint32_t*
>(pDestination);
337 uint32_t t = *dPtr & 0xFFFFFF;
344 const uint32_t * __restrict sPtr =
reinterpret_cast<const uint32_t*
>(pSource);
345 uint32_t * __restrict dPtr =
reinterpret_cast<uint32_t*
>(pDestination);
349 uint32_t t = *(sPtr++) & 0xFFFFFF;
358 case DXGI_FORMAT_B5G5R5A1_UNORM:
359 if ( inSize >= 2 && outSize >= 2 )
361 if ( pDestination == pSource )
363 uint16_t *dPtr =
reinterpret_cast<uint16_t*
>(pDestination);
371 const uint16_t * __restrict sPtr =
reinterpret_cast<const uint16_t*
>(pSource);
372 uint16_t * __restrict dPtr =
reinterpret_cast<uint16_t*
>(pDestination);
376 *(dPtr++) = *(sPtr++) | 0x8000;
383 case DXGI_FORMAT_A8_UNORM:
384 memset( pDestination, 0xff, outSize );
388 case DXGI_FORMAT_B4G4R4A4_UNORM:
389 if ( inSize >= 2 && outSize >= 2 )
391 if ( pDestination == pSource )
393 uint16_t *dPtr =
reinterpret_cast<uint16_t*
>(pDestination);
401 const uint16_t * __restrict sPtr =
reinterpret_cast<const uint16_t*
>(pSource);
402 uint16_t * __restrict dPtr =
reinterpret_cast<uint16_t*
>(pDestination);
406 *(dPtr++) = *(sPtr++) | 0xF000;
415 if ( pDestination == pSource )
419 memcpy_s( pDestination, outSize, pSource, size );
427 _Use_decl_annotations_
430 assert( pDestination && outSize > 0 );
431 assert( pSource && inSize > 0 );
437 case DXGI_FORMAT_R10G10B10A2_TYPELESS:
438 case DXGI_FORMAT_R10G10B10A2_UNORM:
439 case DXGI_FORMAT_R10G10B10A2_UINT:
440 case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM:
441 if ( inSize >= 4 && outSize >= 4 )
446 if ( pDestination == pSource )
448 uint32_t *dPtr =
reinterpret_cast<uint32_t*
>(pDestination);
453 uint32_t t1 = (t & 0x3ff00000) >> 20;
454 uint32_t t2 = (t & 0x000003ff) << 20;
455 uint32_t t3 = (t & 0x000ffc00);
458 *(dPtr++) = t1 | t2 | t3 | ta;
463 const uint32_t * __restrict sPtr =
reinterpret_cast<const uint32_t*
>(pSource);
464 uint32_t * __restrict dPtr =
reinterpret_cast<uint32_t*
>(pDestination);
468 uint32_t t = *(sPtr++);
470 uint32_t t1 = (t & 0x3ff00000) >> 20;
471 uint32_t t2 = (t & 0x000003ff) << 20;
472 uint32_t t3 = (t & 0x000ffc00);
475 *(dPtr++) = t1 | t2 | t3 | ta;
484 case DXGI_FORMAT_R8G8B8A8_TYPELESS:
485 case DXGI_FORMAT_R8G8B8A8_UNORM:
486 case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
487 case DXGI_FORMAT_B8G8R8A8_UNORM:
488 case DXGI_FORMAT_B8G8R8X8_UNORM:
489 case DXGI_FORMAT_B8G8R8A8_TYPELESS:
490 case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
491 case DXGI_FORMAT_B8G8R8X8_TYPELESS:
492 case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB:
493 if ( inSize >= 4 && outSize >= 4 )
496 if ( pDestination == pSource )
498 uint32_t *dPtr =
reinterpret_cast<uint32_t*
>(pDestination);
503 uint32_t t1 = (t & 0x00ff0000) >> 16;
504 uint32_t t2 = (t & 0x000000ff) << 16;
505 uint32_t t3 = (t & 0x0000ff00);
508 *(dPtr++) = t1 | t2 | t3 | ta;
513 const uint32_t * __restrict sPtr =
reinterpret_cast<const uint32_t*
>(pSource);
514 uint32_t * __restrict dPtr =
reinterpret_cast<uint32_t*
>(pDestination);
518 uint32_t t = *(sPtr++);
520 uint32_t t1 = (t & 0x00ff0000) >> 16;
521 uint32_t t2 = (t & 0x000000ff) << 16;
522 uint32_t t3 = (t & 0x0000ff00);
525 *(dPtr++) = t1 | t2 | t3 | ta;
533 case DXGI_FORMAT_YUY2:
534 if ( inSize >= 4 && outSize >= 4 )
539 if ( pDestination == pSource )
541 uint32_t *dPtr =
reinterpret_cast<uint32_t*
>(pDestination);
546 uint32_t t1 = (t & 0x000000ff) << 8;
547 uint32_t t2 = (t & 0x0000ff00) >> 8;
548 uint32_t t3 = (t & 0x00ff0000) << 8;
549 uint32_t t4 = (t & 0xff000000) >> 8;
551 *(dPtr++) = t1 | t2 | t3 | t4;
556 const uint32_t * __restrict sPtr =
reinterpret_cast<const uint32_t*
>(pSource);
557 uint32_t * __restrict dPtr =
reinterpret_cast<uint32_t*
>(pDestination);
561 uint32_t t = *(sPtr++);
563 uint32_t t1 = (t & 0x000000ff) << 8;
564 uint32_t t2 = (t & 0x0000ff00) >> 8;
565 uint32_t t3 = (t & 0x00ff0000) << 8;
566 uint32_t t4 = (t & 0xff000000) >> 8;
568 *(dPtr++) = t1 | t2 | t3 | t4;
578 if ( pDestination == pSource )
582 memcpy_s( pDestination, outSize, pSource, size );
590 _Use_decl_annotations_
594 assert( pDestination && outSize > 0 );
595 assert( pSource && inSize > 0 );
601 case DXGI_FORMAT_B5G6R5_UNORM:
602 if ( outFormat != DXGI_FORMAT_R8G8B8A8_UNORM )
606 if ( inSize >= 2 && outSize >= 4 )
608 const uint16_t * __restrict sPtr =
reinterpret_cast<const uint16_t*
>(pSource);
609 uint32_t * __restrict dPtr =
reinterpret_cast<uint32_t*
>(pDestination);
611 for(
size_t ocount = 0, icount = 0; ( ( icount < ( inSize - 1 ) ) && ( ocount < ( outSize - 3 ) ) ); icount += 2, ocount += 4 )
613 uint16_t t = *(sPtr++);
615 uint32_t t1 = ((t & 0xf800) >> 8) | ((t & 0xe000) >> 13);
616 uint32_t t2 = ((t & 0x07e0) << 5) | ((t & 0x0600) >> 5);
617 uint32_t t3 = ((t & 0x001f) << 19) | ((t & 0x001c) << 14);
619 *(dPtr++) = t1 | t2 | t3 | 0xff000000;
625 case DXGI_FORMAT_B5G5R5A1_UNORM:
626 if ( outFormat != DXGI_FORMAT_R8G8B8A8_UNORM )
630 if ( inSize >= 2 && outSize >= 4 )
632 const uint16_t * __restrict sPtr =
reinterpret_cast<const uint16_t*
>(pSource);
633 uint32_t * __restrict dPtr =
reinterpret_cast<uint32_t*
>(pDestination);
635 for(
size_t ocount = 0, icount = 0; ( ( icount < ( inSize - 1 ) ) && ( ocount < ( outSize - 3 ) ) ); icount += 2, ocount += 4 )
637 uint16_t t = *(sPtr++);
639 uint32_t t1 = ((t & 0x7c00) >> 7) | ((t & 0x7000) >> 12);
640 uint32_t t2 = ((t & 0x03e0) << 6) | ((t & 0x0380) << 1);
641 uint32_t t3 = ((t & 0x001f) << 19) | ((t & 0x001c) << 14);
644 *(dPtr++) = t1 | t2 | t3 | ta;
650 case DXGI_FORMAT_B4G4R4A4_UNORM:
651 if ( outFormat != DXGI_FORMAT_R8G8B8A8_UNORM )
655 if ( inSize >= 2 && outSize >= 4 )
657 const uint16_t * __restrict sPtr =
reinterpret_cast<const uint16_t*
>(pSource);
658 uint32_t * __restrict dPtr =
reinterpret_cast<uint32_t*
>(pDestination);
660 for(
size_t ocount = 0, icount = 0; ( ( icount < ( inSize - 1 ) ) && ( ocount < ( outSize - 3 ) ) ); icount += 2, ocount += 4 )
662 uint16_t t = *(sPtr++);
664 uint32_t t1 = ((t & 0x0f00) >> 4) | ((t & 0x0f00) >> 8);
665 uint32_t t2 = ((t & 0x00f0) << 8) | ((t & 0x00f0) << 4);
666 uint32_t t3 = ((t & 0x000f) << 20) | ((t & 0x000f) << 16);
667 uint32_t ta = ( flags &
TEXP_SCANLINE_SETALPHA ) ? 0xff000000 : (((t & 0xf000) << 16) | ((t & 0xf000) << 12));
669 *(dPtr++) = t1 | t2 | t3 | ta;
//-------------------------------------------------------------------------------------
// Expands each packed pixel of 'type' from pSource to a float4 XMVECTOR using
// the DirectXMath load function 'func'. Relies on size, pSource, dPtr, and
// ePtr being in scope at the expansion site; the enclosing function returns
// true if at least the first pixel fit in the buffer, false otherwise.
//-------------------------------------------------------------------------------------
#define LOAD_SCANLINE( type, func )\
        if ( size >= sizeof(type) )\
        {\
            const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
            for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
            {\
                if ( dPtr >= ePtr ) break;\
                *(dPtr++) = func( sPtr++ );\
            }\
            return true;\
        }\
        return false;
//-------------------------------------------------------------------------------------
// Like LOAD_SCANLINE but for 3-channel source formats: loads each pixel via
// 'func', then takes X/Y/Z from the loaded value and W from 'defvec'
// (g_XMSelect1110 keeps the first three components of the second operand).
// Relies on size, pSource, dPtr, and ePtr in the expansion-site scope.
//-------------------------------------------------------------------------------------
#define LOAD_SCANLINE3( type, func, defvec )\
        if ( size >= sizeof(type) )\
        {\
            const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
            for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
            {\
                XMVECTOR v = func( sPtr++ );\
                if ( dPtr >= ePtr ) break;\
                *(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1110 );\
            }\
            return true;\
        }\
        return false;
//-------------------------------------------------------------------------------------
// Like LOAD_SCANLINE but for 2-channel source formats: loads each pixel via
// 'func', then takes X/Y from the loaded value and Z/W from 'defvec'
// (g_XMSelect1100 keeps the first two components of the second operand).
// Relies on size, pSource, dPtr, and ePtr in the expansion-site scope.
//-------------------------------------------------------------------------------------
#define LOAD_SCANLINE2( type, func, defvec )\
        if ( size >= sizeof(type) )\
        {\
            const type * __restrict sPtr = reinterpret_cast<const type*>(pSource);\
            for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
            {\
                XMVECTOR v = func( sPtr++ );\
                if ( dPtr >= ePtr ) break;\
                *(dPtr++) = XMVectorSelect( defvec, v, g_XMSelect1100 );\
            }\
            return true;\
        }\
        return false;
724 _Use_decl_annotations_
726 LPCVOID pSource,
size_t size, DXGI_FORMAT
format )
728 assert( pDestination && count > 0 && (((uintptr_t)pDestination & 0xF) == 0) );
729 assert( pSource && size > 0 );
732 XMVECTOR* __restrict dPtr = pDestination;
736 const XMVECTOR* ePtr = pDestination +
count;
738 switch( static_cast<int>(format) )
740 case DXGI_FORMAT_R32G32B32A32_FLOAT:
742 size_t msize = (size > (
sizeof(XMVECTOR)*count)) ? (
sizeof(XMVECTOR)*
count) : size;
743 memcpy_s( dPtr,
sizeof(XMVECTOR)*count, pSource, msize );
747 case DXGI_FORMAT_R32G32B32A32_UINT:
750 case DXGI_FORMAT_R32G32B32A32_SINT:
753 case DXGI_FORMAT_R32G32B32_FLOAT:
756 case DXGI_FORMAT_R32G32B32_UINT:
759 case DXGI_FORMAT_R32G32B32_SINT:
762 case DXGI_FORMAT_R16G16B16A16_FLOAT:
765 case DXGI_FORMAT_R16G16B16A16_UNORM:
768 case DXGI_FORMAT_R16G16B16A16_UINT:
771 case DXGI_FORMAT_R16G16B16A16_SNORM:
774 case DXGI_FORMAT_R16G16B16A16_SINT:
777 case DXGI_FORMAT_R32G32_FLOAT:
780 case DXGI_FORMAT_R32G32_UINT:
783 case DXGI_FORMAT_R32G32_SINT:
786 case DXGI_FORMAT_D32_FLOAT_S8X24_UINT:
788 const size_t psize =
sizeof(float)+
sizeof(uint32_t);
791 const float * sPtr =
reinterpret_cast<const float*
>(pSource);
792 for(
size_t icount = 0; icount < ( size - psize + 1 ); icount += psize )
794 const uint8_t* ps8 =
reinterpret_cast<const uint8_t*
>( &sPtr[1] );
795 if ( dPtr >= ePtr )
break;
796 *(dPtr++) = XMVectorSet( sPtr[0], static_cast<float>( *ps8 ), 0.f, 1.f );
804 case DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS:
806 const size_t psize =
sizeof(float)+
sizeof(uint32_t);
809 const float * sPtr =
reinterpret_cast<const float*
>(pSource);
810 for(
size_t icount = 0; icount < ( size - psize + 1 ); icount += psize )
812 if ( dPtr >= ePtr )
break;
813 *(dPtr++) = XMVectorSet( sPtr[0], 0.f , 0.f, 1.f );
821 case DXGI_FORMAT_X32_TYPELESS_G8X24_UINT:
823 const size_t psize =
sizeof(float)+
sizeof(uint32_t);
826 const float * sPtr =
reinterpret_cast<const float*
>(pSource);
827 for(
size_t icount = 0; icount < ( size - psize + 1 ); icount += psize )
829 const uint8_t* pg8 =
reinterpret_cast<const uint8_t*
>( &sPtr[1] );
830 if ( dPtr >= ePtr )
break;
831 *(dPtr++) = XMVectorSet( 0.f , static_cast<float>( *pg8 ), 0.f, 1.f );
839 case DXGI_FORMAT_R10G10B10A2_UNORM:
842 case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM:
843 #if DIRECTX_MATH_VERSION >= 306
846 if ( size >=
sizeof(XMUDECN4) )
848 const XMUDECN4 * __restrict sPtr =
reinterpret_cast<const XMUDECN4*
>(pSource);
849 for(
size_t icount = 0; icount < ( size -
sizeof(XMUDECN4) + 1 ); icount +=
sizeof(XMUDECN4) )
851 if ( dPtr >= ePtr )
break;
853 int32_t ElementX = sPtr->v & 0x3FF;
854 int32_t ElementY = (sPtr->v >> 10) & 0x3FF;
855 int32_t ElementZ = (sPtr->v >> 20) & 0x3FF;
857 XMVECTORF32 vResult = {
858 (float)(ElementX - 0x180) / 510.0f,
859 (float)(ElementY - 0x180) / 510.0f,
860 (float)(ElementZ - 0x180) / 510.0f,
861 (float)(sPtr->v >> 30) / 3.0f
866 *(dPtr++) = vResult.v;
873 case DXGI_FORMAT_R10G10B10A2_UINT:
876 case DXGI_FORMAT_R11G11B10_FLOAT:
879 case DXGI_FORMAT_R8G8B8A8_UNORM:
880 case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
883 case DXGI_FORMAT_R8G8B8A8_UINT:
886 case DXGI_FORMAT_R8G8B8A8_SNORM:
889 case DXGI_FORMAT_R8G8B8A8_SINT:
892 case DXGI_FORMAT_R16G16_FLOAT:
895 case DXGI_FORMAT_R16G16_UNORM:
898 case DXGI_FORMAT_R16G16_UINT:
901 case DXGI_FORMAT_R16G16_SNORM:
904 case DXGI_FORMAT_R16G16_SINT:
907 case DXGI_FORMAT_D32_FLOAT:
908 case DXGI_FORMAT_R32_FLOAT:
909 if ( size >=
sizeof(
float) )
911 const float* __restrict sPtr =
reinterpret_cast<const float*
>(pSource);
912 for(
size_t icount = 0; icount < ( size -
sizeof(float) + 1 ); icount +=
sizeof(float) )
914 XMVECTOR v = XMLoadFloat( sPtr++ );
915 if ( dPtr >= ePtr )
break;
916 *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v, g_XMSelect1000 );
922 case DXGI_FORMAT_R32_UINT:
923 if ( size >=
sizeof(uint32_t) )
925 const uint32_t* __restrict sPtr =
reinterpret_cast<const uint32_t*
>(pSource);
926 for(
size_t icount = 0; icount < ( size -
sizeof(uint32_t) + 1 ); icount +=
sizeof(uint32_t) )
928 XMVECTOR v = XMLoadInt( sPtr++ );
929 v = XMConvertVectorUIntToFloat( v, 0 );
930 if ( dPtr >= ePtr )
break;
931 *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v, g_XMSelect1000 );
937 case DXGI_FORMAT_R32_SINT:
938 if ( size >=
sizeof(int32_t) )
940 const int32_t * __restrict sPtr =
reinterpret_cast<const int32_t*
>(pSource);
941 for(
size_t icount = 0; icount < ( size -
sizeof(int32_t) + 1 ); icount +=
sizeof(int32_t) )
943 XMVECTOR v = XMLoadInt( reinterpret_cast<const uint32_t*> (sPtr++) );
944 v = XMConvertVectorIntToFloat( v, 0 );
945 if ( dPtr >= ePtr )
break;
946 *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v, g_XMSelect1000 );
952 case DXGI_FORMAT_D24_UNORM_S8_UINT:
953 if ( size >=
sizeof(uint32_t) )
955 const uint32_t * sPtr =
reinterpret_cast<const uint32_t*
>(pSource);
956 for(
size_t icount = 0; icount < ( size -
sizeof(uint32_t) + 1 ); icount +=
sizeof(uint32_t) )
958 float d =
static_cast<float>( *sPtr & 0xFFFFFF ) / 16777215.f;
959 float s =
static_cast<float>( ( *sPtr & 0xFF000000 ) >> 24 );
961 if ( dPtr >= ePtr )
break;
962 *(dPtr++) = XMVectorSet( d, s, 0.f, 1.f );
968 case DXGI_FORMAT_R24_UNORM_X8_TYPELESS:
969 if ( size >=
sizeof(uint32_t) )
971 const uint32_t * sPtr =
reinterpret_cast<const uint32_t*
>(pSource);
972 for(
size_t icount = 0; icount < ( size -
sizeof(uint32_t) + 1 ); icount +=
sizeof(uint32_t) )
974 float r =
static_cast<float>( *sPtr & 0xFFFFFF ) / 16777215.f;
976 if ( dPtr >= ePtr )
break;
977 *(dPtr++) = XMVectorSet( r, 0.f , 0.f, 1.f );
983 case DXGI_FORMAT_X24_TYPELESS_G8_UINT:
984 if ( size >=
sizeof(uint32_t) )
986 const uint32_t * sPtr =
reinterpret_cast<const uint32_t*
>(pSource);
987 for(
size_t icount = 0; icount < ( size -
sizeof(uint32_t) + 1 ); icount +=
sizeof(uint32_t) )
989 float g =
static_cast<float>( ( *sPtr & 0xFF000000 ) >> 24 );
991 if ( dPtr >= ePtr )
break;
992 *(dPtr++) = XMVectorSet( 0.f , g, 0.f, 1.f );
998 case DXGI_FORMAT_R8G8_UNORM:
1001 case DXGI_FORMAT_R8G8_UINT:
1004 case DXGI_FORMAT_R8G8_SNORM:
1007 case DXGI_FORMAT_R8G8_SINT:
1010 case DXGI_FORMAT_R16_FLOAT:
1011 if ( size >=
sizeof(HALF) )
1013 const HALF * __restrict sPtr =
reinterpret_cast<const HALF*
>(pSource);
1014 for(
size_t icount = 0; icount < ( size -
sizeof(HALF) + 1 ); icount +=
sizeof(HALF) )
1016 if ( dPtr >= ePtr )
break;
1017 *(dPtr++) = XMVectorSet( XMConvertHalfToFloat(*sPtr++), 0.f, 0.f, 1.f );
1023 case DXGI_FORMAT_D16_UNORM:
1024 case DXGI_FORMAT_R16_UNORM:
1025 if ( size >=
sizeof(uint16_t) )
1027 const uint16_t* __restrict sPtr =
reinterpret_cast<const uint16_t*
>(pSource);
1028 for(
size_t icount = 0; icount < ( size -
sizeof(uint16_t) + 1 ); icount +=
sizeof(uint16_t) )
1030 if ( dPtr >= ePtr )
break;
1031 *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++) / 65535.f, 0.f, 0.f, 1.f );
1037 case DXGI_FORMAT_R16_UINT:
1038 if ( size >=
sizeof(uint16_t) )
1040 const uint16_t * __restrict sPtr =
reinterpret_cast<const uint16_t*
>(pSource);
1041 for(
size_t icount = 0; icount < ( size -
sizeof(uint16_t) + 1 ); icount +=
sizeof(uint16_t) )
1043 if ( dPtr >= ePtr )
break;
1044 *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++), 0.f, 0.f, 1.f );
1050 case DXGI_FORMAT_R16_SNORM:
1051 if ( size >=
sizeof(int16_t) )
1053 const int16_t * __restrict sPtr =
reinterpret_cast<const int16_t*
>(pSource);
1054 for(
size_t icount = 0; icount < ( size -
sizeof(int16_t) + 1 ); icount +=
sizeof(int16_t) )
1056 if ( dPtr >= ePtr )
break;
1057 *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++) / 32767.f, 0.f, 0.f, 1.f );
1063 case DXGI_FORMAT_R16_SINT:
1064 if ( size >=
sizeof(int16_t) )
1066 const int16_t * __restrict sPtr =
reinterpret_cast<const int16_t*
>(pSource);
1067 for(
size_t icount = 0; icount < ( size -
sizeof(int16_t) + 1 ); icount +=
sizeof(int16_t) )
1069 if ( dPtr >= ePtr )
break;
1070 *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++), 0.f, 0.f, 1.f );
1076 case DXGI_FORMAT_R8_UNORM:
1077 if ( size >=
sizeof(
uint8_t) )
1079 const uint8_t * __restrict sPtr =
reinterpret_cast<const uint8_t*
>(pSource);
1080 for(
size_t icount = 0; icount <
size; icount +=
sizeof(
uint8_t) )
1082 if ( dPtr >= ePtr )
break;
1083 *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++) / 255.f, 0.f, 0.f, 1.f );
1089 case DXGI_FORMAT_R8_UINT:
1090 if ( size >=
sizeof(
uint8_t) )
1092 const uint8_t * __restrict sPtr =
reinterpret_cast<const uint8_t*
>(pSource);
1093 for(
size_t icount = 0; icount <
size; icount +=
sizeof(
uint8_t) )
1095 if ( dPtr >= ePtr )
break;
1096 *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++), 0.f, 0.f, 1.f );
1102 case DXGI_FORMAT_R8_SNORM:
1103 if ( size >=
sizeof(int8_t) )
1105 const int8_t * __restrict sPtr =
reinterpret_cast<const int8_t*
>(pSource);
1106 for(
size_t icount = 0; icount <
size; icount +=
sizeof(int8_t) )
1108 if ( dPtr >= ePtr )
break;
1109 *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++) / 127.f, 0.f, 0.f, 1.f );
1115 case DXGI_FORMAT_R8_SINT:
1116 if ( size >=
sizeof(int8_t) )
1118 const int8_t * __restrict sPtr =
reinterpret_cast<const int8_t*
>(pSource);
1119 for(
size_t icount = 0; icount <
size; icount +=
sizeof(int8_t) )
1121 if ( dPtr >= ePtr )
break;
1122 *(dPtr++) = XMVectorSet( static_cast<float>(*sPtr++), 0.f, 0.f, 1.f );
1128 case DXGI_FORMAT_A8_UNORM:
1129 if ( size >=
sizeof(
uint8_t) )
1131 const uint8_t * __restrict sPtr =
reinterpret_cast<const uint8_t*
>(pSource);
1132 for(
size_t icount = 0; icount <
size; icount +=
sizeof(
uint8_t) )
1134 if ( dPtr >= ePtr )
break;
1135 *(dPtr++) = XMVectorSet( 0.f, 0.f, 0.f, static_cast<float>(*sPtr++) / 255.f );
1141 case DXGI_FORMAT_R1_UNORM:
1142 if ( size >=
sizeof(
uint8_t) )
1144 const uint8_t * __restrict sPtr =
reinterpret_cast<const uint8_t*
>(pSource);
1145 for(
size_t icount = 0; icount <
size; icount +=
sizeof(
uint8_t) )
1147 for(
size_t bcount = 8; bcount > 0; --bcount )
1149 if ( dPtr >= ePtr )
break;
1150 *(dPtr++) = XMVectorSet( (((*sPtr >> (bcount-1)) & 0x1) ? 1.f : 0.f), 0.f, 0.f, 1.f );
1159 case DXGI_FORMAT_R9G9B9E5_SHAREDEXP:
1160 #if DIRECTX_MATH_VERSION >= 306
1163 if ( size >=
sizeof(XMFLOAT3SE) )
1165 const XMFLOAT3SE * __restrict sPtr =
reinterpret_cast<const XMFLOAT3SE*
>(pSource);
1166 for(
size_t icount = 0; icount < ( size -
sizeof(XMFLOAT3SE) + 1 ); icount +=
sizeof(XMFLOAT3SE) )
1168 union {
float f; int32_t i; } fi;
1169 fi.i = 0x33800000 + (sPtr->e << 23);
1173 Scale * float( sPtr->xm ),
1174 Scale * float( sPtr->ym ),
1175 Scale * float( sPtr->zm ),
1178 if ( dPtr >= ePtr )
break;
1186 case DXGI_FORMAT_R8G8_B8G8_UNORM:
1187 if ( size >=
sizeof(XMUBYTEN4) )
1189 const XMUBYTEN4 * __restrict sPtr =
reinterpret_cast<const XMUBYTEN4*
>(pSource);
1190 for(
size_t icount = 0; icount < ( size -
sizeof(XMUBYTEN4) + 1 ); icount +=
sizeof(XMUBYTEN4) )
1192 XMVECTOR v = XMLoadUByteN4( sPtr++ );
1193 XMVECTOR v1 = XMVectorSwizzle<0, 3, 2, 1>( v );
1194 if ( dPtr >= ePtr )
break;
1195 *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v, g_XMSelect1110 );
1196 if ( dPtr >= ePtr )
break;
1197 *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v1, g_XMSelect1110 );
1203 case DXGI_FORMAT_G8R8_G8B8_UNORM:
1204 if ( size >=
sizeof(XMUBYTEN4) )
1206 const XMUBYTEN4 * __restrict sPtr =
reinterpret_cast<const XMUBYTEN4*
>(pSource);
1207 for(
size_t icount = 0; icount < ( size -
sizeof(XMUBYTEN4) + 1 ); icount +=
sizeof(XMUBYTEN4) )
1209 XMVECTOR v = XMLoadUByteN4( sPtr++ );
1210 XMVECTOR v0 = XMVectorSwizzle<1, 0, 3, 2>( v );
1211 XMVECTOR v1 = XMVectorSwizzle<1, 2, 3, 0>( v );
1212 if ( dPtr >= ePtr )
break;
1213 *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v0, g_XMSelect1110 );
1214 if ( dPtr >= ePtr )
break;
1215 *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v1, g_XMSelect1110 );
1221 case DXGI_FORMAT_B5G6R5_UNORM:
1222 if ( size >=
sizeof(XMU565) )
1224 static XMVECTORF32 s_Scale = { 1.f/31.f, 1.f/63.f, 1.f/31.f, 1.f };
1225 const XMU565 * __restrict sPtr =
reinterpret_cast<const XMU565*
>(pSource);
1226 for(
size_t icount = 0; icount < ( size -
sizeof(XMU565) + 1 ); icount +=
sizeof(XMU565) )
1228 XMVECTOR v = XMLoadU565( sPtr++ );
1229 v = XMVectorMultiply( v, s_Scale );
1230 v = XMVectorSwizzle<2, 1, 0, 3>( v );
1231 if ( dPtr >= ePtr )
break;
1232 *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v, g_XMSelect1110 );
1238 case DXGI_FORMAT_B5G5R5A1_UNORM:
1239 if ( size >=
sizeof(XMU555) )
1241 static XMVECTORF32 s_Scale = { 1.f/31.f, 1.f/31.f, 1.f/31.f, 1.f };
1242 const XMU555 * __restrict sPtr =
reinterpret_cast<const XMU555*
>(pSource);
1243 for(
size_t icount = 0; icount < ( size -
sizeof(XMU555) + 1 ); icount +=
sizeof(XMU555) )
1245 XMVECTOR v = XMLoadU555( sPtr++ );
1246 v = XMVectorMultiply( v, s_Scale );
1247 if ( dPtr >= ePtr )
break;
1248 *(dPtr++) = XMVectorSwizzle<2, 1, 0, 3>( v );
1254 case DXGI_FORMAT_B8G8R8A8_UNORM:
1255 case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
1256 if ( size >=
sizeof(XMUBYTEN4) )
1258 const XMUBYTEN4 * __restrict sPtr =
reinterpret_cast<const XMUBYTEN4*
>(pSource);
1259 for(
size_t icount = 0; icount < ( size -
sizeof(XMUBYTEN4) + 1 ); icount +=
sizeof(XMUBYTEN4) )
1261 XMVECTOR v = XMLoadUByteN4( sPtr++ );
1262 if ( dPtr >= ePtr )
break;
1263 *(dPtr++) = XMVectorSwizzle<2, 1, 0, 3>( v );
1269 case DXGI_FORMAT_B8G8R8X8_UNORM:
1270 case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB:
1271 if ( size >=
sizeof(XMUBYTEN4) )
1273 const XMUBYTEN4 * __restrict sPtr =
reinterpret_cast<const XMUBYTEN4*
>(pSource);
1274 for(
size_t icount = 0; icount < ( size -
sizeof(XMUBYTEN4) + 1 ); icount +=
sizeof(XMUBYTEN4) )
1276 XMVECTOR v = XMLoadUByteN4( sPtr++ );
1277 v = XMVectorSwizzle<2, 1, 0, 3>( v );
1278 if ( dPtr >= ePtr )
break;
1279 *(dPtr++) = XMVectorSelect( g_XMIdentityR3, v, g_XMSelect1110 );
1285 case DXGI_FORMAT_AYUV:
1286 if ( size >=
sizeof(XMUBYTEN4) )
1288 const XMUBYTEN4 * __restrict sPtr =
reinterpret_cast<const XMUBYTEN4*
>(pSource);
1289 for(
size_t icount = 0; icount < ( size -
sizeof(XMUBYTEN4) + 1 ); icount +=
sizeof(XMUBYTEN4) )
1291 int v = int(sPtr->x) - 128;
1292 int u = int(sPtr->y) - 128;
1293 int y = int(sPtr->z) - 16;
1294 unsigned int a = sPtr->w;
1307 int r = (298 * y + 409 * v + 128) >> 8;
1308 int g = (298 * y - 100 * u - 208 * v + 128) >> 8;
1309 int b = (298 * y + 516 * u + 128) >> 8;
1311 if ( dPtr >= ePtr )
break;
1312 *(dPtr++) = XMVectorSet(
float( std::min<int>( std::max<int>( r, 0 ), 255 ) ) / 255.f,
1313 float( std::min<int>( std::max<int>( g, 0 ), 255 ) ) / 255.f,
1314 float( std::min<int>( std::max<int>( b, 0 ), 255 ) ) / 255.f,
1315 float( a / 255.f ) );
1321 case DXGI_FORMAT_Y410:
1322 if ( size >=
sizeof(XMUDECN4) )
1324 const XMUDECN4 * __restrict sPtr =
reinterpret_cast<const XMUDECN4*
>(pSource);
1325 for(
size_t icount = 0; icount < ( size -
sizeof(XMUDECN4) + 1 ); icount +=
sizeof(XMUDECN4) )
1327 int64_t u = int(sPtr->x) - 512;
1328 int64_t
y = int(sPtr->y) - 64;
1329 int64_t v = int(sPtr->z) - 512;
1330 unsigned int a = sPtr->w;
1343 int r =
static_cast<int>( (76533 * y + 104905 * v + 32768) >> 16 );
1344 int g =
static_cast<int>( (76533 * y - 25747 * u - 53425 * v + 32768) >> 16 );
1345 int b =
static_cast<int>( (76533 * y + 132590 * u + 32768) >> 16 );
1347 if ( dPtr >= ePtr )
break;
1348 *(dPtr++) = XMVectorSet(
float( std::min<int>( std::max<int>( r, 0 ), 1023 ) ) / 1023.f,
1349 float( std::min<int>( std::max<int>( g, 0 ), 1023 ) ) / 1023.f,
1350 float( std::min<int>( std::max<int>( b, 0 ), 1023 ) ) / 1023.f,
1357 case DXGI_FORMAT_Y416:
1358 if ( size >=
sizeof(XMUSHORTN4) )
1360 const XMUSHORTN4 * __restrict sPtr =
reinterpret_cast<const XMUSHORTN4*
>(pSource);
1361 for(
size_t icount = 0; icount < ( size -
sizeof(XMUSHORTN4) + 1 ); icount +=
sizeof(XMUSHORTN4) )
1363 int64_t u = int64_t(sPtr->x) - 32768;
1364 int64_t
y = int64_t(sPtr->y) - 4096;
1365 int64_t v = int64_t(sPtr->z) - 32768;
1366 unsigned int a = sPtr->w;
1379 int r =
static_cast<int>( (76607 * y + 105006 * v + 32768) >> 16 );
1380 int g =
static_cast<int>( (76607 * y - 25772 * u - 53477 * v + 32768) >> 16 );
1381 int b =
static_cast<int>( (76607 * y + 132718 * u + 32768) >> 16 );
1383 if ( dPtr >= ePtr )
break;
1384 *(dPtr++) = XMVectorSet(
float( std::min<int>( std::max<int>( r, 0 ), 65535 ) ) / 65535.f,
1385 float( std::min<int>( std::max<int>( g, 0 ), 65535 ) ) / 65535.f,
1386 float( std::min<int>( std::max<int>( b, 0 ), 65535 ) ) / 65535.f,
1387 float( std::min<int>( std::max<int>( a, 0 ), 65535 ) ) / 65535.f );
1393 case DXGI_FORMAT_YUY2:
1394 if ( size >=
sizeof(XMUBYTEN4) )
1396 const XMUBYTEN4 * __restrict sPtr =
reinterpret_cast<const XMUBYTEN4*
>(pSource);
1397 for(
size_t icount = 0; icount < ( size -
sizeof(XMUBYTEN4) + 1 ); icount +=
sizeof(XMUBYTEN4) )
1399 int y0 = int(sPtr->x) - 16;
1400 int u = int(sPtr->y) - 128;
1401 int y1 = int(sPtr->z) - 16;
1402 int v = int(sPtr->w) - 128;
1406 int r = (298 * y0 + 409 * v + 128) >> 8;
1407 int g = (298 * y0 - 100 * u - 208 * v + 128) >> 8;
1408 int b = (298 * y0 + 516 * u + 128) >> 8;
1410 if ( dPtr >= ePtr )
break;
1411 *(dPtr++) = XMVectorSet(
float( std::min<int>( std::max<int>( r, 0 ), 255 ) ) / 255.f,
1412 float( std::min<int>( std::max<int>( g, 0 ), 255 ) ) / 255.f,
1413 float( std::min<int>( std::max<int>( b, 0 ), 255 ) ) / 255.f,
1416 r = (298 * y1 + 409 * v + 128) >> 8;
1417 g = (298 * y1 - 100 * u - 208 * v + 128) >> 8;
1418 b = (298 * y1 + 516 * u + 128) >> 8;
1420 if ( dPtr >= ePtr )
break;
1421 *(dPtr++) = XMVectorSet(
float( std::min<int>( std::max<int>( r, 0 ), 255 ) ) / 255.f,
1422 float( std::min<int>( std::max<int>( g, 0 ), 255 ) ) / 255.f,
1423 float( std::min<int>( std::max<int>( b, 0 ), 255 ) ) / 255.f,
1430 case DXGI_FORMAT_Y210:
1432 if ( size >=
sizeof(XMUSHORTN4) )
1434 const XMUSHORTN4 * __restrict sPtr =
reinterpret_cast<const XMUSHORTN4*
>(pSource);
1435 for(
size_t icount = 0; icount < ( size -
sizeof(XMUSHORTN4) + 1 ); icount +=
sizeof(XMUSHORTN4) )
1437 int64_t y0 = int64_t(sPtr->x >> 6) - 64;
1438 int64_t u = int64_t(sPtr->y >> 6) - 512;
1439 int64_t y1 = int64_t(sPtr->z >> 6) - 64;
1440 int64_t v = int64_t(sPtr->w >> 6) - 512;
1444 int r =
static_cast<int>( (76533 * y0 + 104905 * v + 32768) >> 16 );
1445 int g =
static_cast<int>( (76533 * y0 - 25747 * u - 53425 * v + 32768) >> 16 );
1446 int b =
static_cast<int>( (76533 * y0 + 132590 * u + 32768) >> 16 );
1448 if ( dPtr >= ePtr )
break;
1449 *(dPtr++) = XMVectorSet(
float( std::min<int>( std::max<int>( r, 0 ), 1023 ) ) / 1023.f,
1450 float( std::min<int>( std::max<int>( g, 0 ), 1023 ) ) / 1023.f,
1451 float( std::min<int>( std::max<int>( b, 0 ), 1023 ) ) / 1023.f,
1454 r =
static_cast<int>( (76533 * y1 + 104905 * v + 32768) >> 16 );
1455 g =
static_cast<int>( (76533 * y1 - 25747 * u - 53425 * v + 32768) >> 16 );
1456 b =
static_cast<int>( (76533 * y1 + 132590 * u + 32768) >> 16 );
1458 if ( dPtr >= ePtr )
break;
1459 *(dPtr++) = XMVectorSet(
float( std::min<int>( std::max<int>( r, 0 ), 1023 ) ) / 1023.f,
1460 float( std::min<int>( std::max<int>( g, 0 ), 1023 ) ) / 1023.f,
1461 float( std::min<int>( std::max<int>( b, 0 ), 1023 ) ) / 1023.f,
1468 case DXGI_FORMAT_Y216:
1469 if ( size >=
sizeof(XMUSHORTN4) )
1471 const XMUSHORTN4 * __restrict sPtr =
reinterpret_cast<const XMUSHORTN4*
>(pSource);
1472 for(
size_t icount = 0; icount < ( size -
sizeof(XMUSHORTN4) + 1 ); icount +=
sizeof(XMUSHORTN4) )
1474 int64_t y0 = int64_t(sPtr->x) - 4096;
1475 int64_t u = int64_t(sPtr->y) - 32768;
1476 int64_t y1 = int64_t(sPtr->z) - 4096;
1477 int64_t v = int64_t(sPtr->w) - 32768;
1481 int r =
static_cast<int>( (76607 * y0 + 105006 * v + 32768) >> 16 );
1482 int g =
static_cast<int>( (76607 * y0 - 25772 * u - 53477 * v + 32768) >> 16 );
1483 int b =
static_cast<int>( (76607 * y0 + 132718 * u + 32768) >> 16 );
1485 if ( dPtr >= ePtr )
break;
1486 *(dPtr++) = XMVectorSet(
float( std::min<int>( std::max<int>( r, 0 ), 65535 ) ) / 65535.f,
1487 float( std::min<int>( std::max<int>( g, 0 ), 65535 ) ) / 65535.f,
1488 float( std::min<int>( std::max<int>( b, 0 ), 65535 ) ) / 65535.f,
1491 r =
static_cast<int>( (76607 * y1 + 105006 * v + 32768) >> 16 );
1492 g =
static_cast<int>( (76607 * y1 - 25772 * u - 53477 * v + 32768) >> 16 );
1493 b =
static_cast<int>( (76607 * y1 + 132718 * u + 32768) >> 16 );
1495 if ( dPtr >= ePtr )
break;
1496 *(dPtr++) = XMVectorSet(
float( std::min<int>( std::max<int>( r, 0 ), 65535 ) ) / 65535.f,
1497 float( std::min<int>( std::max<int>( g, 0 ), 65535 ) ) / 65535.f,
1498 float( std::min<int>( std::max<int>( b, 0 ), 65535 ) ) / 65535.f,
1505 case DXGI_FORMAT_B4G4R4A4_UNORM:
1506 if ( size >=
sizeof(XMUNIBBLE4) )
1508 static XMVECTORF32 s_Scale = { 1.f/15.f, 1.f/15.f, 1.f/15.f, 1.f/15.f };
1509 const XMUNIBBLE4 * __restrict sPtr =
reinterpret_cast<const XMUNIBBLE4*
>(pSource);
1510 for(
size_t icount = 0; icount < ( size -
sizeof(XMUNIBBLE4) + 1 ); icount +=
sizeof(XMUNIBBLE4) )
1512 XMVECTOR v = XMLoadUNibble4( sPtr++ );
1513 v = XMVectorMultiply( v, s_Scale );
1514 if ( dPtr >= ePtr )
break;
1515 *(dPtr++) = XMVectorSwizzle<2, 1, 0, 3>( v );
1523 if ( size >=
sizeof(XMUDECN4) )
1525 const XMUDECN4 * __restrict sPtr =
reinterpret_cast<const XMUDECN4*
>(pSource);
1526 for(
size_t icount = 0; icount < ( size -
sizeof(XMUDECN4) + 1 ); icount +=
sizeof(XMUDECN4) )
1528 if ( dPtr >= ePtr )
break;
1530 XMVECTORF32 vResult = {
1531 FloatFrom7e3(sPtr->x),
1532 FloatFrom7e3(sPtr->y),
1533 FloatFrom7e3(sPtr->z),
1534 (float)(sPtr->v >> 30) / 3.0f
1539 *(dPtr++) = vResult.v;
1547 if ( size >=
sizeof(XMUDECN4) )
1549 const XMUDECN4 * __restrict sPtr =
reinterpret_cast<const XMUDECN4*
>(pSource);
1550 for(
size_t icount = 0; icount < ( size -
sizeof(XMUDECN4) + 1 ); icount +=
sizeof(XMUDECN4) )
1552 if ( dPtr >= ePtr )
break;
1554 XMVECTORF32 vResult = {
1555 FloatFrom6e4(sPtr->x),
1556 FloatFrom6e4(sPtr->y),
1557 FloatFrom6e4(sPtr->z),
1558 (float)(sPtr->v >> 30) / 3.0f
1563 *(dPtr++) = vResult.v;
1576 #undef LOAD_SCANLINE
1577 #undef LOAD_SCANLINE3
1578 #undef LOAD_SCANLINE2
// Store-scanline helper macro: when the destination buffer can hold at least
// one element of 'type', walk it element-by-element, bounds-check the source
// XMVECTOR pointer against ePtr, and pack one vector per element via 'func'
// (a DirectXMath XMStore* routine).
// NOTE(review): every surviving line ends in '\', so the macro's closing
// brace / return lines appear to have been lost in extraction — confirm
// against the original file before relying on this text.
1584 #define STORE_SCANLINE( type, func )\
1585 if ( size >= sizeof(type) )\
1587 type * __restrict dPtr = reinterpret_cast<type*>(pDestination);\
1588 for( size_t icount = 0; icount < ( size - sizeof(type) + 1 ); icount += sizeof(type) )\
1590 if ( sPtr >= ePtr ) break;\
1591 func( dPtr++, *sPtr++ );\
// _StoreScanline: packs 'count' XMVECTORs from pSource into 'size' bytes at
// pDestination in the pixel layout selected by 'format'.
// NOTE(review): the function signature line itself was lost in extraction;
// parameter names below (pDestination, size, format, pSource, count) are
// taken from the surviving asserts and usages.
1597 _Use_decl_annotations_
// Preconditions: non-null buffers, non-zero sizes, and a 16-byte-aligned
// source (required for aligned XMVECTOR access).
1601 assert( pDestination && size > 0 );
1602 assert( pSource && count > 0 && (((uintptr_t)pSource & 0xF) == 0) );
1605 const XMVECTOR* __restrict sPtr = pSource;
// ePtr is one-past-the-end of the source; every per-format loop checks
// 'sPtr >= ePtr' before reading.
1609 const XMVECTOR* ePtr = pSource +
count;
1611 switch( static_cast<int>(format) )
// The simple cases below delegate to the STORE_SCANLINE macro; the macro
// invocation lines were dropped in extraction — only the case labels remain.
1613 case DXGI_FORMAT_R32G32B32A32_FLOAT:
1616 case DXGI_FORMAT_R32G32B32A32_UINT:
1619 case DXGI_FORMAT_R32G32B32A32_SINT:
1622 case DXGI_FORMAT_R32G32B32_FLOAT:
1625 case DXGI_FORMAT_R32G32B32_UINT:
1628 case DXGI_FORMAT_R32G32B32_SINT:
1631 case DXGI_FORMAT_R16G16B16A16_FLOAT:
1634 case DXGI_FORMAT_R16G16B16A16_UNORM:
1637 case DXGI_FORMAT_R16G16B16A16_UINT:
1640 case DXGI_FORMAT_R16G16B16A16_SNORM:
1643 case DXGI_FORMAT_R16G16B16A16_SINT:
1646 case DXGI_FORMAT_R32G32_FLOAT:
1649 case DXGI_FORMAT_R32G32_UINT:
1652 case DXGI_FORMAT_R32G32_SINT:
// D32_FLOAT_S8X24_UINT: 32-bit float depth followed by 8-bit stencil with
// 24 unused bits (hence psize = float + uint32_t).
1655 case DXGI_FORMAT_D32_FLOAT_S8X24_UINT:
1657 const size_t psize =
sizeof(float)+
sizeof(uint32_t);
1658 if ( size >= psize )
1660 float *dPtr =
reinterpret_cast<float*
>(pDestination);
1661 for(
size_t icount = 0; icount < ( size - psize + 1 ); icount += psize )
1663 if ( sPtr >= ePtr )
break;
1665 XMStoreFloat4( &f, *sPtr++ );
// Stencil comes from the vector's Y channel, clamped to [0,255]; the
// declaration of f and ps8 was lost in extraction.
1668 ps8[0] =
static_cast<uint8_t>( std::min<float>( 255.f, std::max<float>( 0.f, f.y ) ) );
1669 ps8[1] = ps8[2] = ps8[3] = 0;
1677 case DXGI_FORMAT_R10G10B10A2_UNORM:
// XR_BIAS: extended-range 10:10:10:2 encoding. Values are mapped through
// N = v * 510 + 384 so that 0.0 encodes as 384 (the bias) and the format
// can represent values slightly outside [0,1]. With DirectXMath >= 3.06
// a library path is used instead (its body is not visible here).
1680 case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM:
1681 #if DIRECTX_MATH_VERSION >= 306
1684 if ( size >=
sizeof(XMUDECN4) )
1686 static const XMVECTORF32 Scale = { 510.0f, 510.0f, 510.0f, 3.0f };
1687 static const XMVECTORF32 Bias = { 384.0f, 384.0f, 384.0f, 0.0f };
// Clamp ceiling: 10-bit channels max out at 1023, the 2-bit alpha at 3.
1688 static const XMVECTORF32 C = { 1023.f, 1023.f, 1023.f, 3.f };
1690 XMUDECN4 * __restrict dPtr =
reinterpret_cast<XMUDECN4*
>(pDestination);
1691 for(
size_t icount = 0; icount < ( size -
sizeof(XMUDECN4) + 1 ); icount +=
sizeof(XMUDECN4) )
1693 if ( sPtr >= ePtr )
break;
1695 XMVECTOR N = XMVectorMultiplyAdd( *sPtr++, Scale, Bias );
1696 N = XMVectorClamp( N, g_XMZero, C );
// 'tmp' is an XMFLOAT4A declared on a line lost in extraction.
1699 XMStoreFloat4A(&tmp, N );
// Manual bit-pack: alpha in the top 2 bits, then 10 bits each for z/y/x.
1701 dPtr->v = ((uint32_t)tmp.w << 30)
1702 | (((uint32_t)tmp.z & 0x3FF) << 20)
1703 | (((uint32_t)tmp.y & 0x3FF) << 10)
1704 | (((uint32_t)tmp.x & 0x3FF));
// More macro-delegated packed formats (STORE_SCANLINE invocation lines were
// dropped in extraction; only case labels survive).
1712 case DXGI_FORMAT_R10G10B10A2_UINT:
1715 case DXGI_FORMAT_R11G11B10_FLOAT:
1718 case DXGI_FORMAT_R8G8B8A8_UNORM:
1719 case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
1722 case DXGI_FORMAT_R8G8B8A8_UINT:
1725 case DXGI_FORMAT_R8G8B8A8_SNORM:
1728 case DXGI_FORMAT_R8G8B8A8_SINT:
1731 case DXGI_FORMAT_R16G16_FLOAT:
1734 case DXGI_FORMAT_R16G16_UNORM:
1737 case DXGI_FORMAT_R16G16_UINT:
1740 case DXGI_FORMAT_R16G16_SNORM:
1743 case DXGI_FORMAT_R16G16_SINT:
// 32-bit float scalar: store only the X channel of each vector.
1746 case DXGI_FORMAT_D32_FLOAT:
1747 case DXGI_FORMAT_R32_FLOAT:
1748 if ( size >=
sizeof(
float) )
1750 float * __restrict dPtr =
reinterpret_cast<float*
>(pDestination);
1751 for(
size_t icount = 0; icount < ( size -
sizeof(float) + 1 ); icount +=
sizeof(float) )
1753 if ( sPtr >= ePtr )
break;
1754 XMStoreFloat( dPtr++, *(sPtr++) );
// 32-bit unsigned scalar: convert the whole vector to uint then store X.
1760 case DXGI_FORMAT_R32_UINT:
1761 if ( size >=
sizeof(uint32_t) )
1763 uint32_t * __restrict dPtr =
reinterpret_cast<uint32_t*
>(pDestination);
1764 for(
size_t icount = 0; icount < ( size -
sizeof(uint32_t) + 1 ); icount +=
sizeof(uint32_t) )
1766 if ( sPtr >= ePtr )
break;
1767 XMVECTOR v = XMConvertVectorFloatToUInt( *(sPtr++), 0 );
1768 XMStoreInt( dPtr++, v );
// 32-bit signed scalar (stored through a uint32_t* — XMStoreInt takes
// an unsigned pointer; the bit pattern is the signed result).
1774 case DXGI_FORMAT_R32_SINT:
1775 if ( size >=
sizeof(int32_t) )
1777 uint32_t * __restrict dPtr =
reinterpret_cast<uint32_t*
>(pDestination);
1778 for(
size_t icount = 0; icount < ( size -
sizeof(int32_t) + 1 ); icount +=
sizeof(int32_t) )
1780 if ( sPtr >= ePtr )
break;
1781 XMVECTOR v = XMConvertVectorFloatToInt( *(sPtr++), 0 );
1782 XMStoreInt( dPtr++, v );
// D24_UNORM_S8_UINT: 24-bit normalized depth from X (scaled by 2^24-1),
// 8-bit stencil from Y in the top byte.
1788 case DXGI_FORMAT_D24_UNORM_S8_UINT:
1789 if ( size >=
sizeof(uint32_t) )
// Clamp bounds: depth to [0,1], stencil to [0,255].
1791 static const XMVECTORF32 clamp = { 1.f, 255.f, 0.f, 0.f };
1792 XMVECTOR zero = XMVectorZero();
1793 uint32_t *dPtr =
reinterpret_cast<uint32_t*
>(pDestination);
1794 for(
size_t icount = 0; icount < ( size -
sizeof(uint32_t) + 1 ); icount +=
sizeof(uint32_t) )
1796 if ( sPtr >= ePtr )
break;
1798 XMStoreFloat4( &f, XMVectorClamp( *sPtr++, zero, clamp ) );
1799 *dPtr++ = (
static_cast<uint32_t
>( f.x * 16777215.f ) & 0xFFFFFF)
1800 | ((
static_cast<uint32_t
>( f.y ) & 0xFF) << 24);
// Two-channel 8-bit formats delegate to STORE_SCANLINE (invocations lost
// in extraction).
1806 case DXGI_FORMAT_R8G8_UNORM:
1809 case DXGI_FORMAT_R8G8_UINT:
1812 case DXGI_FORMAT_R8G8_SNORM:
1815 case DXGI_FORMAT_R8G8_SINT:
// R16_FLOAT: half-float scalar from the X channel.
1818 case DXGI_FORMAT_R16_FLOAT:
1819 if ( size >=
sizeof(HALF) )
1821 HALF * __restrict dPtr =
reinterpret_cast<HALF*
>(pDestination);
1822 for(
size_t icount = 0; icount < ( size -
sizeof(HALF) + 1 ); icount +=
sizeof(HALF) )
1824 if ( sPtr >= ePtr )
break;
1825 float v = XMVectorGetX( *sPtr++ );
1826 *(dPtr++) = XMConvertFloatToHalf(v);
// 16-bit UNORM scalar: clamp X to [0,1] and scale to 0..65535 with
// round-half-up (+0.5).
1832 case DXGI_FORMAT_D16_UNORM:
1833 case DXGI_FORMAT_R16_UNORM:
1834 if ( size >=
sizeof(uint16_t) )
1836 uint16_t * __restrict dPtr =
reinterpret_cast<uint16_t*
>(pDestination);
1837 for(
size_t icount = 0; icount < ( size -
sizeof(uint16_t) + 1 ); icount +=
sizeof(uint16_t) )
1839 if ( sPtr >= ePtr )
break;
1840 float v = XMVectorGetX( *sPtr++ );
1841 v = std::max<float>( std::min<float>( v, 1.f ), 0.f );
1842 *(dPtr++) = static_cast<uint16_t>( v*65535.f + 0.5f );
// 16-bit UINT scalar: clamp X to [0,65535] and truncate.
1848 case DXGI_FORMAT_R16_UINT:
1849 if ( size >=
sizeof(uint16_t) )
1851 uint16_t * __restrict dPtr =
reinterpret_cast<uint16_t*
>(pDestination);
1852 for(
size_t icount = 0; icount < ( size -
sizeof(uint16_t) + 1 ); icount +=
sizeof(uint16_t) )
1854 if ( sPtr >= ePtr )
break;
1855 float v = XMVectorGetX( *sPtr++ );
1856 v = std::max<float>( std::min<float>( v, 65535.f ), 0.f );
1857 *(dPtr++) = static_cast<uint16_t>(v);
// 16-bit SNORM scalar: clamp X to [-1,1], scale by 32767.
1863 case DXGI_FORMAT_R16_SNORM:
1864 if ( size >=
sizeof(int16_t) )
1866 int16_t * __restrict dPtr =
reinterpret_cast<int16_t*
>(pDestination);
1867 for(
size_t icount = 0; icount < ( size -
sizeof(int16_t) + 1 ); icount +=
sizeof(int16_t) )
1869 if ( sPtr >= ePtr )
break;
1870 float v = XMVectorGetX( *sPtr++ );
1871 v = std::max<float>( std::min<float>( v, 1.f ), -1.f );
// NOTE(review): v*32767 can be negative here but is cast to uint16_t
// before being stored through an int16_t* — float-to-unsigned of a
// negative value is undefined behavior; confirm against the original
// (an int16_t cast looks intended).
1872 *(dPtr++) = static_cast<uint16_t>( v * 32767.f );
// 16-bit SINT scalar: clamp X to [-32767,32767] and truncate.
1878 case DXGI_FORMAT_R16_SINT:
1879 if ( size >=
sizeof(int16_t) )
1881 int16_t * __restrict dPtr =
reinterpret_cast<int16_t*
>(pDestination);
1882 for(
size_t icount = 0; icount < ( size -
sizeof(int16_t) + 1 ); icount +=
sizeof(int16_t) )
1884 if ( sPtr >= ePtr )
break;
1885 float v = XMVectorGetX( *sPtr++ );
1886 v = std::max<float>( std::min<float>( v, 32767.f ), -32767.f );
1887 *(dPtr++) = static_cast<int16_t>(v);
// 8-bit scalar stores. These loops use 'icount < size' rather than the
// '(size - sizeof + 1)' form used elsewhere — equivalent when the element
// is one byte.
// R8_UNORM: clamp X to [0,1], scale by 255 (truncating, no +0.5 rounding
// here, unlike the 16-bit UNORM path above).
1893 case DXGI_FORMAT_R8_UNORM:
1894 if ( size >=
sizeof(
uint8_t) )
1896 uint8_t * __restrict dPtr =
reinterpret_cast<uint8_t*
>(pDestination);
1897 for(
size_t icount = 0; icount <
size; icount +=
sizeof(
uint8_t) )
1899 if ( sPtr >= ePtr )
break;
1900 float v = XMVectorGetX( *sPtr++ );
1901 v = std::max<float>( std::min<float>( v, 1.f ), 0.f );
1902 *(dPtr++) = static_cast<uint8_t>( v * 255.f);
// R8_UINT: clamp X to [0,255] and truncate.
1908 case DXGI_FORMAT_R8_UINT:
1909 if ( size >=
sizeof(
uint8_t) )
1911 uint8_t * __restrict dPtr =
reinterpret_cast<uint8_t*
>(pDestination);
1912 for(
size_t icount = 0; icount <
size; icount +=
sizeof(
uint8_t) )
1914 if ( sPtr >= ePtr )
break;
1915 float v = XMVectorGetX( *sPtr++ );
1916 v = std::max<float>( std::min<float>( v, 255.f ), 0.f );
1917 *(dPtr++) = static_cast<uint8_t>(v);
// R8_SNORM: clamp X to [-1,1], scale by 127.
1923 case DXGI_FORMAT_R8_SNORM:
1924 if ( size >=
sizeof(int8_t) )
1926 int8_t * __restrict dPtr =
reinterpret_cast<int8_t*
>(pDestination);
1927 for(
size_t icount = 0; icount <
size; icount +=
sizeof(int8_t) )
1929 if ( sPtr >= ePtr )
break;
1930 float v = XMVectorGetX( *sPtr++ );
1931 v = std::max<float>( std::min<float>( v, 1.f ), -1.f );
1932 *(dPtr++) = static_cast<int8_t>( v * 127.f );
// R8_SINT: clamp X to [-127,127] and truncate.
1938 case DXGI_FORMAT_R8_SINT:
1939 if ( size >=
sizeof(int8_t) )
1941 int8_t * __restrict dPtr =
reinterpret_cast<int8_t*
>(pDestination);
1942 for(
size_t icount = 0; icount <
size; icount +=
sizeof(int8_t) )
1944 if ( sPtr >= ePtr )
break;
1945 float v = XMVectorGetX( *sPtr++ );
1946 v = std::max<float>( std::min<float>( v, 127.f ), -127.f );
1947 *(dPtr++) = static_cast<int8_t>( v );
// A8_UNORM: like R8_UNORM but sources the W (alpha) channel.
1953 case DXGI_FORMAT_A8_UNORM:
1954 if ( size >=
sizeof(
uint8_t) )
1956 uint8_t * __restrict dPtr =
reinterpret_cast<uint8_t*
>(pDestination);
1957 for(
size_t icount = 0; icount <
size; icount +=
sizeof(
uint8_t) )
1959 if ( sPtr >= ePtr )
break;
1960 float v = XMVectorGetW( *sPtr++ );
1961 v = std::max<float>( std::min<float>( v, 1.f ), 0.f );
1962 *(dPtr++) = static_cast<uint8_t>( v * 255.f);
// R1_UNORM: pack 8 source pixels per output byte, MSB first
// (bit 'bcount-1' for the bcount-th pixel). The threshold test on 'v'
// and the 'pixels' accumulator declaration/store were lost in extraction.
1968 case DXGI_FORMAT_R1_UNORM:
1969 if ( size >=
sizeof(
uint8_t) )
1971 uint8_t * __restrict dPtr =
reinterpret_cast<uint8_t*
>(pDestination);
1972 for(
size_t icount = 0; icount <
size; icount +=
sizeof(
uint8_t) )
1975 for(
size_t bcount = 8; bcount > 0; --bcount )
1977 if ( sPtr >= ePtr )
break;
1978 float v = XMVectorGetX( *sPtr++ );
1984 pixels |= 1 << (bcount-1);
// R9G9B9E5_SHAREDEXP: three 9-bit mantissas sharing one 5-bit exponent.
// With DirectXMath >= 3.06 a library routine is used (not visible here);
// this is the manual fallback.
1992 case DXGI_FORMAT_R9G9B9E5_SHAREDEXP:
1993 #if DIRECTX_MATH_VERSION >= 306
1996 if ( size >=
sizeof(XMFLOAT3SE) )
// maxf9: largest representable value (0x1FF mantissa at max exponent);
// minf9: smallest shared-exponent step, keeps the exponent in range.
1998 static const float maxf9 = float(0x1FF << 7);
1999 static const float minf9 = float(1.f / (1 << 16));
2001 XMFLOAT3SE * __restrict dPtr =
reinterpret_cast<XMFLOAT3SE*
>(pDestination);
2002 for(
size_t icount = 0; icount < ( size -
sizeof(XMFLOAT3SE) + 1 ); icount +=
sizeof(XMFLOAT3SE) )
2004 if ( sPtr >= ePtr )
break;
2007 XMStoreFloat3( &rgb, *(sPtr++) );
// Clamp each channel to [0, maxf9]; negatives flush to zero.
2009 float r = (rgb.x >= 0.f) ? ( (rgb.x > maxf9) ? maxf9 : rgb.x ) : 0.f;
2010 float g = (rgb.y >= 0.f) ? ( (rgb.y > maxf9) ? maxf9 : rgb.y ) : 0.f;
2011 float b = (rgb.z >= 0.f) ? ( (rgb.z > maxf9) ? maxf9 : rgb.z ) : 0.f;
// The shared exponent is driven by the largest channel.
2013 const float max_rg = (r > g) ? r : g;
2014 const float max_rgb = (max_rg >
b) ? max_rg : b;
2016 const float maxColor = (max_rgb > minf9) ? max_rgb : minf9;
// Type-pun via union to derive the exponent/scale directly from the
// float's IEEE-754 bits (the 'fi.f = maxColor' assignment line appears
// to have been lost in extraction).
2018 union {
float f; INT32 i; } fi;
2022 dPtr->e = (fi.i - 0x37800000) >> 23;
// Reciprocal-scale trick: 0x83000000 - bits(maxColor) approximates the
// bits of the scale factor that maps each channel into 9-bit mantissa
// range.
2024 fi.i = 0x83000000 - fi.i;
2025 float ScaleR = fi.f;
2027 dPtr->xm =
static_cast<uint32_t
>( round_to_nearest(r * ScaleR) );
2028 dPtr->ym =
static_cast<uint32_t
>( round_to_nearest(g * ScaleR) );
2029 dPtr->zm =
static_cast<uint32_t
>( round_to_nearest(b * ScaleR) );
// R8G8_B8G8 / G8R8_G8B8: 4:2:2-style packed formats where each 32-bit
// element encodes two pixels sharing chroma-like channels. The second
// pixel contributes only its Y (green) channel; if the scanline ends on
// an odd pixel, zero is substituted.
2037 case DXGI_FORMAT_R8G8_B8G8_UNORM:
2038 if ( size >=
sizeof(XMUBYTEN4) )
2040 XMUBYTEN4 * __restrict dPtr =
reinterpret_cast<XMUBYTEN4*
>(pDestination);
2041 for(
size_t icount = 0; icount < ( size -
sizeof(XMUBYTEN4) + 1 ); icount +=
sizeof(XMUBYTEN4) )
2043 if ( sPtr >= ePtr )
break;
2044 XMVECTOR v0 = *sPtr++;
2045 XMVECTOR v1 = (sPtr < ePtr) ? XMVectorSplatY( *sPtr++ ) : XMVectorZero();
// Keep xyz of pixel 0, take w (the second G) from pixel 1.
2046 XMVECTOR v = XMVectorSelect( v1, v0, g_XMSelect1110 );
2047 XMStoreUByteN4( dPtr++, v );
2053 case DXGI_FORMAT_G8R8_G8B8_UNORM:
2054 if ( size >=
sizeof(XMUBYTEN4) )
// Select mask keeping channels 0,1,3 of the swizzled first pixel and
// channel 2 from the second pixel's G.
2056 static XMVECTORI32 select1101 = {XM_SELECT_1, XM_SELECT_1, XM_SELECT_0, XM_SELECT_1};
2058 XMUBYTEN4 * __restrict dPtr =
reinterpret_cast<XMUBYTEN4*
>(pDestination);
2059 for(
size_t icount = 0; icount < ( size -
sizeof(XMUBYTEN4) + 1 ); icount +=
sizeof(XMUBYTEN4) )
2061 if ( sPtr >= ePtr )
break;
2062 XMVECTOR v0 = XMVectorSwizzle<1, 0, 3, 2>( *sPtr++ );
2063 XMVECTOR v1 = (sPtr < ePtr) ? XMVectorSplatY( *sPtr++ ) : XMVectorZero();
2064 XMVECTOR v = XMVectorSelect( v1, v0, select1101 );
2065 XMStoreUByteN4( dPtr++, v );
// B5G6R5: swizzle RGBA->BGRA then scale to 5:6:5 integer ranges.
2071 case DXGI_FORMAT_B5G6R5_UNORM:
2072 if ( size >=
sizeof(XMU565) )
2074 static XMVECTORF32 s_Scale = { 31.f, 63.f, 31.f, 1.f };
2075 XMU565 * __restrict dPtr =
reinterpret_cast<XMU565*
>(pDestination);
2076 for(
size_t icount = 0; icount < ( size -
sizeof(XMU565) + 1 ); icount +=
sizeof(XMU565) )
2078 if ( sPtr >= ePtr )
break;
2079 XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( *sPtr++ );
2080 v = XMVectorMultiply( v, s_Scale );
2081 XMStoreU565( dPtr++, v );
// B5G5R5A1: 5:5:5 color plus a 1-bit alpha decided by comparing the
// source alpha against 'threshold' (a function parameter whose
// declaration line was lost in extraction).
2087 case DXGI_FORMAT_B5G5R5A1_UNORM:
2088 if ( size >=
sizeof(XMU555) )
2090 static XMVECTORF32 s_Scale = { 31.f, 31.f, 31.f, 1.f };
2091 XMU555 * __restrict dPtr =
reinterpret_cast<XMU555*
>(pDestination);
2092 for(
size_t icount = 0; icount < ( size -
sizeof(XMU555) + 1 ); icount +=
sizeof(XMU555) )
2094 if ( sPtr >= ePtr )
break;
2095 XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( *sPtr++ );
2096 v = XMVectorMultiply( v, s_Scale );
// dPtr is not advanced by XMStoreU555 here; the increment presumably
// happens on a line lost in extraction after ->w is patched.
2097 XMStoreU555( dPtr, v );
2098 dPtr->w = ( XMVectorGetW( v ) >
threshold ) ? 1 : 0;
// B8G8R8A8: swizzle RGBA -> BGRA and store as four UNORM bytes.
2105 case DXGI_FORMAT_B8G8R8A8_UNORM:
2106 case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
2107 if ( size >=
sizeof(XMUBYTEN4) )
2109 XMUBYTEN4 * __restrict dPtr =
reinterpret_cast<XMUBYTEN4*
>(pDestination);
2110 for(
size_t icount = 0; icount < ( size -
sizeof(XMUBYTEN4) + 1 ); icount +=
sizeof(XMUBYTEN4) )
2112 if ( sPtr >= ePtr )
break;
2113 XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( *sPtr++ );
2114 XMStoreUByteN4( dPtr++, v );
// B8G8R8X8: like BGRA but the unused X byte is forced to 1.0 by
// permuting in g_XMIdentityR3's w component.
2120 case DXGI_FORMAT_B8G8R8X8_UNORM:
2121 case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB:
2122 if ( size >=
sizeof(XMUBYTEN4) )
2124 XMUBYTEN4 * __restrict dPtr =
reinterpret_cast<XMUBYTEN4*
>(pDestination);
2125 for(
size_t icount = 0; icount < ( size -
sizeof(XMUBYTEN4) + 1 ); icount +=
sizeof(XMUBYTEN4) )
2127 if ( sPtr >= ePtr )
break;
2128 XMVECTOR v = XMVectorPermute<2, 1, 0, 7>( *sPtr++, g_XMIdentityR3 );
2129 XMStoreUByteN4( dPtr++, v );
// AYUV: convert RGB to 8-bit studio-swing YUV (coefficients 66/129/25
// etc. match the common BT.601 fixed-point matrix — confirm) and store
// as V,U,Y in x,y,z. The alpha store and dPtr increment lines were lost
// in extraction.
2135 case DXGI_FORMAT_AYUV:
2136 if ( size >=
sizeof(XMUBYTEN4) )
2138 XMUBYTEN4 * __restrict dPtr =
reinterpret_cast<XMUBYTEN4*
>(pDestination);
2139 for(
size_t icount = 0; icount < ( size -
sizeof(XMUBYTEN4) + 1 ); icount +=
sizeof(XMUBYTEN4) )
2141 if ( sPtr >= ePtr )
break;
2144 XMStoreUByteN4( &rgba, *sPtr++ );
// +128 before >>8 rounds; +16/+128 apply the video-range offsets.
2152 int y = ( ( 66 * rgba.x + 129 * rgba.y + 25 * rgba.z + 128) >> 8) + 16;
2153 int u = ( ( -38 * rgba.x - 74 * rgba.y + 112 * rgba.z + 128) >> 8) + 128;
2154 int v = ( ( 112 * rgba.x - 94 * rgba.y - 18 * rgba.z + 128) >> 8) + 128;
2156 dPtr->x =
static_cast<uint8_t>( std::min<int>( std::max<int>( v, 0 ), 255 ) );
2157 dPtr->y =
static_cast<uint8_t>( std::min<int>( std::max<int>( u, 0 ), 255 ) );
2158 dPtr->z =
static_cast<uint8_t>( std::min<int>( std::max<int>(
y, 0 ), 255 ) );
// Y410: 10-bit packed YUV 4:4:4. RGB is first quantized to 10-bit via
// XMStoreUDecN4, then converted with 16-bit fixed-point coefficients;
// +64 / +512 are the 10-bit video-range offsets. The r/g/b extraction
// from 'rgba' and the alpha/dPtr-increment lines were lost in extraction.
2166 case DXGI_FORMAT_Y410:
2167 if ( size >=
sizeof(XMUDECN4) )
2169 XMUDECN4 * __restrict dPtr =
reinterpret_cast<XMUDECN4*
>(pDestination);
2170 for(
size_t icount = 0; icount < ( size -
sizeof(XMUDECN4) + 1 ); icount +=
sizeof(XMUDECN4) )
2172 if ( sPtr >= ePtr )
break;
2175 XMStoreUDecN4( &rgba, *sPtr++ );
2187 int y =
static_cast<int>( ( 16780 * r + 32942 * g + 6544 * b + 32768) >> 16) + 64;
2188 int u =
static_cast<int>( ( -9683 * r - 19017 * g + 28700 * b + 32768) >> 16) + 512;
2189 int v =
static_cast<int>( ( 28700 * r - 24033 * g - 4667 * b + 32768) >> 16) + 512;
// Layout is U,Y,V in the 10-bit x,y,z fields.
2191 dPtr->x =
static_cast<uint32_t
>( std::min<int>( std::max<int>( u, 0 ), 1023 ) );
2192 dPtr->y =
static_cast<uint32_t
>( std::min<int>( std::max<int>(
y, 0 ), 1023 ) );
2193 dPtr->z =
static_cast<uint32_t
>( std::min<int>( std::max<int>( v, 0 ), 1023 ) );
// Y416: 16-bit packed YUV 4:4:4; same structure as Y410 but quantized
// via XMStoreUShortN4 and using 16-bit offsets (+4096 / +32768).
// int64_t intermediates avoid overflow of the 16-bit * 16-bit products.
2201 case DXGI_FORMAT_Y416:
2202 if ( size >=
sizeof(XMUSHORTN4) )
2204 XMUSHORTN4 * __restrict dPtr =
reinterpret_cast<XMUSHORTN4*
>(pDestination);
2205 for(
size_t icount = 0; icount < ( size -
sizeof(XMUSHORTN4) + 1 ); icount +=
sizeof(XMUSHORTN4) )
2207 if ( sPtr >= ePtr )
break;
2210 XMStoreUShortN4( &rgba, *sPtr++ );
2218 int64_t r = int64_t(rgba.x);
2219 int64_t g = int64_t(rgba.y);
2220 int64_t
b = int64_t(rgba.z);
2222 int y =
static_cast<int>( ( 16763 * r + 32910 * g + 6537 * b + 32768) >> 16) + 4096;
2223 int u =
static_cast<int>( ( -9674 * r - 18998 * g + 28672 * b + 32768) >> 16) + 32768;
2224 int v =
static_cast<int>( ( 28672 * r - 24010 * g - 4662 * b + 32768) >> 16) + 32768;
2226 dPtr->x =
static_cast<uint16_t
>( std::min<int>( std::max<int>( u, 0 ), 65535 ) );
2227 dPtr->y =
static_cast<uint16_t
>( std::min<int>( std::max<int>(
y, 0 ), 65535 ) );
2228 dPtr->z =
static_cast<uint16_t
>( std::min<int>( std::max<int>( v, 0 ), 65535 ) );
// YUY2: 4:2:2 — each output element packs two pixels' luma (y0, y1) with
// chroma averaged across the pair ((u0+u1)/2, (v0+v1)/2). If the scanline
// ends on an odd pixel, the second pixel is treated as black (zeros).
2236 case DXGI_FORMAT_YUY2:
2237 if ( size >=
sizeof(XMUBYTEN4) )
2239 XMUBYTEN4 * __restrict dPtr =
reinterpret_cast<XMUBYTEN4*
>(pDestination);
2240 for(
size_t icount = 0; icount < ( size -
sizeof(XMUBYTEN4) + 1 ); icount +=
sizeof(XMUBYTEN4) )
2242 if ( sPtr >= ePtr )
break;
2245 XMStoreUByteN4( &rgb1, *sPtr++ );
2248 int y0 = ( ( 66 * rgb1.x + 129 * rgb1.y + 25 * rgb1.z + 128) >> 8) + 16;
2249 int u0 = ( ( -38 * rgb1.x - 74 * rgb1.y + 112 * rgb1.z + 128) >> 8) + 128;
2250 int v0 = ( ( 112 * rgb1.x - 94 * rgb1.y - 18 * rgb1.z + 128) >> 8) + 128;
// Second pixel of the pair (the sPtr<ePtr guard line was lost in
// extraction; the zero-fill below is the odd-width fallback).
2255 XMStoreUByteN4( &rgb2, *sPtr++ );
2259 rgb2.x = rgb2.y = rgb2.z = rgb2.w = 0;
2262 int y1 = ( ( 66 * rgb2.x + 129 * rgb2.y + 25 * rgb2.z + 128) >> 8) + 16;
2263 int u1 = ( ( -38 * rgb2.x - 74 * rgb2.y + 112 * rgb2.z + 128) >> 8) + 128;
2264 int v1 = ( ( 112 * rgb2.x - 94 * rgb2.y - 18 * rgb2.z + 128) >> 8) + 128;
// Packed order: Y0, U, Y1, V.
2266 dPtr->x =
static_cast<uint8_t>( std::min<int>( std::max<int>( y0, 0 ), 255 ) );
2267 dPtr->y =
static_cast<uint8_t>( std::min<int>( std::max<int>( (u0 + u1) >> 1, 0 ), 255 ) );
2268 dPtr->z =
static_cast<uint8_t>( std::min<int>( std::max<int>( y1, 0 ), 255 ) );
2269 dPtr->w =
static_cast<uint8_t>( std::min<int>( std::max<int>( (v0 + v1) >> 1, 0 ), 255 ) );
// Y210: 10-bit 4:2:2. Same pairing scheme, but values are quantized to
// 10 bits (XMStoreUDecN4) and the final '<< 6' places each 10-bit value
// in the upper bits of its 16-bit field, as the format requires.
// r/g/b extraction from rgb1/rgb2 was lost in extraction.
2276 case DXGI_FORMAT_Y210:
2278 if ( size >=
sizeof(XMUSHORTN4) )
2280 XMUSHORTN4 * __restrict dPtr =
reinterpret_cast<XMUSHORTN4*
>(pDestination);
2281 for(
size_t icount = 0; icount < ( size -
sizeof(XMUSHORTN4) + 1 ); icount +=
sizeof(XMUSHORTN4) )
2283 if ( sPtr >= ePtr )
break;
2286 XMStoreUDecN4( &rgb1, *sPtr++ );
2293 int y0 =
static_cast<int>( ( 16780 * r + 32942 * g + 6544 * b + 32768) >> 16) + 64;
2294 int u0 =
static_cast<int>( ( -9683 * r - 19017 * g + 28700 * b + 32768) >> 16) + 512;
2295 int v0 =
static_cast<int>( ( 28700 * r - 24033 * g - 4667 * b + 32768) >> 16) + 512;
2300 XMStoreUDecN4( &rgb2, *sPtr++ );
2304 rgb2.x = rgb2.y = rgb2.z = rgb2.w = 0;
2311 int y1 =
static_cast<int>( ( 16780 * r + 32942 * g + 6544 * b + 32768) >> 16) + 64;
2312 int u1 =
static_cast<int>( ( -9683 * r - 19017 * g + 28700 * b + 32768) >> 16) + 512;
2313 int v1 =
static_cast<int>( ( 28700 * r - 24033 * g - 4667 * b + 32768) >> 16) + 512;
2315 dPtr->x =
static_cast<uint16_t
>( std::min<int>( std::max<int>( y0, 0 ), 1023 ) << 6 );
2316 dPtr->y =
static_cast<uint16_t
>( std::min<int>( std::max<int>( (u0 + u1) >> 1, 0 ), 1023 ) << 6 );
2317 dPtr->z =
static_cast<uint16_t
>( std::min<int>( std::max<int>( y1, 0 ), 1023 ) << 6 );
2318 dPtr->w =
static_cast<uint16_t
>( std::min<int>( std::max<int>( (v0 + v1) >> 1, 0 ), 1023 ) << 6 );
// Y216: 16-bit 4:2:2 — same pairing/averaging scheme as YUY2/Y210 above,
// full 16-bit precision with +4096/+32768 video-range offsets. int64_t
// keeps the 16x16-bit products from overflowing.
2325 case DXGI_FORMAT_Y216:
2326 if ( size >=
sizeof(XMUSHORTN4) )
2328 XMUSHORTN4 * __restrict dPtr =
reinterpret_cast<XMUSHORTN4*
>(pDestination);
2329 for(
size_t icount = 0; icount < ( size -
sizeof(XMUSHORTN4) + 1 ); icount +=
sizeof(XMUSHORTN4) )
2331 if ( sPtr >= ePtr )
break;
2334 XMStoreUShortN4( &rgb1, *sPtr++ );
2337 int64_t r = int64_t(rgb1.x);
2338 int64_t g = int64_t(rgb1.y);
2339 int64_t
b = int64_t(rgb1.z);
2341 int y0 =
static_cast<int>( ( 16763 * r + 32910 * g + 6537 * b + 32768) >> 16) + 4096;
2342 int u0 =
static_cast<int>( (-9674 * r - 18998 * g + 28672 * b + 32768) >> 16) + 32768;
2343 int v0 =
static_cast<int>( ( 28672 * r - 24010 * g - 4662 * b + 32768) >> 16) + 32768;
// Second pixel; zero-filled when the scanline has odd width (the
// sPtr<ePtr guard line was lost in extraction).
2348 XMStoreUShortN4( &rgb2, *sPtr++ );
2352 rgb2.x = rgb2.y = rgb2.z = rgb2.w = 0;
2355 r = int64_t(rgb2.x);
2356 g = int64_t(rgb2.y);
2357 b = int64_t(rgb2.z);
2359 int y1 =
static_cast<int>( ( 16763 * r + 32910 * g + 6537 * b + 32768) >> 16) + 4096;
2360 int u1 =
static_cast<int>( (-9674 * r - 18998 * g + 28672 * b + 32768) >> 16) + 32768;
2361 int v1 =
static_cast<int>( ( 28672 * r - 24010 * g - 4662 * b + 32768) >> 16) + 32768;
2363 dPtr->x =
static_cast<uint16_t
>( std::min<int>( std::max<int>( y0, 0 ), 65535 ) );
2364 dPtr->y =
static_cast<uint16_t
>( std::min<int>( std::max<int>( (u0 + u1) >> 1, 0 ), 65535 ) );
2365 dPtr->z =
static_cast<uint16_t
>( std::min<int>( std::max<int>( y1, 0 ), 65535 ) );
2366 dPtr->w =
static_cast<uint16_t
>( std::min<int>( std::max<int>( (v0 + v1) >> 1, 0 ), 65535 ) );
// B4G4R4A4: swizzle to BGRA, scale each channel to 4-bit range.
2373 case DXGI_FORMAT_B4G4R4A4_UNORM:
2374 if ( size >=
sizeof(XMUNIBBLE4) )
2376 static XMVECTORF32 s_Scale = { 15.f, 15.f, 15.f, 15.f };
2377 XMUNIBBLE4 * __restrict dPtr =
reinterpret_cast<XMUNIBBLE4*
>(pDestination);
2378 for(
size_t icount = 0; icount < ( size -
sizeof(XMUNIBBLE4) + 1 ); icount +=
sizeof(XMUNIBBLE4) )
2380 if ( sPtr >= ePtr )
break;
2381 XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( *sPtr++ );
2382 v = XMVectorMultiply( v, s_Scale );
2383 XMStoreUNibble4( dPtr++, v );
// Xbox 7e3 format (10-bit float per color channel, 2-bit alpha). The case
// label line was lost in extraction. Channels are clamped to the 7e3
// representable maximum (31.875 = FloatFrom7e3 max) before packing with
// FloatTo7e3 (defined near the top of this file).
2391 if ( size >=
sizeof(XMUDECN4) )
2393 static const XMVECTORF32 Scale = { 1.0f, 1.0f, 1.0f, 3.0f };
2394 static const XMVECTORF32 C = { 31.875f, 31.875f, 31.875f, 3.f };
2396 XMUDECN4 * __restrict dPtr =
reinterpret_cast<XMUDECN4*
>(pDestination);
2397 for(
size_t icount = 0; icount < ( size -
sizeof(XMUDECN4) + 1 ); icount +=
sizeof(XMUDECN4) )
2399 if ( sPtr >= ePtr )
break;
2401 XMVECTOR V = XMVectorMultiply( *sPtr++, Scale );
2402 V = XMVectorClamp( V, g_XMZero, C );
2405 XMStoreFloat4A( &tmp, V );
2407 dPtr->x = FloatTo7e3( tmp.x );
2408 dPtr->y = FloatTo7e3( tmp.y );
2409 dPtr->z = FloatTo7e3( tmp.z );
// 2-bit alpha, already scaled to 0..3 above.
2410 dPtr->w = (uint32_t)tmp.w;
// Xbox 6e4 format (10-bit float, different mantissa/exponent split);
// same structure, clamped to the 6e4 maximum of 508.
2419 if ( size >=
sizeof(XMUDECN4) )
2421 static const XMVECTORF32 Scale = { 1.0f, 1.0f, 1.0f, 3.0f };
2422 static const XMVECTORF32 C = { 508.f, 508.f, 508.f, 3.f };
2424 XMUDECN4 * __restrict dPtr =
reinterpret_cast<XMUDECN4*
>(pDestination);
2425 for(
size_t icount = 0; icount < ( size -
sizeof(XMUDECN4) + 1 ); icount +=
sizeof(XMUDECN4) )
2427 if ( sPtr >= ePtr )
break;
2429 XMVECTOR V = XMVectorMultiply( *sPtr++, Scale );
2430 V = XMVectorClamp( V, g_XMZero, C );
2433 XMStoreFloat4A( &tmp, V );
2435 dPtr->x = FloatTo6e4( tmp.x );
2436 dPtr->y = FloatTo6e4( tmp.y );
2437 dPtr->z = FloatTo6e4( tmp.z );
2438 dPtr->w = (uint32_t)tmp.w;
// End of _StoreScanline's format switch.
2452 #undef STORE_SCANLINE
// Fragments of the R32G32B32A32 conversion helpers (convert-to / convert-
// from RGBA32F for images). Extraction dropped most of their bodies —
// only the per-row loops, asserts, and null checks survive. Do not rely
// on this text for behavior; consult the original file.
2458 _Use_decl_annotations_
// Per-row loop over the source image (body lost in extraction).
2483 for(
size_t h = 0; h < srcImage.
height; ++h )
2498 _Use_decl_annotations_
// This helper requires the source already be RGBA32F.
2501 assert( srcImage.
format == DXGI_FORMAT_R32G32B32A32_FLOAT );
2512 for(
size_t h = 0; h < srcImage.
height; ++h )
2524 _Use_decl_annotations_
2551 _Use_decl_annotations_
2559 assert( metadata.
format == DXGI_FORMAT_R32G32B32A32_FLOAT );
// Multi-image variant: iterate every subimage, verifying each source is
// RGBA32F and both pixel pointers are valid before converting row by row.
2580 for(
size_t index=0; index < nimages; ++index )
2582 const Image& src = srcImages[ index ];
2583 const Image& dst = dest[ index ];
2585 assert( src.
format == DXGI_FORMAT_R32G32B32A32_FLOAT );
2596 if ( !pSrc || !pDest )
2602 for(
size_t h=0; h < src.
height; ++h )
// Linear-RGB -> sRGB encode, used only when DirectXMath predates 3.06
// (which provides this natively). Implements the piecewise sRGB curve:
// below the cutoff use the linear 12.92*v segment, otherwise
// 1.055*v^(1/2.4) - 0.055. Alpha passes through unchanged.
// NOTE(review): the function signature line was lost in extraction; the
// parameter is referred to as 'rgb' below.
2626 #if DIRECTX_MATH_VERSION < 306
2629 static const XMVECTORF32 Cutoff = { 0.0031308f, 0.0031308f, 0.0031308f, 1.f };
2630 static const XMVECTORF32 Linear = { 12.92f, 12.92f, 12.92f, 1.f };
2631 static const XMVECTORF32 Scale = { 1.055f, 1.055f, 1.055f, 1.f };
2632 static const XMVECTORF32 Bias = { 0.055f, 0.055f, 0.055f, 0.f };
2633 static const XMVECTORF32 InvGamma = { 1.0f/2.4f, 1.0f/2.4f, 1.0f/2.4f, 1.f };
// Clamp input to [0,1] first; select per-channel between the two segments.
2635 XMVECTOR V = XMVectorSaturate(rgb);
2636 XMVECTOR V0 = XMVectorMultiply( V, Linear );
2637 XMVECTOR V1 = Scale * XMVectorPow( V, InvGamma ) - Bias;
2638 XMVECTOR select = XMVectorLess( V, Cutoff );
2639 V = XMVectorSelect( V1, V0, select );
// g_XMSelect1110 keeps the converted xyz and the original alpha.
2640 return XMVectorSelect( rgb, V, g_XMSelect1110 );
// _StoreScanlineLinear: stores a scanline like _StoreScanline, but first
// applies linear->sRGB encoding in-place when the target format is an
// sRGB format (or when requested via 'flags' — the flag-handling lines
// were lost in extraction).
2644 _Use_decl_annotations_
2646 XMVECTOR* pSource,
size_t count, DWORD
flags )
2648 assert( pDestination && size > 0 );
2649 assert( pSource && count > 0 && (((uintptr_t)pSource & 0xF) == 0) );
// Formats that are inherently sRGB-encoded.
2654 case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
2655 case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
2656 case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB:
// Formats that may carry sRGB data when an "out sRGB" flag is set
// (the switch/flag logic between these lists was lost in extraction).
2660 case DXGI_FORMAT_R32G32B32A32_FLOAT:
2661 case DXGI_FORMAT_R32G32B32_FLOAT:
2662 case DXGI_FORMAT_R16G16B16A16_FLOAT:
2663 case DXGI_FORMAT_R16G16B16A16_UNORM:
2664 case DXGI_FORMAT_R32G32_FLOAT:
2665 case DXGI_FORMAT_R10G10B10A2_UNORM:
2666 case DXGI_FORMAT_R11G11B10_FLOAT:
2667 case DXGI_FORMAT_R8G8B8A8_UNORM:
2668 case DXGI_FORMAT_R16G16_FLOAT:
2669 case DXGI_FORMAT_R16G16_UNORM:
2670 case DXGI_FORMAT_R32_FLOAT:
2671 case DXGI_FORMAT_R8G8_UNORM:
2672 case DXGI_FORMAT_R16_FLOAT:
2673 case DXGI_FORMAT_R16_UNORM:
2674 case DXGI_FORMAT_R8_UNORM:
2675 case DXGI_FORMAT_R9G9B9E5_SHAREDEXP:
2676 case DXGI_FORMAT_R8G8_B8G8_UNORM:
2677 case DXGI_FORMAT_G8R8_G8B8_UNORM:
2678 case DXGI_FORMAT_B5G6R5_UNORM:
2679 case DXGI_FORMAT_B5G5R5A1_UNORM:
2680 case DXGI_FORMAT_B8G8R8A8_UNORM:
2681 case DXGI_FORMAT_B8G8R8X8_UNORM:
2682 case DXGI_FORMAT_B4G4R4A4_UNORM:
// In-place sRGB encode of the source buffer (the XMColorRGBToSRGB call
// inside this loop was lost in extraction), then delegate the actual
// packing to _StoreScanline.
2696 XMVECTOR* ptr = pSource;
2697 for(
size_t i=0; i <
count; ++i, ++ptr )
2703 return _StoreScanline( pDestination, size, format, pSource, count );
// sRGB -> linear-RGB decode, the inverse of the encoder above; only built
// when DirectXMath predates 3.06. Piecewise: below the 0.04045 cutoff use
// v/12.92, otherwise ((v + 0.055)/1.055)^2.4. Alpha passes through.
// NOTE(review): the function signature line was lost in extraction; the
// parameter is referred to as 'srgb' below.
2714 #if DIRECTX_MATH_VERSION < 306
2717 static const XMVECTORF32 Cutoff = { 0.04045f, 0.04045f, 0.04045f, 1.f };
2718 static const XMVECTORF32 ILinear = { 1.f/12.92f, 1.f/12.92f, 1.f/12.92f, 1.f };
2719 static const XMVECTORF32 Scale = { 1.f/1.055f, 1.f/1.055f, 1.f/1.055f, 1.f };
2720 static const XMVECTORF32 Bias = { 0.055f, 0.055f, 0.055f, 0.f };
2721 static const XMVECTORF32 Gamma = { 2.4f, 2.4f, 2.4f, 1.f };
2723 XMVECTOR V = XMVectorSaturate(srgb);
2724 XMVECTOR V0 = XMVectorMultiply( V, ILinear );
2725 XMVECTOR V1 = XMVectorPow( (V + Bias) * Scale, Gamma );
// Note the select sense is inverted relative to the encoder: 'greater'
// picks the gamma segment V1.
2726 XMVECTOR select = XMVectorGreater( V, Cutoff );
2727 V = XMVectorSelect( V0, V1, select );
2728 return XMVectorSelect( srgb, V, g_XMSelect1110 );
// _LoadScanlineLinear: loads a scanline like _LoadScanline, then applies
// sRGB->linear decoding in-place for sRGB formats (or when requested via
// flags — the flag logic lines were lost in extraction). Mirror image of
// _StoreScanlineLinear above.
2732 _Use_decl_annotations_
2736 assert( pDestination && count > 0 && (((uintptr_t)pDestination & 0xF) == 0) );
2737 assert( pSource && size > 0 );
// Inherently sRGB-encoded source formats.
2742 case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
2743 case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
2744 case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB:
// Formats that may carry sRGB data when an "in sRGB" flag is set.
2748 case DXGI_FORMAT_R32G32B32A32_FLOAT:
2749 case DXGI_FORMAT_R32G32B32_FLOAT:
2750 case DXGI_FORMAT_R16G16B16A16_FLOAT:
2751 case DXGI_FORMAT_R16G16B16A16_UNORM:
2752 case DXGI_FORMAT_R32G32_FLOAT:
2753 case DXGI_FORMAT_R10G10B10A2_UNORM:
2754 case DXGI_FORMAT_R11G11B10_FLOAT:
2755 case DXGI_FORMAT_R8G8B8A8_UNORM:
2756 case DXGI_FORMAT_R16G16_FLOAT:
2757 case DXGI_FORMAT_R16G16_UNORM:
2758 case DXGI_FORMAT_R32_FLOAT:
2759 case DXGI_FORMAT_R8G8_UNORM:
2760 case DXGI_FORMAT_R16_FLOAT:
2761 case DXGI_FORMAT_R16_UNORM:
2762 case DXGI_FORMAT_R8_UNORM:
2763 case DXGI_FORMAT_R9G9B9E5_SHAREDEXP:
2764 case DXGI_FORMAT_R8G8_B8G8_UNORM:
2765 case DXGI_FORMAT_G8R8_G8B8_UNORM:
2766 case DXGI_FORMAT_B5G6R5_UNORM:
2767 case DXGI_FORMAT_B5G5R5A1_UNORM:
2768 case DXGI_FORMAT_B8G8R8A8_UNORM:
2769 case DXGI_FORMAT_B8G8R8X8_UNORM:
2770 case DXGI_FORMAT_B4G4R4A4_UNORM:
// Load first, then decode each vector in place (the XMColorSRGBToRGB
// call inside this loop was lost in extraction).
2779 if (
_LoadScanline( pDestination, count, pSource, size, format ) )
2784 XMVECTOR* ptr = pDestination;
2785 for(
size_t i=0; i <
count; ++i, ++ptr )
// Fragments of the format->conversion-flags lookup: a bsearch_s-compatible
// comparator (hence the suppressed PREfast signature warning and the
// unused 'context'), and the accessor returning the found entry's flags
// or 0 when the format is not in the table. Most lines were lost in
// extraction.
2895 #pragma prefast( suppress : 25004, "Signature must match bsearch_s" );
2898 UNREFERENCED_PARAMETER(context);
2905 _Use_decl_annotations_
2922 return (in) ? in->
flags : 0;
// _ConvertScanline: in-place fixups on a buffer of RGBA32F pixels when
// converting between formats whose channel semantics differ (UNORM vs
// SNORM ranges, depth/stencil channel mapping, luminance/alpha splats,
// etc.). The switch/flag scaffolding between the loops was heavily
// damaged by extraction; each surviving loop body shows one fixup.
2925 _Use_decl_annotations_
2928 assert( pBuffer && count > 0 && (((uintptr_t)pBuffer & 0xF) == 0) );
// sRGB-related classification of the input format...
2965 case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
2966 case DXGI_FORMAT_BC1_UNORM_SRGB:
2967 case DXGI_FORMAT_BC2_UNORM_SRGB:
2968 case DXGI_FORMAT_BC3_UNORM_SRGB:
2969 case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
2970 case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB:
2971 case DXGI_FORMAT_BC7_UNORM_SRGB:
2975 case DXGI_FORMAT_A8_UNORM:
2976 case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM:
// ...and the same classification of the output format.
2981 switch ( outFormat )
2983 case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
2984 case DXGI_FORMAT_BC1_UNORM_SRGB:
2985 case DXGI_FORMAT_BC2_UNORM_SRGB:
2986 case DXGI_FORMAT_BC3_UNORM_SRGB:
2987 case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
2988 case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB:
2989 case DXGI_FORMAT_BC7_UNORM_SRGB:
2993 case DXGI_FORMAT_A8_UNORM:
2994 case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM:
3009 XMVECTOR* ptr = pBuffer;
3010 for(
size_t i=0; i <
count; ++i, ++ptr )
// The fixups below run only when input/output semantics differ.
3019 if ( diffFlags != 0)
// SNORM [-1,1] -> UNORM [0,1]: v*0.5 + 0.5.
3026 XMVECTOR* ptr = pBuffer;
3027 for(
size_t i=0; i <
count; ++i )
3030 *ptr++ = XMVectorMultiplyAdd( v, g_XMOneHalf, g_XMOneHalf );
// Clamp to [0,1] (e.g. FLOAT -> UNORM).
3036 XMVECTOR* ptr = pBuffer;
3037 for(
size_t i=0; i <
count; ++i )
3040 *ptr++ = XMVectorSaturate( v );
// UNORM [0,1] -> SNORM [-1,1]: v*2 - 1.
3049 static XMVECTORF32 two = { 2.0f, 2.0f, 2.0f, 2.0f };
3050 XMVECTOR* ptr = pBuffer;
3051 for(
size_t i=0; i <
count; ++i )
3054 *ptr++ = XMVectorMultiplyAdd( v, two, g_XMNegativeOne );
// Clamp to [-1,1] (e.g. FLOAT -> SNORM).
3060 XMVECTOR* ptr = pBuffer;
3061 for(
size_t i=0; i <
count; ++i )
3064 *ptr++ = XMVectorClamp( v, g_XMNegativeOne, g_XMOne )
3076 XMVECTOR* ptr = pBuffer;
3077 for(
size_t i=0; i <
count; ++i )
// Broadcast X across all channels (e.g. single-channel -> RGBA).
3080 *ptr++ = XMVectorSplatX( v );
3086 XMVECTOR* ptr = pBuffer;
3087 for(
size_t i=0; i <
count; ++i )
// Broadcast W (alpha-only source, e.g. A8).
3090 *ptr++ = XMVectorSplatW( v );
3098 XMVECTOR* ptr = pBuffer;
3099 for(
size_t i=0; i <
count; ++i )
// Copy X into the RGB channels, keep existing alpha.
3102 XMVECTOR v1 = XMVectorSplatX( v );
3103 *ptr++ = XMVectorSelect( v, v1, g_XMSelect1110 );
3109 XMVECTOR* ptr = pBuffer;
3110 for(
size_t i=0; i <
count; ++i )
// Copy X into the first two channels only.
3113 XMVECTOR v1 = XMVectorSplatX( v );
3114 *ptr++ = XMVectorSelect( v, v1, g_XMSelect1100 );
3131 XMVECTOR* ptr = pBuffer;
3132 for(
size_t i=0; i <
count; ++i )
// Copy Y into RGB (e.g. stencil/green-sourced data).
3135 XMVECTOR v1 = XMVectorSplatY( v );
3136 *ptr++ = XMVectorSelect( v, v1, g_XMSelect1110 );
3143 XMVECTOR* ptr = pBuffer;
3144 for(
size_t i=0; i <
count; ++i )
// Copy Z into RGB.
3147 XMVECTOR v1 = XMVectorSplatZ( v );
3148 *ptr++ = XMVectorSelect( v, v1, g_XMSelect1110 );
3155 XMVECTOR* ptr = pBuffer;
3156 for(
size_t i=0; i <
count; ++i )
// Another RGB replacement; the v1 computation line was lost in
// extraction.
3160 *ptr++ = XMVectorSelect( v, v1, g_XMSelect1110 );
3173 XMVECTOR* ptr = pBuffer;
3174 for(
size_t i=0; i <
count; ++i )
// Pack XZ into the first two channels (two-channel rearrangement).
3177 XMVECTOR v1 = XMVectorSwizzle<0,2,0,2>( v );
3178 *ptr++ = XMVectorSelect( v, v1, g_XMSelect1100 );
3185 XMVECTOR* ptr = pBuffer;
3186 for(
size_t i=0; i <
count; ++i )
// Shift channels down by one (YZ -> XY) for the first two slots.
3189 XMVECTOR v1 = XMVectorSwizzle<1,2,3,0>( v );
3190 *ptr++ = XMVectorSelect( v, v1, g_XMSelect1100 );
3209 XMVECTOR* ptr = pBuffer;
3210 for(
size_t i=0; i <
count; ++i, ++ptr )
// 4x4 ordered-dither threshold table (values are k/64 - 1/2 for the
// classic Bayer pattern, replicated twice per row so indexing with
// 'index & 3' picks a 4-wide XMVECTOR). The table's declaration line was
// lost in extraction.
3227 0.468750f, -0.031250f, 0.343750f, -0.156250f, 0.468750f, -0.031250f, 0.343750f, -0.156250f,
3228 -0.281250f, 0.218750f, -0.406250f, 0.093750f, -0.281250f, 0.218750f, -0.406250f, 0.093750f,
3229 0.281250f, -0.218750f, 0.406250f, -0.093750f, 0.281250f, -0.218750f, 0.406250f, -0.093750f,
3230 -0.468750f, 0.031250f, -0.343750f, 0.156250f, -0.468750f, 0.031250f, -0.343750f, 0.156250f,
// Quantization scales for the dithered store paths: full-range maxima for
// 16/15/10/8/7/4-bit channels (the 10pc alpha is the 2-bit max, 3).
3233 static const XMVECTORF32
g_Scale16pc = { 65535.f, 65535.f, 65535.f, 65535.f };
3234 static const XMVECTORF32
g_Scale15pc = { 32767.f, 32767.f, 32767.f, 32767.f };
3235 static const XMVECTORF32
g_Scale10pc = { 1023.f, 1023.f, 1023.f, 3.f };
3236 static const XMVECTORF32
g_Scale8pc = { 255.f, 255.f, 255.f, 255.f };
3237 static const XMVECTORF32
g_Scale7pc = { 127.f, 127.f, 127.f, 127.f };
3240 static const XMVECTORF32
g_Scale4pc = { 15.f, 15.f, 15.f, 15.f };
// Floyd-Steinberg error-diffusion weights: 3/16, 5/16, 1/16 to the row
// below and 7/16 carried to the next pixel.
3242 static const XMVECTORF32
g_ErrorWeight3 = { 3.f/16.f, 3.f/16.f, 3.f/16.f, 3.f/16.f };
3243 static const XMVECTORF32
g_ErrorWeight5 = { 5.f/16.f, 5.f/16.f, 5.f/16.f, 5.f/16.f };
3244 static const XMVECTORF32
g_ErrorWeight1 = { 1.f/16.f, 1.f/16.f, 1.f/16.f, 1.f/16.f };
3245 static const XMVECTORF32
g_ErrorWeight7 = { 7.f/16.f, 7.f/16.f, 7.f/16.f, 7.f/16.f };
// STORE_SCANLINE: dithered store of a scanline into a 4-channel integer
// format `type` (with integer channel type `itype`, masked by `mask`).
// Traverses serpentine: odd rows (`row & 1`) run right-to-left with a
// mirrored error-diffusion `delta`. Per pixel: optional BGR swizzle (`bgr`),
// clamp/saturate per `clampzero`/`norm`, add carried error `vError`, scale by
// `scalev` when normalized, then either diffuse the quantization error into
// pDiffusionErrors with Floyd-Steinberg weights (3/16, 5/16, 1/16; 7/16 is
// carried forward in vError) or, without a diffusion buffer, apply the
// precomputed 4x4 ordered-dither offsets `ordered[index & 3]`.
// NOTE(review): the embedded original line numbers are non-contiguous — this
// extract is missing lines (e.g. braces/else arms), so read against the
// upstream file before relying on exact control flow.
3247 #define STORE_SCANLINE( type, scalev, clampzero, norm, itype, mask, row, bgr ) \
3248 if ( size >= sizeof(type) ) \
3250 type * __restrict dest = reinterpret_cast<type*>(pDestination); \
3251 for( size_t i = 0; i < count; ++i ) \
3253 ptrdiff_t index = static_cast<ptrdiff_t>( ( row & 1 ) ? ( count - i - 1 ) : i ); \
3254 ptrdiff_t delta = ( row & 1 ) ? -2 : 0; \
3256 XMVECTOR v = sPtr[ index ]; \
3257 if ( bgr ) { v = XMVectorSwizzle<2, 1, 0, 3>( v ); } \
3258 if ( norm && clampzero ) v = XMVectorSaturate( v ) ; \
3259 else if ( clampzero ) v = XMVectorClamp( v, g_XMZero, scalev ); \
3260 else if ( norm ) v = XMVectorClamp( v, g_XMNegativeOne, g_XMOne ); \
3261 else v = XMVectorClamp( v, -scalev + g_XMOne, scalev ); \
3262 v = XMVectorAdd( v, vError ); \
3263 if ( norm ) v = XMVectorMultiply( v, scalev ); \
3266 if ( pDiffusionErrors ) \
3268 target = XMVectorRound( v ); \
3269 vError = XMVectorSubtract( v, target ); \
3270 if (norm) vError = XMVectorDivide( vError, scalev ); \
3273 pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); \
3274 pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); \
3275 pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); \
3276 vError = XMVectorMultiply( vError, g_ErrorWeight7 ); \
3281 target = XMVectorAdd( v, ordered[ index & 3 ] ); \
3282 target = XMVectorRound( target ); \
3285 target = XMVectorMin( scalev, target ); \
3286 target = XMVectorMax( (clampzero) ? g_XMZero : ( -scalev + g_XMOne ), target ); \
3289 XMStoreFloat4A( &tmp, target ); \
3291 auto dPtr = &dest[ index ]; \
3292 dPtr->x = static_cast<itype>( tmp.x ) & mask; \
3293 dPtr->y = static_cast<itype>( tmp.y ) & mask; \
3294 dPtr->z = static_cast<itype>( tmp.z ) & mask; \
3295 dPtr->w = static_cast<itype>( tmp.w ) & mask; \
// STORE_SCANLINE2: identical dithering pipeline to STORE_SCANLINE but for
// two-channel destination structs — only ->x and ->y are written, and there
// is no BGR swizzle parameter. See STORE_SCANLINE for the per-pixel steps.
// NOTE(review): extract has gaps (non-contiguous embedded line numbers).
3301 #define STORE_SCANLINE2( type, scalev, clampzero, norm, itype, mask, row ) \
3303 if ( size >= sizeof(type) ) \
3305 type * __restrict dest = reinterpret_cast<type*>(pDestination); \
3306 for( size_t i = 0; i < count; ++i ) \
3308 ptrdiff_t index = static_cast<ptrdiff_t>( ( row & 1 ) ? ( count - i - 1 ) : i ); \
3309 ptrdiff_t delta = ( row & 1 ) ? -2 : 0; \
3311 XMVECTOR v = sPtr[ index ]; \
3312 if ( norm && clampzero ) v = XMVectorSaturate( v ) ; \
3313 else if ( clampzero ) v = XMVectorClamp( v, g_XMZero, scalev ); \
3314 else if ( norm ) v = XMVectorClamp( v, g_XMNegativeOne, g_XMOne ); \
3315 else v = XMVectorClamp( v, -scalev + g_XMOne, scalev ); \
3316 v = XMVectorAdd( v, vError ); \
3317 if ( norm ) v = XMVectorMultiply( v, scalev ); \
3320 if ( pDiffusionErrors ) \
3322 target = XMVectorRound( v ); \
3323 vError = XMVectorSubtract( v, target ); \
3324 if (norm) vError = XMVectorDivide( vError, scalev ); \
3327 pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); \
3328 pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); \
3329 pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); \
3330 vError = XMVectorMultiply( vError, g_ErrorWeight7 ); \
3335 target = XMVectorAdd( v, ordered[ index & 3 ] ); \
3336 target = XMVectorRound( target ); \
3339 target = XMVectorMin( scalev, target ); \
3340 target = XMVectorMax( (clampzero) ? g_XMZero : ( -scalev + g_XMOne ), target ); \
3343 XMStoreFloat4A( &tmp, target ); \
3345 auto dPtr = &dest[ index ]; \
3346 dPtr->x = static_cast<itype>( tmp.x ) & mask; \
3347 dPtr->y = static_cast<itype>( tmp.y ) & mask; \
// STORE_SCANLINE1: single-channel variant of the dithered store. Writes one
// scalar per pixel; `selectw` picks the W lane (alpha-only formats such as
// A8) instead of the X lane. Same serpentine traversal and Floyd-Steinberg /
// ordered-dither split as STORE_SCANLINE.
// NOTE(review): extract has gaps (non-contiguous embedded line numbers).
3353 #define STORE_SCANLINE1( type, scalev, clampzero, norm, mask, row, selectw ) \
3355 if ( size >= sizeof(type) ) \
3357 type * __restrict dest = reinterpret_cast<type*>(pDestination); \
3358 for( size_t i = 0; i < count; ++i ) \
3360 ptrdiff_t index = static_cast<ptrdiff_t>( ( row & 1 ) ? ( count - i - 1 ) : i ); \
3361 ptrdiff_t delta = ( row & 1 ) ? -2 : 0; \
3363 XMVECTOR v = sPtr[ index ]; \
3364 if ( norm && clampzero ) v = XMVectorSaturate( v ) ; \
3365 else if ( clampzero ) v = XMVectorClamp( v, g_XMZero, scalev ); \
3366 else if ( norm ) v = XMVectorClamp( v, g_XMNegativeOne, g_XMOne ); \
3367 else v = XMVectorClamp( v, -scalev + g_XMOne, scalev ); \
3368 v = XMVectorAdd( v, vError ); \
3369 if ( norm ) v = XMVectorMultiply( v, scalev ); \
3372 if ( pDiffusionErrors ) \
3374 target = XMVectorRound( v ); \
3375 vError = XMVectorSubtract( v, target ); \
3376 if (norm) vError = XMVectorDivide( vError, scalev ); \
3379 pDiffusionErrors[ index-delta ] += XMVectorMultiply( g_ErrorWeight3, vError ); \
3380 pDiffusionErrors[ index+1 ] += XMVectorMultiply( g_ErrorWeight5, vError ); \
3381 pDiffusionErrors[ index+2+delta ] += XMVectorMultiply( g_ErrorWeight1, vError ); \
3382 vError = XMVectorMultiply( vError, g_ErrorWeight7 ); \
3387 target = XMVectorAdd( v, ordered[ index & 3 ] ); \
3388 target = XMVectorRound( target ); \
3391 target = XMVectorMin( scalev, target ); \
3392 target = XMVectorMax( (clampzero) ? g_XMZero : ( -scalev + g_XMOne ), target ); \
3394 dest[ index ] = static_cast<type>( (selectw) ? XMVectorGetW( target ) : XMVectorGetX( target ) ) & mask; \
3400 #pragma warning(push)
3401 #pragma warning( disable : 4127 )
// _StoreScanlineDither: store one scanline of XMVECTOR pixels into the target
// DXGI_FORMAT with dithering. Full signature (per this file's index; the
// extract below is missing lines — embedded original numbers are
// non-contiguous):
//   bool _StoreScanlineDither(LPVOID pDestination, size_t size,
//       DXGI_FORMAT format, XMVECTOR* pSource, size_t count, float threshold,
//       size_t y, size_t z, XMVECTOR* pDiffusionErrors)
// When pDiffusionErrors is non-null, Floyd-Steinberg error diffusion is used:
// errors carried in from the previous row are folded into the source pixels,
// then the buffer is cleared for this row's outgoing errors. Otherwise a
// 4x4 ordered-dither pattern is selected from g_Dither by (y & 3, z & 3).
3403 _Use_decl_annotations_
3405 XMVECTOR* pSource,
size_t count,
float threshold,
size_t y,
size_t z, XMVECTOR* pDiffusionErrors )
3407 assert( pDestination && size > 0 );
3408 assert( pSource && count > 0 && (((uintptr_t)pSource & 0xF) == 0) );
3411 XMVECTOR ordered[4];
3412 if ( pDiffusionErrors )
// Fold the previous row's diffused errors (offset by 1: the buffer has a
// guard element at each end) into the incoming pixels, then zero the
// buffer so this row can accumulate its own outgoing errors.
3419 XMVECTOR* ptr = pSource;
3420 const XMVECTOR* err = pDiffusionErrors + 1;
3421 for(
size_t i=0; i <
count; ++i )
3424 XMVECTOR v = XMVectorAdd( *ptr, *err++ );
3429 memset( pDiffusionErrors, 0,
sizeof(XMVECTOR)*(count+2) );
// No diffusion buffer: pick the ordered-dither row for this (y, z) and
// splat its four entries so ordered[index & 3] indexes the pattern.
3435 XMVECTOR dither = XMLoadFloat4( reinterpret_cast<const XMFLOAT4*>(
g_Dither + (z & 3) + ( (y & 3) * 8 ) ) );
3437 ordered[0] = XMVectorSplatX( dither );
3438 ordered[1] = XMVectorSplatY( dither );
3439 ordered[2] = XMVectorSplatZ( dither );
3440 ordered[3] = XMVectorSplatW( dither );
3443 const XMVECTOR* __restrict sPtr = pSource;
3447 XMVECTOR vError = XMVectorZero();
// Per-format dispatch. Most cases expand one of the STORE_SCANLINE*
// macros (invocation lines are missing from this extract); the larger
// cases below are hand-written because their packing is irregular.
3451 case DXGI_FORMAT_R16G16B16A16_UNORM:
3454 case DXGI_FORMAT_R16G16B16A16_UINT:
3457 case DXGI_FORMAT_R16G16B16A16_SNORM:
3460 case DXGI_FORMAT_R16G16B16A16_SINT:
3463 case DXGI_FORMAT_R10G10B10A2_UNORM:
3466 case DXGI_FORMAT_R10G10B10A2_UINT:
// XR_BIAS: extended-range 10:10:10:2 — clamp to [-0.7529, 1.2529],
// scale by 510 and re-bias by 384 so the bit pattern encodes XR values.
3469 case DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM:
3470 if ( size >=
sizeof(XMUDEC4) )
3472 static const XMVECTORF32 Scale = { 510.0f, 510.0f, 510.0f, 3.0f };
3473 static const XMVECTORF32 Bias = { 384.0f, 384.0f, 384.0f, 0.0f };
3474 static const XMVECTORF32 MinXR = { -0.7529f, -0.7529f, -0.7529f, 0.f };
3475 static const XMVECTORF32 MaxXR = { 1.2529f, 1.2529f, 1.2529f, 1.0f };
3477 XMUDEC4 * __restrict
dest =
reinterpret_cast<XMUDEC4*
>(pDestination);
3478 for(
size_t i = 0; i <
count; ++i )
// Serpentine traversal: odd rows scan right-to-left and mirror the
// error-diffusion neighbor offsets via delta.
3480 ptrdiff_t index =
static_cast<ptrdiff_t
>( ( y & 1 ) ? ( count - i - 1 ) : i );
3481 ptrdiff_t delta = ( y & 1 ) ? -2 : 0;
3483 XMVECTOR v = XMVectorClamp( sPtr[ index ], MinXR, MaxXR );
3484 v = XMVectorMultiplyAdd( v, Scale, vError );
3487 if ( pDiffusionErrors )
3489 target = XMVectorRound( v );
3490 vError = XMVectorSubtract( v, target );
3491 vError = XMVectorDivide( vError, Scale );
// Distribute quantization error to the next row's neighbors
// (Floyd-Steinberg weights 3/16, 5/16, 1/16).
3494 pDiffusionErrors[ index-delta ] += XMVectorMultiply(
g_ErrorWeight3, vError );
3495 pDiffusionErrors[ index+1 ] += XMVectorMultiply(
g_ErrorWeight5, vError );
3496 pDiffusionErrors[ index+2+delta ] += XMVectorMultiply(
g_ErrorWeight1, vError );
3502 target = XMVectorAdd( v, ordered[ index & 3 ] );
3503 target = XMVectorRound( target );
3506 target = XMVectorAdd( target, Bias );
3507 target = XMVectorClamp( target, g_XMZero,
g_Scale10pc );
3510 XMStoreFloat4A( &tmp, target );
3512 auto dPtr = &dest[ index ];
3513 dPtr->x =
static_cast<uint16_t
>( tmp.x ) & 0x3FF;
3514 dPtr->y =
static_cast<uint16_t
>( tmp.y ) & 0x3FF;
3515 dPtr->z =
static_cast<uint16_t
>( tmp.z ) & 0x3FF;
3516 dPtr->w =
static_cast<uint16_t
>( tmp.w );
3522 case DXGI_FORMAT_R8G8B8A8_UNORM:
3523 case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB:
3526 case DXGI_FORMAT_R8G8B8A8_UINT:
3529 case DXGI_FORMAT_R8G8B8A8_SNORM:
3532 case DXGI_FORMAT_R8G8B8A8_SINT:
3535 case DXGI_FORMAT_R16G16_UNORM:
3538 case DXGI_FORMAT_R16G16_UINT:
3541 case DXGI_FORMAT_R16G16_SNORM:
3544 case DXGI_FORMAT_R16G16_SINT:
// D24_UNORM_S8_UINT: depth in the low 24 bits (scaled by 16777215),
// stencil in the high 8 bits — packed manually into one uint32_t.
3547 case DXGI_FORMAT_D24_UNORM_S8_UINT:
3548 if ( size >=
sizeof(uint32_t) )
3550 static const XMVECTORF32 Clamp = { 1.f, 255.f, 0.f, 0.f };
3551 static const XMVECTORF32 Scale = { 16777215.f, 1.f, 0.f, 0.f };
3552 static const XMVECTORF32 Scale2 = { 16777215.f, 255.f, 0.f, 0.f };
3554 uint32_t * __restrict
dest =
reinterpret_cast<uint32_t*
>(pDestination);
3555 for(
size_t i = 0; i <
count; ++i )
3557 ptrdiff_t index =
static_cast<ptrdiff_t
>( ( y & 1 ) ? ( count - i - 1 ) : i );
3558 ptrdiff_t delta = ( y & 1 ) ? -2 : 0;
3560 XMVECTOR v = XMVectorClamp( sPtr[ index ], g_XMZero, Clamp );
3561 v = XMVectorAdd( v, vError );
3562 v = XMVectorMultiply( v, Scale );
3565 if ( pDiffusionErrors )
3567 target = XMVectorRound( v );
3568 vError = XMVectorSubtract( v, target );
3569 vError = XMVectorDivide( vError, Scale );
3572 pDiffusionErrors[ index-delta ] += XMVectorMultiply(
g_ErrorWeight3, vError );
3573 pDiffusionErrors[ index+1 ] += XMVectorMultiply(
g_ErrorWeight5, vError );
3574 pDiffusionErrors[ index+2+delta ] += XMVectorMultiply(
g_ErrorWeight1, vError );
3580 target = XMVectorAdd( v, ordered[ index & 3 ] );
3581 target = XMVectorRound( target );
3584 target = XMVectorClamp( target, g_XMZero, Scale2 );
3587 XMStoreFloat4A( &tmp, target );
3589 auto dPtr = &dest[ index ];
3590 *dPtr = (
static_cast<uint32_t
>( tmp.x ) & 0xFFFFFF)
3591 | ((
static_cast<uint32_t
>( tmp.y ) & 0xFF) << 24);
3597 case DXGI_FORMAT_R8G8_UNORM:
3600 case DXGI_FORMAT_R8G8_UINT:
3603 case DXGI_FORMAT_R8G8_SNORM:
3606 case DXGI_FORMAT_R8G8_SINT:
3609 case DXGI_FORMAT_D16_UNORM:
3610 case DXGI_FORMAT_R16_UNORM:
3613 case DXGI_FORMAT_R16_UINT:
3616 case DXGI_FORMAT_R16_SNORM:
3619 case DXGI_FORMAT_R16_SINT:
3622 case DXGI_FORMAT_R8_UNORM:
3625 case DXGI_FORMAT_R8_UINT:
3628 case DXGI_FORMAT_R8_SNORM:
3631 case DXGI_FORMAT_R8_SINT:
3634 case DXGI_FORMAT_A8_UNORM:
// B5G6R5: swizzle RGB->BGR, saturate, dither, then pack 5:6:5 manually.
3637 case DXGI_FORMAT_B5G6R5_UNORM:
3638 if ( size >=
sizeof(XMU565) )
3640 XMU565 * __restrict
dest =
reinterpret_cast<XMU565*
>(pDestination);
3641 for(
size_t i = 0; i <
count; ++i )
3643 ptrdiff_t index =
static_cast<ptrdiff_t
>( ( y & 1 ) ? ( count - i - 1 ) : i );
3644 ptrdiff_t delta = ( y & 1 ) ? -2 : 0;
3646 XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( sPtr[ index ] );
3647 v = XMVectorSaturate( v );
3648 v = XMVectorAdd( v, vError );
3652 if ( pDiffusionErrors )
3654 target = XMVectorRound( v );
3655 vError = XMVectorSubtract( v, target );
3659 pDiffusionErrors[ index-delta ] += XMVectorMultiply(
g_ErrorWeight3, vError );
3660 pDiffusionErrors[ index+1 ] += XMVectorMultiply(
g_ErrorWeight5, vError );
3661 pDiffusionErrors[ index+2+delta ] += XMVectorMultiply(
g_ErrorWeight1, vError );
3667 target = XMVectorAdd( v, ordered[ index & 3 ] );
3668 target = XMVectorRound( target );
3671 target = XMVectorClamp( target, g_XMZero,
g_Scale565pc );
3674 XMStoreFloat4A( &tmp, target );
3676 auto dPtr = &dest[ index ];
3677 dPtr->x =
static_cast<uint16_t
>( tmp.x ) & 0x1F;
3678 dPtr->y =
static_cast<uint16_t
>( tmp.y ) & 0x3F;
3679 dPtr->z =
static_cast<uint16_t
>( tmp.z ) & 0x1F;
// B5G5R5A1: 5:5:5 color plus a 1-bit alpha decided by `threshold`.
3685 case DXGI_FORMAT_B5G5R5A1_UNORM:
3686 if ( size >=
sizeof(XMU555) )
3688 XMU555 * __restrict
dest =
reinterpret_cast<XMU555*
>(pDestination);
3689 for(
size_t i = 0; i <
count; ++i )
3691 ptrdiff_t index =
static_cast<ptrdiff_t
>( ( y & 1 ) ? ( count - i - 1 ) : i );
3692 ptrdiff_t delta = ( y & 1 ) ? -2 : 0;
3694 XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( sPtr[ index ] );
3695 v = XMVectorSaturate( v );
3696 v = XMVectorAdd( v, vError );
3700 if ( pDiffusionErrors )
3702 target = XMVectorRound( v );
3703 vError = XMVectorSubtract( v, target );
3707 pDiffusionErrors[ index-delta ] += XMVectorMultiply(
g_ErrorWeight3, vError );
3708 pDiffusionErrors[ index+1 ] += XMVectorMultiply(
g_ErrorWeight5, vError );
3709 pDiffusionErrors[ index+2+delta ] += XMVectorMultiply(
g_ErrorWeight1, vError );
3715 target = XMVectorAdd( v, ordered[ index & 3 ] );
3716 target = XMVectorRound( target );
3722 XMStoreFloat4A( &tmp, target );
3724 auto dPtr = &dest[ index ];
3725 dPtr->x =
static_cast<uint16_t
>( tmp.x ) & 0x1F;
3726 dPtr->y =
static_cast<uint16_t
>( tmp.y ) & 0x1F;
3727 dPtr->z =
static_cast<uint16_t
>( tmp.z ) & 0x1F;
3728 dPtr->w = ( XMVectorGetW( target ) >
threshold ) ? 1 : 0;
// BGRA/BGRX 8-bit: swizzle to BGR order, dither, store bytes (alpha
// handling for the A vs X variants is in lines missing from this extract).
3734 case DXGI_FORMAT_B8G8R8A8_UNORM:
3735 case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB:
3738 case DXGI_FORMAT_B8G8R8X8_UNORM:
3739 case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB:
3740 if ( size >=
sizeof(XMUBYTEN4) )
3742 XMUBYTEN4 * __restrict
dest =
reinterpret_cast<XMUBYTEN4*
>(pDestination);
3743 for(
size_t i = 0; i <
count; ++i )
3745 ptrdiff_t index =
static_cast<ptrdiff_t
>( ( y & 1 ) ? ( count - i - 1 ) : i );
3746 ptrdiff_t delta = ( y & 1 ) ? -2 : 0;
3748 XMVECTOR v = XMVectorSwizzle<2, 1, 0, 3>( sPtr[ index ] );
3749 v = XMVectorSaturate( v );
3750 v = XMVectorAdd( v, vError );
3754 if ( pDiffusionErrors )
3756 target = XMVectorRound( v );
3757 vError = XMVectorSubtract( v, target );
3758 vError = XMVectorDivide( vError,
g_Scale8pc );
3761 pDiffusionErrors[ index-delta ] += XMVectorMultiply(
g_ErrorWeight3, vError );
3762 pDiffusionErrors[ index+1 ] += XMVectorMultiply(
g_ErrorWeight5, vError );
3763 pDiffusionErrors[ index+2+delta ] += XMVectorMultiply(
g_ErrorWeight1, vError );
3769 target = XMVectorAdd( v, ordered[ index & 3 ] );
3770 target = XMVectorRound( target );
3773 target = XMVectorClamp( target, g_XMZero,
g_Scale8pc );
3776 XMStoreFloat4A( &tmp, target );
3778 auto dPtr = &dest[ index ];
3779 dPtr->x =
static_cast<uint8_t>( tmp.x ) & 0xFF;
3780 dPtr->y =
static_cast<uint8_t>( tmp.y ) & 0xFF;
3781 dPtr->z =
static_cast<uint8_t>( tmp.z ) & 0xFF;
3788 case DXGI_FORMAT_B4G4R4A4_UNORM:
// Default: formats with no special dither path fall back to the plain
// (non-dithered) scanline store.
3792 return _StoreScanline( pDestination, size, format, pSource, count, threshold );
3796 #pragma warning(pop)
3798 #undef STORE_SCANLINE
3799 #undef STORE_SCANLINE2
3800 #undef STORE_SCANLINE1
// _UseWICConversion: decide whether a (sformat -> tformat) conversion should
// be delegated to WIC, and if so emit the matching WIC pixel-format GUIDs.
// Both out-GUIDs are initialized to GUID_NULL first. The case labels below
// enumerate formats excluded from / special-cased for WIC (the decision
// logic between them is missing from this extract — embedded line numbers
// jump from 3810 to 3839).
3806 static inline bool _UseWICConversion( _In_ DWORD filter, _In_ DXGI_FORMAT sformat, _In_ DXGI_FORMAT tformat,
3807 _Out_ WICPixelFormatGUID& pfGUID, _Out_ WICPixelFormatGUID& targetGUID )
3809 memcpy( &pfGUID, &GUID_NULL,
sizeof(GUID) );
3810 memcpy( &targetGUID, &GUID_NULL,
sizeof(GUID) );
3839 case DXGI_FORMAT_R32G32B32A32_FLOAT:
3840 case DXGI_FORMAT_R32G32B32_FLOAT:
3841 case DXGI_FORMAT_R16G16B16A16_FLOAT:
3844 case DXGI_FORMAT_R16_FLOAT:
3845 case DXGI_FORMAT_R32_FLOAT:
3846 case DXGI_FORMAT_D32_FLOAT:
3848 case DXGI_FORMAT_A8_UNORM:
3854 case DXGI_FORMAT_R16_FLOAT:
3857 case DXGI_FORMAT_R32_FLOAT:
3858 case DXGI_FORMAT_D32_FLOAT:
3860 case DXGI_FORMAT_A8_UNORM:
3866 case DXGI_FORMAT_A8_UNORM:
3873 case DXGI_FORMAT_A8_UNORM:
// _ConvertUsingWIC: convert one image via the WIC format converter.
// Full signature (per this file's index; the head of the parameter list is
// missing from this extract):
//   HRESULT _ConvertUsingWIC(const Image& srcImage,
//       const WICPixelFormatGUID& pfGUID, const WICPixelFormatGUID& targetGUID,
//       DWORD filter, float threshold, const Image& destImage)
// Wraps the source pixels in an IWICBitmap, initializes an
// IWICFormatConverter with the dither mode derived from `filter` and the
// alpha `threshold` (as a 0-100 percentage), and copies converted pixels
// straight into destImage.
3907 _In_
const WICPixelFormatGUID& targetGUID,
3908 _In_ DWORD filter, _In_
float threshold, _In_
const Image& destImage )
3910 assert( srcImage.width == destImage.width );
3911 assert( srcImage.height == destImage.height );
3913 IWICImagingFactory* pWIC =
_GetWIC();
3915 return E_NOINTERFACE;
3917 ComPtr<IWICFormatConverter> FC;
3918 HRESULT hr = pWIC->CreateFormatConverter( FC.GetAddressOf() );
// Verify WIC actually supports this GUID pair before committing.
3925 BOOL canConvert = FALSE;
3926 hr = FC->CanConvert( pfGUID, targetGUID, &canConvert );
3927 if ( FAILED(hr) || !canConvert )
3930 return E_UNEXPECTED;
3933 ComPtr<IWICBitmap> source;
3934 hr = pWIC->CreateBitmapFromMemory( static_cast<UINT>( srcImage.width ), static_cast<UINT>( srcImage.height ), pfGUID,
3935 static_cast<UINT>( srcImage.rowPitch ), static_cast<UINT>( srcImage.slicePitch ),
3936 srcImage.pixels, source.GetAddressOf() );
3940 hr = FC->Initialize( source.Get(), targetGUID,
_GetWICDither( filter ), 0, threshold * 100.f, WICBitmapPaletteTypeCustom );
3944 hr = FC->CopyPixels( 0, static_cast<UINT>( destImage.rowPitch ), static_cast<UINT>( destImage.slicePitch ), destImage.pixels );
// _Convert: scanline-based conversion between two same-sized images.
// Full signature (per this file's index; the function head is missing from
// this extract): HRESULT _Convert(const Image& srcImage, DWORD filter,
//                    const Image& destImage, float threshold, size_t z)
// Each row is loaded into an aligned XMVECTOR scanline, converted in place
// with _ConvertScanline, then stored to the destination format. The
// dithered path allocates width*2+2 vectors: one scanline plus a
// (width+2)-entry Floyd-Steinberg error buffer with guard elements.
// NOTE(review): the branch structure selecting dither vs plain store is in
// lines missing from this extract.
3957 assert( srcImage.width == destImage.width );
3958 assert( srcImage.height == destImage.height );
3960 const uint8_t *pSrc = srcImage.pixels;
3961 uint8_t *pDest = destImage.pixels;
3962 if ( !pSrc || !pDest )
3965 size_t width = srcImage.width;
// Error-diffusion path: scanline + diffusion-error buffer in one block.
3970 ScopedAlignedArrayXMVECTOR scanline( reinterpret_cast<XMVECTOR*>( _aligned_malloc( (
sizeof(XMVECTOR)*(width*2 + 2)), 16 ) ) );
3972 return E_OUTOFMEMORY;
3974 XMVECTOR* pDiffusionErrors = scanline.get() + width;
3975 memset( pDiffusionErrors, 0,
sizeof(XMVECTOR)*(width+2) );
3977 for(
size_t h = 0; h < srcImage.height; ++h )
3979 if ( !
_LoadScanline( scanline.get(), width, pSrc, srcImage.rowPitch, srcImage.format ) )
3982 _ConvertScanline( scanline.get(), width, destImage.format, srcImage.format, filter );
3987 pSrc += srcImage.rowPitch;
3988 pDest += destImage.rowPitch;
3995 return E_OUTOFMEMORY;
// Non-diffusion paths: same load/convert/store loop per row.
4000 for(
size_t h = 0; h < srcImage.height; ++h )
4002 if ( !
_LoadScanline( scanline.get(), width, pSrc, srcImage.rowPitch, srcImage.format ) )
4005 _ConvertScanline( scanline.get(), width, destImage.format, srcImage.format, filter );
4010 pSrc += srcImage.rowPitch;
4011 pDest += destImage.rowPitch;
4017 for(
size_t h = 0; h < srcImage.height; ++h )
4019 if ( !
_LoadScanline( scanline.get(), width, pSrc, srcImage.rowPitch, srcImage.format ) )
4022 _ConvertScanline( scanline.get(), width, destImage.format, srcImage.format, filter );
4027 pSrc += srcImage.rowPitch;
4028 pDest += destImage.rowPitch;
// Interior of _PlanarToSingle(DXGI_FORMAT) -> DXGI_FORMAT (signature per this
// file's index; the switch head is missing from this extract): maps a planar
// video format to its packed single-plane equivalent, or DXGI_FORMAT_UNKNOWN
// when there is no mapping.
4042 case DXGI_FORMAT_NV12:
4043 case DXGI_FORMAT_NV11:
4044 return DXGI_FORMAT_YUY2;
4046 case DXGI_FORMAT_P010:
4047 return DXGI_FORMAT_Y210;
4049 case DXGI_FORMAT_P016:
4050 return DXGI_FORMAT_Y216;
4057 return DXGI_FORMAT_UNKNOWN;
// CONVERT_420_TO_422: repack 4:2:0 planar chroma (one UV pair per 2x2 luma
// block) into 4:2:2 packed form. Processes two source rows per iteration,
// reading luma from rows y and y+1 and sharing each (u, v) pair across both
// output rows; `sourceE` bounds-checks the UV plane against slicePitch.
// NOTE(review): the y/w chroma assignments for dPtr0/dPtr1 are in lines
// missing from this extract (embedded numbers skip 4089/4091/4095/4097).
4065 #define CONVERT_420_TO_422( srcType, destType )\
4067 size_t rowPitch = srcImage.rowPitch;\
4069 auto sourceE = reinterpret_cast<const srcType*>( pSrc + srcImage.slicePitch );\
4070 auto pSrcUV = pSrc + ( srcImage.height * rowPitch );\
4072 for( size_t y = 0; y < srcImage.height; y+= 2 )\
4074 auto sPtrY0 = reinterpret_cast<const srcType*>( pSrc );\
4075 auto sPtrY2 = reinterpret_cast<const srcType*>( pSrc + rowPitch );\
4076 auto sPtrUV = reinterpret_cast<const srcType*>( pSrcUV );\
4078 destType * __restrict dPtr0 = reinterpret_cast<destType*>(pDest);\
4079 destType * __restrict dPtr1 = reinterpret_cast<destType*>(pDest + destImage.rowPitch);\
4081 for( size_t x = 0; x < srcImage.width; x+= 2 )\
4083 if ( (sPtrUV+1) >= sourceE ) break;\
4085 srcType u = *(sPtrUV++);\
4086 srcType v = *(sPtrUV++);\
4088 dPtr0->x = *(sPtrY0++);\
4090 dPtr0->z = *(sPtrY0++);\
4094 dPtr1->x = *(sPtrY2++);\
4096 dPtr1->z = *(sPtrY2++);\
4101 pSrc += rowPitch * 2;\
4102 pSrcUV += rowPitch;\
4104 pDest += destImage.rowPitch * 2;\
// _ConvertToSinglePlane: repack a planar-format image into its packed
// equivalent. Full signature (per this file's index; the function head is
// missing from this extract):
//   HRESULT _ConvertToSinglePlane(const Image& srcImage, const Image& destImage)
// NV12/P010/P016 use the CONVERT_420_TO_422 macro (invocations are in lines
// missing here); NV11 (4:1:1 chroma) is expanded by hand below, sharing each
// UV pair across four luma samples per row.
4110 assert( srcImage.width == destImage.width );
4111 assert( srcImage.height == destImage.height );
4113 const uint8_t *pSrc = srcImage.pixels;
4114 uint8_t *pDest = destImage.pixels;
4115 if ( !pSrc || !pDest )
4118 switch ( srcImage.format )
4120 case DXGI_FORMAT_NV12:
4121 assert( destImage.format == DXGI_FORMAT_YUY2 );
4125 case DXGI_FORMAT_P010:
4126 assert( destImage.format == DXGI_FORMAT_Y210 );
4130 case DXGI_FORMAT_P016:
4131 assert( destImage.format == DXGI_FORMAT_Y216 );
4135 case DXGI_FORMAT_NV11:
4136 assert( destImage.format == DXGI_FORMAT_YUY2 );
4139 size_t rowPitch = srcImage.rowPitch;
4141 const uint8_t* sourceE = pSrc + srcImage.slicePitch;
4142 const uint8_t* pSrcUV = pSrc + ( srcImage.height * rowPitch );
4144 for(
size_t y = 0;
y < srcImage.height; ++
y )
4147 const uint8_t* sPtrUV = pSrcUV;
4149 XMUBYTEN4 * __restrict dPtr =
reinterpret_cast<XMUBYTEN4*
>(pDest);
4151 for(
size_t x = 0; x < srcImage.width; x+= 4 )
4153 if ( (sPtrUV+1) >= sourceE )
break;
4158 dPtr->x = *(sPtrY++);
4160 dPtr->z = *(sPtrY++);
4164 dPtr->x = *(sPtrY++);
4166 dPtr->z = *(sPtrY++);
// NV11 chroma plane is half-width, hence the rowPitch >> 1 advance.
4172 pSrcUV += (rowPitch >> 1);
4174 pDest += destImage.rowPitch;
4180 return E_UNEXPECTED;
// Convert (single-image public entry; signature per this file's index:
//   HRESULT Convert(const Image& srcImage, DXGI_FORMAT format, DWORD filter,
//                   float threshold, ScratchImage& image)).
// Validates arguments and 32-bit dimension limits, then routes through
// either the WIC converter or the internal scanline path. NOTE(review):
// most of the validation and the result-image setup is in lines missing
// from this extract (embedded numbers are non-contiguous).
_Use_decl_annotations_
4198 return E_INVALIDARG;
4207 return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );
// Image dimensions must fit in 32 bits (WIC/D3D UINT limits).
4210 if ( (srcImage.
width > 0xFFFFFFFF) || (srcImage.
height > 0xFFFFFFFF) )
4211 return E_INVALIDARG;
4225 WICPixelFormatGUID pfGUID, targetGUID;
4228 hr =
_ConvertUsingWIC( srcImage, pfGUID, targetGUID, filter, threshold, *rimage );
4232 hr =
_Convert( srcImage, filter, *rimage, threshold, 0 );
// Convert (image-array public entry): converts every sub-image of a
// ScratchImage-style set, validating metadata/format compatibility and
// 32-bit dimensions per image. The 3D branch iterates mip levels and depth
// slices, passing the slice index `z` through to _Convert (used by the
// ordered-dither pattern selection). NOTE(review): the function head,
// branch structure, and error-path lines are missing from this extract.
_Use_decl_annotations_
4252 if ( !srcImages || !nimages || (metadata.
format == format) || !
IsValid(format) )
4253 return E_INVALIDARG;
4259 return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );
4262 if ( (metadata.
width > 0xFFFFFFFF) || (metadata.
height > 0xFFFFFFFF) )
4263 return E_INVALIDARG;
4285 WICPixelFormatGUID pfGUID, targetGUID;
// 1D/2D/array path: one _Convert per sub-image.
4292 for(
size_t index=0; index < nimages; ++index )
4294 const Image& src = srcImages[ index ];
4302 if ( (src.
width > 0xFFFFFFFF) || (src.
height > 0xFFFFFFFF) )
4306 const Image& dst = dest[ index ];
4321 hr =
_Convert( src, filter, dst, threshold, 0 );
// 3D path: walk mips x depth slices; the slice index feeds the dither.
4335 size_t d = metadata.
depth;
4336 for(
size_t level = 0; level < metadata.
mipLevels; ++level )
4338 for(
size_t slice = 0; slice < d; ++slice, ++index )
4340 if ( index >= nimages )
4343 const Image& src = srcImages[ index ];
4351 if ( (src.
width > 0xFFFFFFFF) || (src.
height > 0xFFFFFFFF) )
4355 const Image& dst = dest[ index ];
4370 hr =
_Convert( src, filter, dst, threshold, slice );
// ConvertToSinglePlane (single-image public entry; signature per this file's
// index: HRESULT ConvertToSinglePlane(const Image& srcImage,
// ScratchImage& image)): validates the source, maps its planar format to the
// packed equivalent (UNKNOWN => not supported), and enforces 32-bit
// dimensions. NOTE(review): the function head and the body after validation
// are in lines missing from this extract.
_Use_decl_annotations_
4401 return E_INVALIDARG;
4407 if ( format == DXGI_FORMAT_UNKNOWN )
4408 return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );
4411 if ( (srcImage.
width > 0xFFFFFFFF) || (srcImage.
height > 0xFFFFFFFF) )
4412 return E_INVALIDARG;
// ConvertToSinglePlane (image-array public entry): same validation as the
// single-image overload applied across every sub-image (metadata-level
// checks first, then per-image 32-bit dimension checks). NOTE(review): this
// extract ends mid-loop; the per-image conversion call and the remainder of
// the function are outside the visible lines.
_Use_decl_annotations_
4444 if ( !srcImages || !nimages )
4445 return E_INVALIDARG;
4450 return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );
4454 if ( format == DXGI_FORMAT_UNKNOWN )
4455 return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );
4458 if ( (metadata.
width > 0xFFFFFFFF) || (metadata.
height > 0xFFFFFFFF) )
4459 return E_INVALIDARG;
4481 for(
size_t index=0; index < nimages; ++index )
4483 const Image& src = srcImages[ index ];
4491 if ( (src.
width > 0xFFFFFFFF) || (src.
height > 0xFFFFFFFF) )
4495 const Image& dst = dest[ index ];
std::unique_ptr< DirectX::XMVECTOR, aligned_deleter > ScopedAlignedArrayXMVECTOR
_Use_decl_annotations_ bool _ExpandScanline(LPVOID pDestination, size_t outSize, DXGI_FORMAT outFormat, LPCVOID pSource, size_t inSize, DXGI_FORMAT inFormat, DWORD flags)
static const XMVECTORF32 g_Scale7pc
const Image * GetImage(_In_ size_t mip, _In_ size_t item, _In_ size_t slice) const
bool IsPlanar(_In_ DXGI_FORMAT fmt)
static XMVECTOR XMColorRGBToSRGB(FXMVECTOR rgb)
static HRESULT _Convert(_In_ const Image &srcImage, _In_ DWORD filter, _In_ const Image &destImage, _In_ float threshold, _In_ size_t z)
static const XMVECTORF32 g_Scale16pc
_In_ size_t _In_ DXGI_FORMAT _In_ size_t _In_ DXGI_FORMAT _In_ DWORD flags
#define STORE_SCANLINE(type, func)
void _CopyScanline(_When_(pDestination==pSource, _Inout_updates_bytes_(outSize)) _When_(pDestination!=pSource, _Out_writes_bytes_(outSize)) LPVOID pDestination, _In_ size_t outSize, _In_reads_bytes_(inSize) LPCVOID pSource, _In_ size_t inSize, _In_ DXGI_FORMAT format, _In_ DWORD flags)
static const XMVECTORF32 g_ErrorWeight7
_Use_decl_annotations_ HRESULT _ConvertFromR32G32B32A32(const Image &srcImage, const Image &destImage)
#define LOAD_SCANLINE3(type, func, defvec)
_Use_decl_annotations_ HRESULT _ConvertToR32G32B32A32(const Image &srcImage, ScratchImage &image)
size_t _In_ DXGI_FORMAT size_t _In_ TEXP_LEGACY_FORMAT _In_ DWORD flags assert(pDestination &&outSize > 0)
_In_ size_t _In_ DXGI_FORMAT _In_ size_t _In_ float threshold
_In_ size_t _In_ DXGI_FORMAT _In_ size_t _In_ float size_t y
HRESULT Initialize(_In_ const TexMetadata &mdata, _In_ DWORD flags=CP_FLAGS_NONE)
HRESULT Initialize2D(_In_ DXGI_FORMAT fmt, _In_ size_t width, _In_ size_t height, _In_ size_t arraySize, _In_ size_t mipLevels, _In_ DWORD flags=CP_FLAGS_NONE)
_Use_decl_annotations_ bool _LoadScanline(XMVECTOR *pDestination, size_t count, LPCVOID pSource, size_t size, DXGI_FORMAT format)
static XMVECTOR XMColorSRGBToRGB(FXMVECTOR srgb)
static const XMVECTORF32 g_ErrorWeight3
IWICImagingFactory * _GetWIC()
static const XMVECTORF32 g_Scale8pc
static const XMVECTORF32 g_Scale565pc
HRESULT ConvertToSinglePlane(_In_ const Image &srcImage, _Out_ ScratchImage &image)
bool IsCompressed(_In_ DXGI_FORMAT fmt)
size_t GetImageCount() const
#define STORE_SCANLINE1(type, scalev, clampzero, norm, mask, row, selectw)
_In_ size_t _In_ const TexMetadata & metadata
_Use_decl_annotations_ void _ConvertScanline(XMVECTOR *pBuffer, size_t count, DXGI_FORMAT outFormat, DXGI_FORMAT inFormat, DWORD flags)
_Use_decl_annotations_ void _SwizzleScanline(LPVOID pDestination, size_t outSize, LPCVOID pSource, size_t inSize, DXGI_FORMAT format, DWORD flags)
static int __cdecl _ConvertCompare(void *context, const void *ptr1, const void *ptr2)
static HRESULT _ConvertToSinglePlane(_In_ const Image &srcImage, _In_ const Image &destImage)
_Use_decl_annotations_ DWORD _GetConvertFlags(DXGI_FORMAT format)
bool IsValid(_In_ DXGI_FORMAT fmt)
size_t _In_ DXGI_FORMAT outFormat
size_t _In_ DXGI_FORMAT size_t _In_ TEXP_LEGACY_FORMAT inFormat
static HRESULT _ConvertUsingWIC(_In_ const Image &srcImage, _In_ const WICPixelFormatGUID &pfGUID, _In_ const WICPixelFormatGUID &targetGUID, _In_ DWORD filter, _In_ float threshold, _In_ const Image &destImage)
size_t _In_ DXGI_FORMAT size_t inSize
bool IsPalettized(_In_ DXGI_FORMAT fmt)
static const XMVECTORF32 g_Grayscale
static bool _UseWICConversion(_In_ DWORD filter, _In_ DXGI_FORMAT sformat, _In_ DXGI_FORMAT tformat, _Out_ WICPixelFormatGUID &pfGUID, _Out_ WICPixelFormatGUID &targetGUID)
static DXGI_FORMAT _PlanarToSingle(_In_ DXGI_FORMAT format)
size_t _In_ DXGI_FORMAT _In_reads_bytes_(inSize) LPCVOID pSource
HRESULT Convert(_In_ const Image &srcImage, _In_ DXGI_FORMAT format, _In_ DWORD filter, _In_ float threshold, _Out_ ScratchImage &image)
static const XMVECTORF32 g_Scale15pc
WICBitmapDitherType _GetWICDither(_In_ DWORD flags)
static const XMVECTORF32 g_Scale5551pc
DWORD _CheckWICColorSpace(_In_ const GUID &sourceGUID, _In_ const GUID &targetGUID)
static const XMVECTORF32 g_ErrorWeight5
_Use_decl_annotations_ bool _LoadScanlineLinear(XMVECTOR *pDestination, size_t count, LPCVOID pSource, size_t size, DXGI_FORMAT format, DWORD flags)
_Use_decl_annotations_ bool _StoreScanlineLinear(LPVOID pDestination, size_t size, DXGI_FORMAT format, XMVECTOR *pSource, size_t count, DWORD flags)
bool IsTypeless(_In_ DXGI_FORMAT fmt, _In_ bool partialTypeless=true)
static const XMVECTORF32 g_Scale10pc
#define LOAD_SCANLINE(type, func)
static const ConvertData g_ConvertTable[]
#define CONVERT_420_TO_422(srcType, destType)
bool IsSRGB(_In_ DXGI_FORMAT fmt)
_In_ size_t _In_ size_t _In_ DXGI_FORMAT format
static const float g_Dither[]
const Image * GetImages() const
bool _DXGIToWIC(_In_ DXGI_FORMAT format, _Out_ GUID &guid, _In_ bool ignoreRGBvsBGR=false)
_In_ size_t _In_ size_t size
static const XMVECTORF32 g_ErrorWeight1
static const XMVECTORF32 g_Scale4pc
#define STORE_SCANLINE2(type, scalev, clampzero, norm, itype, mask, row)
_Use_decl_annotations_ bool _StoreScanline(LPVOID pDestination, size_t size, DXGI_FORMAT format, const XMVECTOR *pSource, size_t count, float threshold)
_Use_decl_annotations_ bool _StoreScanlineDither(LPVOID pDestination, size_t size, DXGI_FORMAT format, XMVECTOR *pSource, size_t count, float threshold, size_t y, size_t z, XMVECTOR *pDiffusionErrors)
_In_ size_t _In_ DXGI_FORMAT _In_ size_t _In_ float size_t size_t z
#define LOAD_SCANLINE2(type, func, defvec)