18 #if defined(_DEBUG) || defined(PROFILE)
19 #pragma comment(lib,"dxguid.lib")
22 using Microsoft::WRL::ComPtr;
// Parameter block that is memcpy'd into the compute shaders' constant buffer.
// Only part of the member list is visible in this excerpt.
39 struct ConstantsBC6HBC7
// Total number of blocks in the image (set from xblocks * yblocks by the caller).
46 UINT num_total_blocks;
// Compile-time guard: the struct must occupy exactly eight UINT-sized slots.
// Its sizeof is also used as the ByteWidth of the constant buffer, so adding or
// removing a member without updating the shader side fails the build here.
51 static_assert(
sizeof(ConstantsBC6HBC7) ==
sizeof(UINT)*8,
"Constant buffer size mismatch" );
// Binds one compute shader together with its inputs/outputs and dispatches it.
//
//   pContext - device context the work is issued on
//   shader   - compute shader to run
//   pSRVs    - array of shader resource views bound starting at slot 0
//   pUAV     - single unordered access view bound at UAV slot 0
//
// The body also uses `srvCount`, `pCB` and `X`; their declarations are on
// parameter lines that are not visible in this excerpt (presumably the SRV
// count, the constant buffer and the thread-group count in X - confirm).
53 inline void RunComputeShader( ID3D11DeviceContext* pContext,
54 ID3D11ComputeShader* shader,
55 ID3D11ShaderResourceView** pSRVs,
58 ID3D11UnorderedAccessView* pUAV,
// Unbind whatever is currently in UAV slot 0 before binding the new inputs.
// NOTE(review): presumably this avoids a read/write hazard when a buffer that was
// the previous pass's UAV is now bound as an SRV - confirm.
62 ID3D11UnorderedAccessView* nullUAV =
nullptr;
63 pContext->CSSetUnorderedAccessViews( 0, 1, &nullUAV,
nullptr );
// Bind shader, inputs (SRVs), output (UAV slot 0) and constants (CB slot 0).
65 pContext->CSSetShader( shader,
nullptr, 0 );
66 pContext->CSSetShaderResources( 0, srvCount, pSRVs );
67 pContext->CSSetUnorderedAccessViews( 0, 1, &pUAV,
nullptr );
68 pContext->CSSetConstantBuffers( 0, 1, &pCB );
// One-dimensional dispatch: X thread groups, 1 in Y and Z.
69 pContext->Dispatch( X, 1, 1 );
// Clears the compute-stage bindings that RunComputeShader sets up:
// UAV slot 0, SRV slots 0-2 and constant-buffer slot 0 are all set to null.
// The compute shader itself is not unbound here.
72 inline void ResetContext( ID3D11DeviceContext* pContext )
// Unbind the output UAV.
74 ID3D11UnorderedAccessView* nullUAV =
nullptr;
75 pContext->CSSetUnorderedAccessViews( 0, 1, &nullUAV,
nullptr );
// Unbind three SRV slots (the visible callers bind at most two).
77 ID3D11ShaderResourceView* nullSRV[3] = {
nullptr,
nullptr,
nullptr };
78 pContext->CSSetShaderResources( 0, 3, nullSRV );
// Unbind the constant buffer.
80 ID3D11Buffer* nullBuffer[1] = {
nullptr };
81 pContext->CSSetConstantBuffers( 0, 1, nullBuffer );
89 m_bcformat(DXGI_FORMAT_UNKNOWN),
90 m_srcformat(DXGI_FORMAT_UNKNOWN),
// Body fragments of the device-initialisation routine (uses `pDevice`; presumably
// the definition of Initialize - the signature line is not visible in this excerpt).
// Visible steps: verify compute-shader capability, grab the immediate context,
// then create the three BC6H and four BC7 compute shaders from bytecode arrays.
99 _Use_decl_annotations_
106 D3D_FEATURE_LEVEL fl = pDevice->GetFeatureLevel();
// Anything below feature level 10.0 is rejected outright.
108 if ( fl < D3D_FEATURE_LEVEL_10_0 )
111 return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );
// Feature levels 10.x: compute shaders are optional, so query the hardware options.
114 if ( fl < D3D_FEATURE_LEVEL_11_0 )
117 D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS hwopts;
118 HRESULT hr = pDevice->CheckFeatureSupport( D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS, &hwopts,
sizeof(hwopts) );
// NOTE(review): the guard for this memset is not visible; presumably it runs only
// when the query failed, so that the flag below reads as "unsupported" - confirm.
121 memset( &hwopts, 0,
sizeof(hwopts) );
124 if ( !hwopts.ComputeShaders_Plus_RawAndStructuredBuffers_Via_Shader_4_x )
126 return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );
// Cache the immediate context; ReleaseAndGetAddressOf drops any previously held one.
133 pDevice->GetImmediateContext( m_context.ReleaseAndGetAddressOf() );
// --- BC6H shaders ---
// NOTE(review): each CreateComputeShader result is stored in `hr`; the checks of
// `hr` are on lines not visible here.
139 HRESULT hr = pDevice->CreateComputeShader( BC6HEncode_TryModeG10CS,
sizeof(BC6HEncode_TryModeG10CS),
nullptr, m_BC6H_tryModeG10CS.ReleaseAndGetAddressOf() );
144 hr = pDevice->CreateComputeShader( BC6HEncode_TryModeLE10CS,
sizeof(BC6HEncode_TryModeLE10CS),
nullptr, m_BC6H_tryModeLE10CS.ReleaseAndGetAddressOf() );
149 hr = pDevice->CreateComputeShader( BC6HEncode_EncodeBlockCS,
sizeof(BC6HEncode_EncodeBlockCS),
nullptr, m_BC6H_encodeBlockCS.ReleaseAndGetAddressOf() );
// --- BC7 shaders ---
156 hr = pDevice->CreateComputeShader( BC7Encode_TryMode456CS,
sizeof(BC7Encode_TryMode456CS),
nullptr, m_BC7_tryMode456CS.ReleaseAndGetAddressOf() );
161 hr = pDevice->CreateComputeShader( BC7Encode_TryMode137CS,
sizeof(BC7Encode_TryMode137CS),
nullptr, m_BC7_tryMode137CS.ReleaseAndGetAddressOf() );
166 hr = pDevice->CreateComputeShader( BC7Encode_TryMode02CS,
sizeof(BC7Encode_TryMode02CS),
nullptr, m_BC7_tryMode02CS.ReleaseAndGetAddressOf() );
171 hr = pDevice->CreateComputeShader( BC7Encode_EncodeBlockCS,
sizeof(BC7Encode_EncodeBlockCS),
nullptr, m_BC7_encodeBlockCS.ReleaseAndGetAddressOf() );
// Body fragments of the per-image setup routine (uses `width`, `height`,
// `alphaWeight`; presumably the definition of Prepare - the signature line is not
// visible in this excerpt). Visible steps: validate the arguments, choose the
// uncompressed source format for the requested BC format, then (re)create the GPU
// buffers and views sized for the image's block count.
180 _Use_decl_annotations_
// Reject empty dimensions and negative alpha weights (the rejected path's return
// is not visible here).
183 if ( !width || !height || alphaWeight < 0.f )
// Dimensions must fit in 32 bits; they are later narrowed to UINT.
187 if ( (width > 0xFFFFFFFF) || (height > 0xFFFFFFFF) )
194 m_alphaWeight = alphaWeight;
// Number of 4x4 blocks per axis: ceil(dim / 4), never less than 1.
196 size_t xblocks = std::max<size_t>( 1, (width + 3) >> 2 );
197 size_t yblocks = std::max<size_t>( 1, (height + 3) >> 2 );
198 size_t num_blocks = xblocks * yblocks;
// Map the target BC format to the uncompressed format the source image must have
// (the `switch` header itself is not visible here).
// BC6H variants take 32-bit float RGBA input.
203 case DXGI_FORMAT_BC6H_TYPELESS:
204 case DXGI_FORMAT_BC6H_UF16:
205 case DXGI_FORMAT_BC6H_SF16:
206 m_srcformat = DXGI_FORMAT_R32G32B32A32_FLOAT;
// BC7 linear variants take 8-bit UNORM RGBA input.
210 case DXGI_FORMAT_BC7_TYPELESS:
211 case DXGI_FORMAT_BC7_UNORM:
212 m_srcformat = DXGI_FORMAT_R8G8B8A8_UNORM;
// BC7 sRGB takes 8-bit sRGB RGBA input.
215 case DXGI_FORMAT_BC7_UNORM_SRGB:
216 m_srcformat = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB;
// Presumably the default case: any other format clears both stored formats and
// reports "not supported".
220 m_bcformat = m_srcformat = DXGI_FORMAT_UNKNOWN;
221 return HRESULT_FROM_WIN32( ERROR_NOT_SUPPORTED );
226 auto pDevice = m_device.Get();
// One BufferBC6HBC7 element per block.
// NOTE(review): bufferSize is narrowed to UINT below with no visible range check;
// very large images could overflow the 32-bit ByteWidth - confirm.
231 size_t bufferSize = num_blocks *
sizeof( BufferBC6HBC7 );
// --- GPU structured buffers: output plus two error/intermediate buffers ---
// All three share one description: default usage, bindable as both UAV and SRV.
233 D3D11_BUFFER_DESC desc;
234 memset( &desc, 0,
sizeof(desc) );
235 desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
236 desc.Usage = D3D11_USAGE_DEFAULT;
237 desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
238 desc.StructureByteStride =
sizeof( BufferBC6HBC7 );
239 desc.ByteWidth =
static_cast<UINT
>( bufferSize );
241 HRESULT hr = pDevice->CreateBuffer( &desc,
nullptr, m_output.ReleaseAndGetAddressOf() );
247 hr = pDevice->CreateBuffer( &desc,
nullptr, m_err1.ReleaseAndGetAddressOf() );
253 hr = pDevice->CreateBuffer( &desc,
nullptr, m_err2.ReleaseAndGetAddressOf() );
// --- Staging buffer: CPU-readable copy target of the same size as the output ---
262 D3D11_BUFFER_DESC desc;
263 memset( &desc, 0,
sizeof(desc) );
264 desc.Usage = D3D11_USAGE_STAGING;
265 desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
266 desc.ByteWidth =
static_cast<UINT
>( bufferSize );
268 HRESULT hr = pDevice->CreateBuffer( &desc,
nullptr, m_outputCPU.ReleaseAndGetAddressOf() );
// --- Constant buffer: dynamic, CPU-writable, sized to ConstantsBC6HBC7 ---
277 D3D11_BUFFER_DESC desc;
278 memset( &desc, 0,
sizeof(desc) );
279 desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
280 desc.Usage = D3D11_USAGE_DYNAMIC;
281 desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
282 desc.ByteWidth =
sizeof( ConstantsBC6HBC7 );
284 HRESULT hr = pDevice->CreateBuffer( &desc,
nullptr, m_constBuffer.ReleaseAndGetAddressOf() );
// --- Shader resource views over the two error buffers (num_blocks elements) ---
293 D3D11_SHADER_RESOURCE_VIEW_DESC desc;
294 memset( &desc, 0,
sizeof(desc) );
295 desc.Buffer.NumElements =
static_cast<UINT
>( num_blocks );
296 desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
298 HRESULT hr = pDevice->CreateShaderResourceView( m_err1.Get(), &desc, m_err1SRV.ReleaseAndGetAddressOf() );
304 hr = pDevice->CreateShaderResourceView( m_err2.Get(), &desc, m_err2SRV.ReleaseAndGetAddressOf() );
// --- Unordered access views over the output and both error buffers ---
313 D3D11_UNORDERED_ACCESS_VIEW_DESC desc;
314 memset( &desc, 0,
sizeof(desc) );
315 desc.Buffer.NumElements =
static_cast<UINT
>( num_blocks );
316 desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
318 HRESULT hr = pDevice->CreateUnorderedAccessView( m_output.Get(), &desc, m_outputUAV.ReleaseAndGetAddressOf() );
324 hr = pDevice->CreateUnorderedAccessView( m_err1.Get(), &desc, m_err1UAV.ReleaseAndGetAddressOf() );
330 hr = pDevice->CreateUnorderedAccessView( m_err2.Get(), &desc, m_err2UAV.ReleaseAndGetAddressOf() );
// Body fragments of the GPU compression routine (uses `srcImage` / `destImage`;
// presumably the definition of Compress - the signature line is not visible in
// this excerpt). Visible steps:
//   1. check the images against the stored width/height/formats,
//   2. upload the source pixels into a texture and create an SRV for it,
//   3. process the blocks in batches of at most MAX_BLOCK_BATCH, running the
//      BC7 or BC6H shader passes for each batch,
//   4. copy the GPU output to the staging buffer and memcpy it row by row into
//      the destination.
// Several guards, braces and return statements are on lines not visible here.
342 _Use_decl_annotations_
// Tail of the argument check: the images must match what was configured earlier.
350 || srcImage.
width != m_width
351 || srcImage.
height != m_height
352 || srcImage.
format != m_srcformat
353 || destImage.
format != m_bcformat )
359 auto pDevice = m_device.Get();
// An sRGB source is uploaded and viewed as plain UNORM; every other source format
// is used unchanged.
364 DXGI_FORMAT inputFormat = ( m_srcformat == DXGI_FORMAT_R8G8B8A8_UNORM_SRGB ) ? DXGI_FORMAT_R8G8B8A8_UNORM : m_srcformat;
// --- Source texture, initialised directly from the caller's pixel memory ---
366 ComPtr<ID3D11Texture2D> sourceTex;
368 D3D11_TEXTURE2D_DESC desc;
369 memset( &desc, 0,
sizeof(desc) );
370 desc.Width =
static_cast<UINT
>( srcImage.
width );
371 desc.Height =
static_cast<UINT
>( srcImage.
height );
374 desc.Format = inputFormat;
375 desc.SampleDesc.Count = 1;
376 desc.Usage = D3D11_USAGE_DEFAULT;
377 desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
379 D3D11_SUBRESOURCE_DATA initData;
380 initData.pSysMem = srcImage.
pixels;
381 initData.SysMemPitch =
static_cast<DWORD
>( srcImage.
rowPitch );
382 initData.SysMemSlicePitch =
static_cast<DWORD
>( srcImage.
slicePitch );
384 HRESULT hr = pDevice->CreateTexture2D( &desc, &initData, sourceTex.GetAddressOf() );
// --- Single-mip 2D shader resource view over the source texture ---
391 ComPtr<ID3D11ShaderResourceView> sourceSRV;
393 D3D11_SHADER_RESOURCE_VIEW_DESC desc;
394 memset( &desc, 0,
sizeof(desc) );
395 desc.Texture2D.MipLevels = 1;
396 desc.Format = inputFormat;
397 desc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
399 HRESULT hr = pDevice->CreateShaderResourceView( sourceTex.Get(), &desc, sourceSRV.GetAddressOf() );
// Format dispatch (switch header and case bodies are not visible here); it
// separates the BC6H family from the BC7 family.
410 case DXGI_FORMAT_BC6H_TYPELESS:
411 case DXGI_FORMAT_BC6H_UF16:
412 case DXGI_FORMAT_BC6H_SF16:
415 case DXGI_FORMAT_BC7_TYPELESS:
416 case DXGI_FORMAT_BC7_UNORM:
417 case DXGI_FORMAT_BC7_UNORM_SRGB:
// Upper bound on the number of blocks handled per loop iteration.
425 const UINT MAX_BLOCK_BATCH = 64;
427 auto pContext = m_context.Get();
// Number of 4x4 blocks per axis: ceil(dim / 4), never less than 1.
431 size_t xblocks = std::max<size_t>( 1, (m_width + 3) >> 2 );
432 size_t yblocks = std::max<size_t>( 1, (m_height + 3) >> 2 );
434 UINT num_total_blocks =
static_cast<UINT
>( xblocks * yblocks );
// num_blocks counts the blocks still to do; start_block_id is the first block of
// the current batch (their per-iteration updates are on lines not visible here).
435 UINT num_blocks = num_total_blocks;
436 int start_block_id = 0;
437 while (num_blocks > 0)
439 UINT n = std::min<UINT>( num_blocks, MAX_BLOCK_BATCH );
440 UINT uThreadGroupCount = n;
// Upload the shader constants for this batch (first pass of either codec).
443 D3D11_MAPPED_SUBRESOURCE mapped;
444 HRESULT hr = pContext->Map( m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
448 ConstantsBC6HBC7 param;
449 param.tex_width =
static_cast<UINT
>( srcImage.
width );
450 param.num_block_x =
static_cast<UINT
>( xblocks );
451 param.format = m_bcformat;
453 param.start_block_id = start_block_id;
454 param.num_total_blocks = num_total_blocks;
455 param.alpha_weight = m_alphaWeight;
// Copy the filled-in struct into the mapped constant buffer.
456 memcpy( mapped.pData, &param,
sizeof( param ) );
458 pContext->Unmap( m_constBuffer.Get(), 0 );
// ===== BC7 path =====
// Pass 1: try modes 4/5/6; results go to err1. Dispatches ceil(n / 4) groups.
464 ID3D11ShaderResourceView* pSRVs[] = { sourceSRV.Get(),
nullptr };
465 RunComputeShader( pContext, m_BC7_tryMode456CS.Get(), pSRVs, 2, m_constBuffer.Get(),
466 m_err1UAV.Get(), std::max<UINT>( (uThreadGroupCount + 3) / 4, 1) );
// Pass 2: try modes 1, 3, 7 in turn, ping-ponging between the error buffers:
// i=0 reads err1/writes err2, i=1 reads err2/writes err1, i=2 reads err1/writes err2.
468 for ( UINT i = 0; i < 3; ++i )
470 static const UINT modes[] = { 1, 3, 7 };
472 D3D11_MAPPED_SUBRESOURCE mapped;
473 HRESULT hr = pContext->Map( m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
// NOTE(review): presumably only executed when Map failed (guard not visible) - confirm.
476 ResetContext( pContext );
480 ConstantsBC6HBC7 param;
481 param.tex_width =
static_cast<UINT
>( srcImage.
width );
482 param.num_block_x =
static_cast<UINT
>( xblocks );
483 param.format = m_bcformat;
484 param.mode_id = modes[i];
485 param.start_block_id = start_block_id;
486 param.num_total_blocks = num_total_blocks;
487 param.alpha_weight = m_alphaWeight;
488 memcpy( mapped.pData, &param,
sizeof( param ) );
489 pContext->Unmap( m_constBuffer.Get(), 0 );
492 pSRVs[1] = (i & 1) ? m_err2SRV.Get() : m_err1SRV.Get();
493 RunComputeShader( pContext, m_BC7_tryMode137CS.Get(), pSRVs, 2, m_constBuffer.Get(),
494 (i & 1) ? m_err1UAV.Get() : m_err2UAV.Get(), uThreadGroupCount );
// Pass 3: try modes 0 and 2, continuing the ping-pong:
// i=0 reads err2/writes err1, i=1 reads err1/writes err2.
497 for ( UINT i = 0; i < 2; ++i )
499 static const UINT modes[] = { 0, 2 };
501 D3D11_MAPPED_SUBRESOURCE mapped;
502 HRESULT hr = pContext->Map( m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
// NOTE(review): presumably only executed when Map failed (guard not visible) - confirm.
505 ResetContext( pContext );
509 ConstantsBC6HBC7 param;
510 param.tex_width =
static_cast<UINT
>( srcImage.
width );
511 param.num_block_x =
static_cast<UINT
>( xblocks );
512 param.format = m_bcformat;
513 param.mode_id = modes[i];
514 param.start_block_id = start_block_id;
515 param.num_total_blocks = num_total_blocks;
516 param.alpha_weight = m_alphaWeight;
517 memcpy( mapped.pData, &param,
sizeof( param ) );
518 pContext->Unmap( m_constBuffer.Get(), 0 );
521 pSRVs[1] = (i & 1) ? m_err1SRV.Get() : m_err2SRV.Get();
522 RunComputeShader( pContext, m_BC7_tryMode02CS.Get(), pSRVs, 2, m_constBuffer.Get(),
523 (i & 1) ? m_err2UAV.Get() : m_err1UAV.Get(), uThreadGroupCount );
// Final BC7 pass: encode from err2 (written last above) into the output buffer.
526 pSRVs[1] = m_err2SRV.Get();
527 RunComputeShader( pContext, m_BC7_encodeBlockCS.Get(), pSRVs, 2, m_constBuffer.Get(),
528 m_outputUAV.Get(), std::max<UINT>( (uThreadGroupCount + 3) / 4, 1) );
// ===== BC6H path =====
// Pass 1: the "G10" try-mode shader; results go to err1. Dispatches ceil(n / 4) groups.
533 ID3D11ShaderResourceView* pSRVs[] = { sourceSRV.Get(),
nullptr };
534 RunComputeShader( pContext, m_BC6H_tryModeG10CS.Get(), pSRVs, 2, m_constBuffer.Get(),
535 m_err1UAV.Get(), std::max<UINT>( (uThreadGroupCount + 3) / 4, 1) );
// Pass 2: ten iterations of the "LE10" try-mode shader, ping-ponging the error
// buffers (even i: err1 -> err2, odd i: err2 -> err1), so the last write lands in err1.
537 for ( UINT i = 0; i < 10; ++i )
540 D3D11_MAPPED_SUBRESOURCE mapped;
541 HRESULT hr = pContext->Map( m_constBuffer.Get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped );
// NOTE(review): presumably only executed when Map failed (guard not visible) - confirm.
544 ResetContext( pContext );
548 ConstantsBC6HBC7 param;
549 param.tex_width =
static_cast<UINT
>( srcImage.
width );
550 param.num_block_x =
static_cast<UINT
>( xblocks );
551 param.format = m_bcformat;
553 param.start_block_id = start_block_id;
554 param.num_total_blocks = num_total_blocks;
555 memcpy( mapped.pData, &param,
sizeof( param ) );
556 pContext->Unmap( m_constBuffer.Get(), 0 );
559 pSRVs[1] = (i & 1) ? m_err2SRV.Get() : m_err1SRV.Get();
560 RunComputeShader( pContext, m_BC6H_tryModeLE10CS.Get(), pSRVs, 2, m_constBuffer.Get(),
561 (i & 1) ? m_err1UAV.Get() : m_err2UAV.Get(), std::max<UINT>( (uThreadGroupCount + 1) / 2, 1) );
// Final BC6H pass: encode from err1 into the output buffer, ceil(n / 2) groups.
564 pSRVs[1] = m_err1SRV.Get();
565 RunComputeShader( pContext, m_BC6H_encodeBlockCS.Get(), pSRVs, 2, m_constBuffer.Get(),
566 m_outputUAV.Get(), std::max<UINT>( (uThreadGroupCount + 1) / 2, 1) );
// Unbind everything before reading the results back.
573 ResetContext( pContext );
// ===== Readback =====
// Copy GPU output into the CPU-readable staging buffer and map it for reading.
577 pContext->CopyResource( m_outputCPU.Get(), m_output.Get() );
579 D3D11_MAPPED_SUBRESOURCE mapped;
580 HRESULT hr = pContext->Map( m_outputCPU.Get(), 0, D3D11_MAP_READ, 0, &mapped );
583 const uint8_t *pSrc =
reinterpret_cast<const uint8_t *
>( mapped.pData );
// Bytes per row of blocks in the GPU buffer.
586 size_t pitch = xblocks *
sizeof( BufferBC6HBC7 );
// Number of block rows in the destination: ceil(height / 4), never less than 1.
588 size_t rows = std::max<size_t>( 1, ( destImage.
height + 3 ) >> 2 );
// Row-by-row copy of destImage.rowPitch bytes; `pDest` and the per-row pointer
// advances are on lines not visible here.
590 for(
size_t h = 0; h < rows; ++h )
592 memcpy( pDest, pSrc, destImage.
rowPitch );
598 pContext->Unmap( m_outputCPU.Get(), 0 );
// Declaration: one-time setup against the given D3D11 device; returns an HRESULT.
HRESULT Initialize(_In_ ID3D11Device *pDevice)
// Declaration: compresses srcImage into destImage; returns an HRESULT.
// Both images are passed by const reference.
HRESULT Compress(_In_ const Image &srcImage, _In_ const Image &destImage)
size_t _In_ DXGI_FORMAT size_t _In_ TEXP_LEGACY_FORMAT _In_ DWORD flags assert(pDestination &&outSize > 0)
_In_ size_t _In_ size_t _In_ DXGI_FORMAT format
// Declaration: per-image setup for the given dimensions and target format;
// alphaWeight defaults to 1.f when omitted. Returns an HRESULT.
HRESULT Prepare(_In_ size_t width, _In_ size_t height, _In_ DXGI_FORMAT format, _In_ float alphaWeight=1.f)