Paradox Game Engine  v1.0.0 beta06
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Events Macros Pages
BC6HBC7.cpp
Go to the documentation of this file.
1 //-------------------------------------------------------------------------------------
2 // BC6HBC7.cpp
3 //
4 // Block-compression (BC) functionality for BC6H and BC7 (DirectX 11 texture compression)
5 //
6 // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
7 // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
9 // PARTICULAR PURPOSE.
10 //
11 // Copyright (c) Microsoft Corporation. All rights reserved.
12 //
13 // http://go.microsoft.com/fwlink/?LinkId=248926
14 //-------------------------------------------------------------------------------------
15 
16 #include "directxtexp.h"
17 
18 #include "BC.h"
19 
20 using namespace DirectX::PackedVector;
21 
22 namespace DirectX
23 {
24 
25 //-------------------------------------------------------------------------------------
26 // Constants
27 //-------------------------------------------------------------------------------------
28 
29 static const float fEpsilon = (0.25f / 64.0f) * (0.25f / 64.0f); // equals (0.25 / 64) squared
30 static const float pC3[] = { 2.0f/2.0f, 1.0f/2.0f, 0.0f/2.0f }; // three weights in halves, descending: 1, 1/2, 0
31 static const float pD3[] = { 0.0f/2.0f, 1.0f/2.0f, 2.0f/2.0f }; // three weights in halves, ascending: 0, 1/2, 1 (pC3 reversed)
32 static const float pC4[] = { 3.0f/3.0f, 2.0f/3.0f, 1.0f/3.0f, 0.0f/3.0f }; // four weights in thirds, descending: 1, 2/3, 1/3, 0
33 static const float pD4[] = { 0.0f/3.0f, 1.0f/3.0f, 2.0f/3.0f, 3.0f/3.0f }; // four weights in thirds, ascending: 0, 1/3, 2/3, 1 (pC4 reversed)
34 
35 const int g_aWeights2[] = {0, 21, 43, 64}; // 4 integer weights spanning 0..64; NOTE(review): presumably for 2-bit indices - confirm against usage
36 const int g_aWeights3[] = {0, 9, 18, 27, 37, 46, 55, 64}; // 8 integer weights spanning 0..64; NOTE(review): presumably for 3-bit indices - confirm
37 const int g_aWeights4[] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64}; // 16 integer weights spanning 0..64; NOTE(review): presumably for 4-bit indices - confirm
38 
39 // Subset index (0..2) of every pixel, indexed [Partition = subset count - 1][Shape 0..63][Pixel (index into 4x4 block)]
40 static const uint8_t g_aPartitionTable[3][64][16] =
41 {
42  { // 1 Region case has no subsets (all 0)
43  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
44  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
45  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
46  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
47  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
48  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
49  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
50  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
51  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
52  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
53  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
54  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
55  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
56  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
57  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
58  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
59  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
60  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
61  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
62  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
63  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
64  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
65  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
66  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
67  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
68  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
69  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
70  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
71  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
72  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
73  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
74  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
75  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
76  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
77  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
78  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
79  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
80  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
81  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
82  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
83  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
84  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
85  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
86  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
87  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
88  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
89  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
90  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
91  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
92  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
93  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
94  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
95  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
96  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
97  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
98  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
99  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
100  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
101  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
102  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
103  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
104  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
105  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
106  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
107  },
108 
109  { // BC6H/BC7 Partition Set for 2 Subsets
110  { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }, // Shape 0
111  { 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1 }, // Shape 1
112  { 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 }, // Shape 2
113  { 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 }, // Shape 3
114  { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1 }, // Shape 4
115  { 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // Shape 5
116  { 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // Shape 6
117  { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1 }, // Shape 7
118  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1 }, // Shape 8
119  { 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 9
120  { 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1 }, // Shape 10
121  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1 }, // Shape 11
122  { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 12
123  { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 13
124  { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 14
125  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1 }, // Shape 15
126  { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1 }, // Shape 16
127  { 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, // Shape 17
128  { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0 }, // Shape 18
129  { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0 }, // Shape 19
130  { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, // Shape 20
131  { 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 }, // Shape 21
132  { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 }, // Shape 22
133  { 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1 }, // Shape 23
134  { 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0 }, // Shape 24
135  { 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0 }, // Shape 25
136  { 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0 }, // Shape 26
137  { 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0 }, // Shape 27
138  { 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0 }, // Shape 28
139  { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, // Shape 29
140  { 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0 }, // Shape 30
141  { 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0 }, // Shape 31
142 
143  // BC7 Partition Set for 2 Subsets (second-half)
144  { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 }, // Shape 32
145  { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1 }, // Shape 33
146  { 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0 }, // Shape 34
147  { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0 }, // Shape 35
148  { 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0 }, // Shape 36
149  { 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0 }, // Shape 37
150  { 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 }, // Shape 38
151  { 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1 }, // Shape 39
152  { 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0 }, // Shape 40
153  { 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0 }, // Shape 41
154  { 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0 }, // Shape 42
155  { 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0 }, // Shape 43
156  { 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0 }, // Shape 44
157  { 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1 }, // Shape 45
158  { 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1 }, // Shape 46
159  { 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0 }, // Shape 47
160  { 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, // Shape 48
161  { 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0 }, // Shape 49
162  { 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0 }, // Shape 50
163  { 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0 }, // Shape 51
164  { 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1 }, // Shape 52
165  { 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1 }, // Shape 53
166  { 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0 }, // Shape 54
167  { 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0 }, // Shape 55
168  { 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1 }, // Shape 56
169  { 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1 }, // Shape 57
170  { 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1 }, // Shape 58
171  { 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1 }, // Shape 59
172  { 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }, // Shape 60
173  { 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0 }, // Shape 61
174  { 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0 }, // Shape 62
175  { 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1 } // Shape 63
176  },
177 
178  { // BC7 Partition Set for 3 Subsets
179  { 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 1, 2, 2, 2, 2 }, // Shape 0
180  { 0, 0, 0, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1 }, // Shape 1
181  { 0, 0, 0, 0, 2, 0, 0, 1, 2, 2, 1, 1, 2, 2, 1, 1 }, // Shape 2
182  { 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1 }, // Shape 3
183  { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2 }, // Shape 4
184  { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 2, 2 }, // Shape 5
185  { 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1 }, // Shape 6
186  { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1 }, // Shape 7
187  { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2 }, // Shape 8
188  { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2 }, // Shape 9
189  { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2 }, // Shape 10
190  { 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2 }, // Shape 11
191  { 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2 }, // Shape 12
192  { 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2 }, // Shape 13
193  { 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2 }, // Shape 14
194  { 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0 }, // Shape 15
195  { 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2 }, // Shape 16
196  { 0, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0 }, // Shape 17
197  { 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2 }, // Shape 18
198  { 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1 }, // Shape 19
199  { 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2 }, // Shape 20
200  { 0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 1 }, // Shape 21
201  { 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2 }, // Shape 22
202  { 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 1, 0 }, // Shape 23
203  { 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0 }, // Shape 24
204  { 0, 0, 1, 2, 0, 0, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2 }, // Shape 25
205  { 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1, 0, 1, 1, 0 }, // Shape 26
206  { 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1 }, // Shape 27
207  { 0, 0, 2, 2, 1, 1, 0, 2, 1, 1, 0, 2, 0, 0, 2, 2 }, // Shape 28
208  { 0, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0, 2, 2, 2, 2, 2 }, // Shape 29
209  { 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1 }, // Shape 30
210  { 0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 2, 1 }, // Shape 31
211  { 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 2, 2, 2 }, // Shape 32
212  { 0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 0, 1, 1 }, // Shape 33
213  { 0, 0, 1, 1, 0, 0, 1, 2, 0, 0, 2, 2, 0, 2, 2, 2 }, // Shape 34
214  { 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0 }, // Shape 35
215  { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0 }, // Shape 36
216  { 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0 }, // Shape 37
217  { 0, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 0, 1, 2, 0 }, // Shape 38
218  { 0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1 }, // Shape 39
219  { 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1 }, // Shape 40
220  { 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2 }, // Shape 41
221  { 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1 }, // Shape 42
222  { 0, 0, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 2, 2 }, // Shape 43
223  { 0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1 }, // Shape 44
224  { 0, 2, 2, 0, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 2, 1 }, // Shape 45
225  { 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1 }, // Shape 46
226  { 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1 }, // Shape 47
227  { 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2 }, // Shape 48
228  { 0, 2, 2, 2, 0, 1, 1, 1, 0, 2, 2, 2, 0, 1, 1, 1 }, // Shape 49
229  { 0, 0, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 1, 1, 1, 2 }, // Shape 50
230  { 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2 }, // Shape 51
231  { 0, 2, 2, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2 }, // Shape 52
232  { 0, 0, 0, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2 }, // Shape 53
233  { 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2 }, // Shape 54
234  { 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2 }, // Shape 55
235  { 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2 }, // Shape 56
236  { 0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2 }, // Shape 57
237  { 0, 0, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2 }, // Shape 58
238  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2 }, // Shape 59
239  { 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1 }, // Shape 60
240  { 0, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2 }, // Shape 61
241  { 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, // Shape 62
242  { 0, 1, 1, 1, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 2, 0 } // Shape 63
243  }
244 };
245 
246 // Fix-up pixel offsets, indexed [Partition = subset count - 1][Shape 0..63][Fixup slot]; slot 0 is always 0 and unused trailing slots are 0
247 static const uint8_t g_aFixUp[3][64][3] =
248 {
249  { // No fix-ups for 1st subset for BC6H or BC7
250  { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
251  { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
252  { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
253  { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
254  { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
255  { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
256  { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
257  { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
258  { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
259  { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
260  { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
261  { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
262  { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
263  { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
264  { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0},
265  { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}, { 0, 0, 0}
266  },
267 
268  { // BC6H/BC7 Partition Set Fixups for 2 Subsets
269  { 0,15, 0}, { 0,15, 0}, { 0,15, 0}, { 0,15, 0},
270  { 0,15, 0}, { 0,15, 0}, { 0,15, 0}, { 0,15, 0},
271  { 0,15, 0}, { 0,15, 0}, { 0,15, 0}, { 0,15, 0},
272  { 0,15, 0}, { 0,15, 0}, { 0,15, 0}, { 0,15, 0},
273  { 0,15, 0}, { 0, 2, 0}, { 0, 8, 0}, { 0, 2, 0},
274  { 0, 2, 0}, { 0, 8, 0}, { 0, 8, 0}, { 0,15, 0},
275  { 0, 2, 0}, { 0, 8, 0}, { 0, 2, 0}, { 0, 2, 0},
276  { 0, 8, 0}, { 0, 8, 0}, { 0, 2, 0}, { 0, 2, 0},
277 
278  // BC7 Partition Set Fixups for 2 Subsets (second-half)
279  { 0,15, 0}, { 0,15, 0}, { 0, 6, 0}, { 0, 8, 0},
280  { 0, 2, 0}, { 0, 8, 0}, { 0,15, 0}, { 0,15, 0},
281  { 0, 2, 0}, { 0, 8, 0}, { 0, 2, 0}, { 0, 2, 0},
282  { 0, 2, 0}, { 0,15, 0}, { 0,15, 0}, { 0, 6, 0},
283  { 0, 6, 0}, { 0, 2, 0}, { 0, 6, 0}, { 0, 8, 0},
284  { 0,15, 0}, { 0,15, 0}, { 0, 2, 0}, { 0, 2, 0},
285  { 0,15, 0}, { 0,15, 0}, { 0,15, 0}, { 0,15, 0},
286  { 0,15, 0}, { 0, 2, 0}, { 0, 2, 0}, { 0,15, 0}
287  },
288 
289  { // BC7 Partition Set Fixups for 3 Subsets
290  { 0, 3,15}, { 0, 3, 8}, { 0,15, 8}, { 0,15, 3},
291  { 0, 8,15}, { 0, 3,15}, { 0,15, 3}, { 0,15, 8},
292  { 0, 8,15}, { 0, 8,15}, { 0, 6,15}, { 0, 6,15},
293  { 0, 6,15}, { 0, 5,15}, { 0, 3,15}, { 0, 3, 8},
294  { 0, 3,15}, { 0, 3, 8}, { 0, 8,15}, { 0,15, 3},
295  { 0, 3,15}, { 0, 3, 8}, { 0, 6,15}, { 0,10, 8},
296  { 0, 5, 3}, { 0, 8,15}, { 0, 8, 6}, { 0, 6,10},
297  { 0, 8,15}, { 0, 5,15}, { 0,15,10}, { 0,15, 8},
298  { 0, 8,15}, { 0,15, 3}, { 0, 3,15}, { 0, 5,10},
299  { 0, 6,10}, { 0,10, 8}, { 0, 8, 9}, { 0,15,10},
300  { 0,15, 6}, { 0, 3,15}, { 0,15, 8}, { 0, 5,15},
301  { 0,15, 3}, { 0,15, 6}, { 0,15, 6}, { 0,15, 8},
302  { 0, 3,15}, { 0,15, 3}, { 0, 5,15}, { 0, 5,15},
303  { 0, 5,15}, { 0, 8,15}, { 0, 5,15}, { 0,10,15},
304  { 0, 5,15}, { 0,10,15}, { 0, 8,15}, { 0,13,15},
305  { 0,15, 3}, { 0,12,15}, { 0, 3,15}, { 0, 3, 8}
306  }
307 };
308 
309 // BC6H Compression: 14 mode layouts of 82 {field tag, bit number} entries each; modes 11-14 pad their tail with {NA, 0}. NOTE(review): presumably one entry per header bit in stream order - confirm against the code that reads ms_aDesc
310 const D3DX_BC6H::ModeDescriptor D3DX_BC6H::ms_aDesc[14][82] =
311 {
312  { // Mode 1 (0x00) - 10 5 5 5
313  { M, 0}, { M, 1}, {GY, 4}, {BY, 4}, {BZ, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
314  {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
315  {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
316  {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
317  {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
318  {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
319  {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
320  {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
321  { D, 3}, { D, 4},
322  },
323 
324  { // Mode 2 (0x01) - 7 6 6 6
325  { M, 0}, { M, 1}, {GY, 5}, {GZ, 4}, {GZ, 5}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
326  {RW, 5}, {RW, 6}, {BZ, 0}, {BZ, 1}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
327  {GW, 5}, {GW, 6}, {BY, 5}, {BZ, 2}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
328  {BW, 5}, {BW, 6}, {BZ, 3}, {BZ, 5}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
329  {RX, 5}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
330  {GX, 5}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
331  {BX, 5}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
332  {RY, 5}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {RZ, 5}, { D, 0}, { D, 1}, { D, 2},
333  { D, 3}, { D, 4},
334  },
335 
336  { // Mode 3 (0x02) - 11 5 4 4
337  { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
338  {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
339  {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
340  {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
341  {RW,10}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GW,10},
342  {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BW,10},
343  {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
344  {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
345  { D, 3}, { D, 4},
346  },
347 
348  { // Mode 4 (0x06) - 11 4 5 4
349  { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
350  {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
351  {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
352  {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RW,10},
353  {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
354  {GW,10}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BW,10},
355  {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {BZ, 0},
356  {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {GY, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
357  { D, 3}, { D, 4},
358  },
359 
360  { // Mode 5 (0x0a) - 11 4 4 5
361  { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
362  {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
363  {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
364  {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RW,10},
365  {BY, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GW,10},
366  {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
367  {BW,10}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {BZ, 1},
368  {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {BZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
369  { D, 3}, { D, 4},
370  },
371 
372  { // Mode 6 (0x0e) - 9 5 5 5
373  { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
374  {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
375  {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
376  {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
377  {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
378  {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
379  {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
380  {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
381  { D, 3}, { D, 4},
382  },
383 
384  { // Mode 7 (0x12) - 8 6 5 5
385  { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
386  {RW, 5}, {RW, 6}, {RW, 7}, {GZ, 4}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
387  {GW, 5}, {GW, 6}, {GW, 7}, {BZ, 2}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
388  {BW, 5}, {BW, 6}, {BW, 7}, {BZ, 3}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
389  {RX, 5}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
390  {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
391  {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
392  {RY, 5}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {RZ, 5}, { D, 0}, { D, 1}, { D, 2},
393  { D, 3}, { D, 4},
394  },
395 
396  { // Mode 8 (0x16) - 8 5 6 5
397  { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
398  {RW, 5}, {RW, 6}, {RW, 7}, {BZ, 0}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
399  {GW, 5}, {GW, 6}, {GW, 7}, {GY, 5}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
400  {BW, 5}, {BW, 6}, {BW, 7}, {GZ, 5}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
401  {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
402  {GX, 5}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
403  {BZ, 1}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
404  {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
405  { D, 3}, { D, 4},
406  },
407 
408  { // Mode 9 (0x1a) - 8 5 5 6
409  { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
410  {RW, 5}, {RW, 6}, {RW, 7}, {BZ, 1}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
411  {GW, 5}, {GW, 6}, {GW, 7}, {BY, 5}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
412  {BW, 5}, {BW, 6}, {BW, 7}, {BZ, 5}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
413  {GZ, 4}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
414  {BZ, 0}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
415  {BX, 5}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
416  {BZ, 2}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {BZ, 3}, { D, 0}, { D, 1}, { D, 2},
417  { D, 3}, { D, 4},
418  },
419 
420  { // Mode 10 (0x1e) - 6 6 6 6
421  { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
422  {RW, 5}, {GZ, 4}, {BZ, 0}, {BZ, 1}, {BY, 4}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
423  {GW, 5}, {GY, 5}, {BY, 5}, {BZ, 2}, {GY, 4}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
424  {BW, 5}, {GZ, 5}, {BZ, 3}, {BZ, 5}, {BZ, 4}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
425  {RX, 5}, {GY, 0}, {GY, 1}, {GY, 2}, {GY, 3}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
426  {GX, 5}, {GZ, 0}, {GZ, 1}, {GZ, 2}, {GZ, 3}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
427  {BX, 5}, {BY, 0}, {BY, 1}, {BY, 2}, {BY, 3}, {RY, 0}, {RY, 1}, {RY, 2}, {RY, 3}, {RY, 4},
428  {RY, 5}, {RZ, 0}, {RZ, 1}, {RZ, 2}, {RZ, 3}, {RZ, 4}, {RZ, 5}, { D, 0}, { D, 1}, { D, 2},
429  { D, 3}, { D, 4},
430  },
431 
432  { // Mode 11 (0x03) - 10 10
433  { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
434  {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
435  {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
436  {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
437  {RX, 5}, {RX, 6}, {RX, 7}, {RX, 8}, {RX, 9}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
438  {GX, 5}, {GX, 6}, {GX, 7}, {GX, 8}, {GX, 9}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
439  {BX, 5}, {BX, 6}, {BX, 7}, {BX, 8}, {BX, 9}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
440  {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
441  {NA, 0}, {NA, 0},
442  },
443 
444  { // Mode 12 (0x07) - 11 9
445  { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
446  {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
447  {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
448  {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
449  {RX, 5}, {RX, 6}, {RX, 7}, {RX, 8}, {RW,10}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
450  {GX, 5}, {GX, 6}, {GX, 7}, {GX, 8}, {GW,10}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
451  {BX, 5}, {BX, 6}, {BX, 7}, {BX, 8}, {BW,10}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
452  {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
453  {NA, 0}, {NA, 0},
454  },
455 
456  { // Mode 13 (0x0b) - 12 8
457  { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
458  {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
459  {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
460  {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RX, 4},
461  {RX, 5}, {RX, 6}, {RX, 7}, {RW,11}, {RW,10}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GX, 4},
462  {GX, 5}, {GX, 6}, {GX, 7}, {GW,11}, {GW,10}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BX, 4},
463  {BX, 5}, {BX, 6}, {BX, 7}, {BW,11}, {BW,10}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
464  {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
465  {NA, 0}, {NA, 0},
466  },
467 
468  { // Mode 14 (0x0f) - 16 4
469  { M, 0}, { M, 1}, { M, 2}, { M, 3}, { M, 4}, {RW, 0}, {RW, 1}, {RW, 2}, {RW, 3}, {RW, 4},
470  {RW, 5}, {RW, 6}, {RW, 7}, {RW, 8}, {RW, 9}, {GW, 0}, {GW, 1}, {GW, 2}, {GW, 3}, {GW, 4},
471  {GW, 5}, {GW, 6}, {GW, 7}, {GW, 8}, {GW, 9}, {BW, 0}, {BW, 1}, {BW, 2}, {BW, 3}, {BW, 4},
472  {BW, 5}, {BW, 6}, {BW, 7}, {BW, 8}, {BW, 9}, {RX, 0}, {RX, 1}, {RX, 2}, {RX, 3}, {RW,15},
473  {RW,14}, {RW,13}, {RW,12}, {RW,11}, {RW,10}, {GX, 0}, {GX, 1}, {GX, 2}, {GX, 3}, {GW,15},
474  {GW,14}, {GW,13}, {GW,12}, {GW,11}, {GW,10}, {BX, 0}, {BX, 1}, {BX, 2}, {BX, 3}, {BW,15},
475  {BW,14}, {BW,13}, {BW,12}, {BW,11}, {BW,10}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
476  {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0}, {NA, 0},
477  {NA, 0}, {NA, 0},
478  },
479 };
480 
481 // One row per mode (Mode 1..14, same order as ms_aDesc): Mode value, Partitions, Transformed, IndexPrec, then four LDRColorA RGBAPrec entries
482 const D3DX_BC6H::ModeInfo D3DX_BC6H::ms_aInfo[] =
483 {
484  {0x00, 1, true, 3, LDRColorA(10,10,10,0), LDRColorA( 5, 5, 5,0), LDRColorA(5,5,5,0), LDRColorA(5,5,5,0)}, // Mode 1
485  {0x01, 1, true, 3, LDRColorA( 7, 7, 7,0), LDRColorA( 6, 6, 6,0), LDRColorA(6,6,6,0), LDRColorA(6,6,6,0)}, // Mode 2
486  {0x02, 1, true, 3, LDRColorA(11,11,11,0), LDRColorA( 5, 4, 4,0), LDRColorA(5,4,4,0), LDRColorA(5,4,4,0)}, // Mode 3
487  {0x06, 1, true, 3, LDRColorA(11,11,11,0), LDRColorA( 4, 5, 4,0), LDRColorA(4,5,4,0), LDRColorA(4,5,4,0)}, // Mode 4
488  {0x0a, 1, true, 3, LDRColorA(11,11,11,0), LDRColorA( 4, 4, 5,0), LDRColorA(4,4,5,0), LDRColorA(4,4,5,0)}, // Mode 5
489  {0x0e, 1, true, 3, LDRColorA( 9, 9, 9,0), LDRColorA( 5, 5, 5,0), LDRColorA(5,5,5,0), LDRColorA(5,5,5,0)}, // Mode 6
490  {0x12, 1, true, 3, LDRColorA( 8, 8, 8,0), LDRColorA( 6, 5, 5,0), LDRColorA(6,5,5,0), LDRColorA(6,5,5,0)}, // Mode 7
491  {0x16, 1, true, 3, LDRColorA( 8, 8, 8,0), LDRColorA( 5, 6, 5,0), LDRColorA(5,6,5,0), LDRColorA(5,6,5,0)}, // Mode 8
492  {0x1a, 1, true, 3, LDRColorA( 8, 8, 8,0), LDRColorA( 5, 5, 6,0), LDRColorA(5,5,6,0), LDRColorA(5,5,6,0)}, // Mode 9
493  {0x1e, 1, false, 3, LDRColorA( 6, 6, 6,0), LDRColorA( 6, 6, 6,0), LDRColorA(6,6,6,0), LDRColorA(6,6,6,0)}, // Mode 10
494  {0x03, 0, false, 4, LDRColorA(10,10,10,0), LDRColorA(10,10,10,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 11
495  {0x07, 0, true, 4, LDRColorA(11,11,11,0), LDRColorA( 9, 9, 9,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 12
496  {0x0b, 0, true, 4, LDRColorA(12,12,12,0), LDRColorA( 8, 8, 8,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 13
497  {0x0f, 0, true, 4, LDRColorA(16,16,16,0), LDRColorA( 4, 4, 4,0), LDRColorA(0,0,0,0), LDRColorA(0,0,0,0)}, // Mode 14
498 };
499 
500 const int D3DX_BC6H::ms_aModeToInfo[] = // Maps a mode value 0x00..0x1f to its row index in ms_aInfo; -1 marks invalid or reserved values
501 {
502  0, // Mode 1 - 0x00
503  1, // Mode 2 - 0x01
504  2, // Mode 3 - 0x02
505  10, // Mode 11 - 0x03
506  -1, // Invalid - 0x04
507  -1, // Invalid - 0x05
508  3, // Mode 4 - 0x06
509  11, // Mode 12 - 0x07
510  -1, // Invalid - 0x08
511  -1, // Invalid - 0x09
512  4, // Mode 5 - 0x0a
513  12, // Mode 13 - 0x0b
514  -1, // Invalid - 0x0c
515  -1, // Invalid - 0x0d
516  5, // Mode 6 - 0x0e
517  13, // Mode 14 - 0x0f
518  -1, // Invalid - 0x10
519  -1, // Invalid - 0x11
520  6, // Mode 7 - 0x12
521  -1, // Reserved - 0x13
522  -1, // Invalid - 0x14
523  -1, // Invalid - 0x15
524  7, // Mode 8 - 0x16
525  -1, // Reserved - 0x17
526  -1, // Invalid - 0x18
527  -1, // Invalid - 0x19
528  8, // Mode 9 - 0x1a
529  -1, // Reserved - 0x1b
530  -1, // Invalid - 0x1c
531  -1, // Invalid - 0x1d
532  9, // Mode 10 - 0x1e
533  -1, // Reserved - 0x1f
534 };
535 
536 // BC7 compression: uPartitions, uPartitionBits, uPBits, uRotationBits, uIndexModeBits, uIndexPrec, uIndexPrec2, RGBAPrec, RGBAPrecWithP
// One entry per BC7 mode (0..7); the comment under each entry summarises that mode.
537 const D3DX_BC7::ModeInfo D3DX_BC7::ms_aInfo[] =
538 {
539  {2, 4, 6, 0, 0, 3, 0, LDRColorA(4,4,4,0), LDRColorA(5,5,5,0)},
540  // Mode 0: Color only, 3 Subsets, RGBP 4441 (unique P-bit), 3-bit indices, 16 partitions
541  {1, 6, 2, 0, 0, 3, 0, LDRColorA(6,6,6,0), LDRColorA(7,7,7,0)},
542  // Mode 1: Color only, 2 Subsets, RGBP 6661 (shared P-bit), 3-bit indices, 64 partitions
543  {2, 6, 0, 0, 0, 2, 0, LDRColorA(5,5,5,0), LDRColorA(5,5,5,0)},
544  // Mode 2: Color only, 3 Subsets, RGB 555, 2-bit indices, 64 partitions
545  {1, 6, 4, 0, 0, 2, 0, LDRColorA(7,7,7,0), LDRColorA(8,8,8,0)},
546  // Mode 3: Color only, 2 Subsets, RGBP 7771 (unique P-bit), 2-bit indices, 64 partitions
547  {0, 0, 0, 2, 1, 2, 3, LDRColorA(5,5,5,6), LDRColorA(5,5,5,6)},
548  // Mode 4: Color w/ Separate Alpha, 1 Subset, RGB 555, A6, 16x2/16x3-bit indices, 2-bit rotation, 1-bit index selector
549  {0, 0, 0, 2, 0, 2, 2, LDRColorA(7,7,7,8), LDRColorA(7,7,7,8)},
550  // Mode 5: Color w/ Separate Alpha, 1 Subset, RGB 777, A8, 16x2/16x2-bit indices, 2-bit rotation
551  {0, 0, 2, 0, 0, 4, 0, LDRColorA(7,7,7,7), LDRColorA(8,8,8,8)},
552  // Mode 6: Color+Alpha, 1 Subset, RGBAP 77771 (unique P-bit), 16x4-bit indices
553  {1, 6, 4, 0, 0, 2, 0, LDRColorA(5,5,5,5), LDRColorA(6,6,6,6)}
554  // Mode 7: Color+Alpha, 2 Subsets, RGBAP 55551 (unique P-bit), 2-bit indices, 64 partitions
555 };
556 
557 
558 //-------------------------------------------------------------------------------------
559 // Helper functions
560 //-------------------------------------------------------------------------------------
561 inline static bool IsFixUpOffset(_In_range_(0,2) size_t uPartitions, _In_range_(0,63) size_t uShape, _In_range_(0,15) size_t uOffset)
562 {
563  assert(uPartitions < 3 && uShape < 64 && uOffset < 16);
564  _Analysis_assume_(uPartitions < 3 && uShape < 64 && uOffset < 16);
565  for(size_t p = 0; p <= uPartitions; p++)
566  {
567  if(uOffset == g_aFixUp[uPartitions][uShape][p])
568  {
569  return true;
570  }
571  }
572  return false;
573 }
574 
576 {
577  aEndPts[0].B -= aEndPts[0].A;
578  aEndPts[1].A -= aEndPts[0].A;
579  aEndPts[1].B -= aEndPts[0].A;
580 }
581 
582 inline static void TransformInverse(_Inout_updates_all_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[], _In_ const LDRColorA& Prec, _In_ bool bSigned)
583 {
584  INTColor WrapMask((1 << Prec.r) - 1, (1 << Prec.g) - 1, (1 << Prec.b) - 1);
585  aEndPts[0].B += aEndPts[0].A; aEndPts[0].B &= WrapMask;
586  aEndPts[1].A += aEndPts[0].A; aEndPts[1].A &= WrapMask;
587  aEndPts[1].B += aEndPts[0].A; aEndPts[1].B &= WrapMask;
588  if(bSigned)
589  {
590  aEndPts[0].B.SignExtend(Prec);
591  aEndPts[1].A.SignExtend(Prec);
592  aEndPts[1].B.SignExtend(Prec);
593  }
594 }
595 
596 inline static float Norm(_In_ const INTColor& a, _In_ const INTColor& b)
597 {
598  float dr = float(a.r) - float(b.r);
599  float dg = float(a.g) - float(b.g);
600  float db = float(a.b) - float(b.b);
601  return dr * dr + dg * dg + db * db;
602 }
603 
// Number of bits required to store n.
//  - n == 0 needs no bits at all.
//  - n > 0 needs its magnitude bits, plus one sign bit when bIsSigned is set.
//  - n < 0 is only expected when bIsSigned is set (asserted); the count includes the sign bit.
inline static int NBits(_In_ int n, _In_ bool bIsSigned)
{
    if(n == 0)
    {
        return 0;
    }

    int cBits = 0;

    if(n < 0)
    {
        assert(bIsSigned);
        // Shift until only the sign (-1) remains.
        while(n < -1)
        {
            n >>= 1;
            ++cBits;
        }
        return cBits + 1;
    }

    // Positive value: count magnitude bits.
    while(n != 0)
    {
        n >>= 1;
        ++cBits;
    }
    return bIsSigned ? cBits + 1 : cBits;
}
624 
625 
626 //-------------------------------------------------------------------------------------
627 static float OptimizeRGB(_In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pPoints,
628  _Out_ HDRColorA* pX, _Out_ HDRColorA* pY,
629  _In_ size_t cSteps, _In_ size_t cPixels, _In_reads_(cPixels) const size_t* pIndex)
630 {
631  float fError = FLT_MAX;
632  const float *pC = (3 == cSteps) ? pC3 : pC4;
633  const float *pD = (3 == cSteps) ? pD3 : pD4;
634 
635  // Find Min and Max points, as starting point
636  HDRColorA X(1.0f, 1.0f, 1.0f, 0.0f);
637  HDRColorA Y(0.0f, 0.0f, 0.0f, 0.0f);
638 
639  for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
640  {
641  if(pPoints[pIndex[iPoint]].r < X.r) X.r = pPoints[pIndex[iPoint]].r;
642  if(pPoints[pIndex[iPoint]].g < X.g) X.g = pPoints[pIndex[iPoint]].g;
643  if(pPoints[pIndex[iPoint]].b < X.b) X.b = pPoints[pIndex[iPoint]].b;
644  if(pPoints[pIndex[iPoint]].r > Y.r) Y.r = pPoints[pIndex[iPoint]].r;
645  if(pPoints[pIndex[iPoint]].g > Y.g) Y.g = pPoints[pIndex[iPoint]].g;
646  if(pPoints[pIndex[iPoint]].b > Y.b) Y.b = pPoints[pIndex[iPoint]].b;
647  }
648 
649  // Diagonal axis
650  HDRColorA AB;
651  AB.r = Y.r - X.r;
652  AB.g = Y.g - X.g;
653  AB.b = Y.b - X.b;
654 
655  float fAB = AB.r * AB.r + AB.g * AB.g + AB.b * AB.b;
656 
657  // Single color block.. no need to root-find
658  if(fAB < FLT_MIN)
659  {
660  pX->r = X.r; pX->g = X.g; pX->b = X.b;
661  pY->r = Y.r; pY->g = Y.g; pY->b = Y.b;
662  return 0.0f;
663  }
664 
665  // Try all four axis directions, to determine which diagonal best fits data
666  float fABInv = 1.0f / fAB;
667 
668  HDRColorA Dir;
669  Dir.r = AB.r * fABInv;
670  Dir.g = AB.g * fABInv;
671  Dir.b = AB.b * fABInv;
672 
673  HDRColorA Mid;
674  Mid.r = (X.r + Y.r) * 0.5f;
675  Mid.g = (X.g + Y.g) * 0.5f;
676  Mid.b = (X.b + Y.b) * 0.5f;
677 
678  float fDir[4];
679  fDir[0] = fDir[1] = fDir[2] = fDir[3] = 0.0f;
680 
681  for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
682  {
683  HDRColorA Pt;
684  Pt.r = (pPoints[pIndex[iPoint]].r - Mid.r) * Dir.r;
685  Pt.g = (pPoints[pIndex[iPoint]].g - Mid.g) * Dir.g;
686  Pt.b = (pPoints[pIndex[iPoint]].b - Mid.b) * Dir.b;
687 
688  float f;
689  f = Pt.r + Pt.g + Pt.b; fDir[0] += f * f;
690  f = Pt.r + Pt.g - Pt.b; fDir[1] += f * f;
691  f = Pt.r - Pt.g + Pt.b; fDir[2] += f * f;
692  f = Pt.r - Pt.g - Pt.b; fDir[3] += f * f;
693  }
694 
695  float fDirMax = fDir[0];
696  size_t iDirMax = 0;
697 
698  for(size_t iDir = 1; iDir < 4; iDir++)
699  {
700  if(fDir[iDir] > fDirMax)
701  {
702  fDirMax = fDir[iDir];
703  iDirMax = iDir;
704  }
705  }
706 
707  if(iDirMax & 2) std::swap( X.g, Y.g );
708  if(iDirMax & 1) std::swap( X.b, Y.b );
709 
710  // Two color block.. no need to root-find
711  if(fAB < 1.0f / 4096.0f)
712  {
713  pX->r = X.r; pX->g = X.g; pX->b = X.b;
714  pY->r = Y.r; pY->g = Y.g; pY->b = Y.b;
715  return 0.0f;
716  }
717 
718  // Use Newton's Method to find local minima of sum-of-squares error.
719  float fSteps = (float) (cSteps - 1);
720 
721  for(size_t iIteration = 0; iIteration < 8; iIteration++)
722  {
723  // Calculate new steps
724  HDRColorA pSteps[4];
725 
726  for(size_t iStep = 0; iStep < cSteps; iStep++)
727  {
728  pSteps[iStep].r = X.r * pC[iStep] + Y.r * pD[iStep];
729  pSteps[iStep].g = X.g * pC[iStep] + Y.g * pD[iStep];
730  pSteps[iStep].b = X.b * pC[iStep] + Y.b * pD[iStep];
731  }
732 
733  // Calculate color direction
734  Dir.r = Y.r - X.r;
735  Dir.g = Y.g - X.g;
736  Dir.b = Y.b - X.b;
737 
738  float fLen = (Dir.r * Dir.r + Dir.g * Dir.g + Dir.b * Dir.b);
739 
740  if(fLen < (1.0f / 4096.0f))
741  break;
742 
743  float fScale = fSteps / fLen;
744 
745  Dir.r *= fScale;
746  Dir.g *= fScale;
747  Dir.b *= fScale;
748 
749  // Evaluate function, and derivatives
750  float d2X = 0.0f, d2Y = 0.0f;
751  HDRColorA dX(0.0f, 0.0f, 0.0f, 0.0f), dY(0.0f, 0.0f, 0.0f, 0.0f);
752 
753  for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
754  {
755  float fDot = (pPoints[pIndex[iPoint]].r - X.r) * Dir.r +
756  (pPoints[pIndex[iPoint]].g - X.g) * Dir.g +
757  (pPoints[pIndex[iPoint]].b - X.b) * Dir.b;
758 
759  size_t iStep;
760  if(fDot <= 0.0f)
761  iStep = 0;
762  if(fDot >= fSteps)
763  iStep = cSteps - 1;
764  else
765  iStep = size_t(fDot + 0.5f);
766 
767  HDRColorA Diff;
768  Diff.r = pSteps[iStep].r - pPoints[pIndex[iPoint]].r;
769  Diff.g = pSteps[iStep].g - pPoints[pIndex[iPoint]].g;
770  Diff.b = pSteps[iStep].b - pPoints[pIndex[iPoint]].b;
771 
772  float fC = pC[iStep] * (1.0f / 8.0f);
773  float fD = pD[iStep] * (1.0f / 8.0f);
774 
775  d2X += fC * pC[iStep];
776  dX.r += fC * Diff.r;
777  dX.g += fC * Diff.g;
778  dX.b += fC * Diff.b;
779 
780  d2Y += fD * pD[iStep];
781  dY.r += fD * Diff.r;
782  dY.g += fD * Diff.g;
783  dY.b += fD * Diff.b;
784  }
785 
786  // Move endpoints
787  if(d2X > 0.0f)
788  {
789  float f = -1.0f / d2X;
790 
791  X.r += dX.r * f;
792  X.g += dX.g * f;
793  X.b += dX.b * f;
794  }
795 
796  if(d2Y > 0.0f)
797  {
798  float f = -1.0f / d2Y;
799 
800  Y.r += dY.r * f;
801  Y.g += dY.g * f;
802  Y.b += dY.b * f;
803  }
804 
805  if((dX.r * dX.r < fEpsilon) && (dX.g * dX.g < fEpsilon) && (dX.b * dX.b < fEpsilon) &&
806  (dY.r * dY.r < fEpsilon) && (dY.g * dY.g < fEpsilon) && (dY.b * dY.b < fEpsilon))
807  {
808  break;
809  }
810  }
811 
812  pX->r = X.r; pX->g = X.g; pX->b = X.b;
813  pY->r = Y.r; pY->g = Y.g; pY->b = Y.b;
814  return fError;
815 }
816 
817 
818 //-------------------------------------------------------------------------------------
819 static float OptimizeRGBA(_In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA* const pPoints,
820  _Out_ HDRColorA* pX, _Out_ HDRColorA* pY,
821  _In_ size_t cSteps, _In_ size_t cPixels, _In_reads_(cPixels) const size_t* pIndex)
822 {
823  float fError = FLT_MAX;
824  const float *pC = (3 == cSteps) ? pC3 : pC4;
825  const float *pD = (3 == cSteps) ? pD3 : pD4;
826 
827  // Find Min and Max points, as starting point
828  HDRColorA X(1.0f, 1.0f, 1.0f, 1.0f);
829  HDRColorA Y(0.0f, 0.0f, 0.0f, 0.0f);
830 
831  for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
832  {
833  if(pPoints[pIndex[iPoint]].r < X.r) X.r = pPoints[pIndex[iPoint]].r;
834  if(pPoints[pIndex[iPoint]].g < X.g) X.g = pPoints[pIndex[iPoint]].g;
835  if(pPoints[pIndex[iPoint]].b < X.b) X.b = pPoints[pIndex[iPoint]].b;
836  if(pPoints[pIndex[iPoint]].a < X.a) X.a = pPoints[pIndex[iPoint]].a;
837  if(pPoints[pIndex[iPoint]].r > Y.r) Y.r = pPoints[pIndex[iPoint]].r;
838  if(pPoints[pIndex[iPoint]].g > Y.g) Y.g = pPoints[pIndex[iPoint]].g;
839  if(pPoints[pIndex[iPoint]].b > Y.b) Y.b = pPoints[pIndex[iPoint]].b;
840  if(pPoints[pIndex[iPoint]].a > Y.a) Y.a = pPoints[pIndex[iPoint]].a;
841  }
842 
843  // Diagonal axis
844  HDRColorA AB = Y - X;
845  float fAB = AB * AB;
846 
847  // Single color block.. no need to root-find
848  if(fAB < FLT_MIN)
849  {
850  *pX = X;
851  *pY = Y;
852  return 0.0f;
853  }
854 
855  // Try all four axis directions, to determine which diagonal best fits data
856  float fABInv = 1.0f / fAB;
857  HDRColorA Dir = AB * fABInv;
858  HDRColorA Mid = (X + Y) * 0.5f;
859 
860  float fDir[8];
861  fDir[0] = fDir[1] = fDir[2] = fDir[3] = fDir[4] = fDir[5] = fDir[6] = fDir[7] = 0.0f;
862 
863  for(size_t iPoint = 0; iPoint < cPixels; iPoint++)
864  {
865  HDRColorA Pt;
866  Pt.r = (pPoints[pIndex[iPoint]].r - Mid.r) * Dir.r;
867  Pt.g = (pPoints[pIndex[iPoint]].g - Mid.g) * Dir.g;
868  Pt.b = (pPoints[pIndex[iPoint]].b - Mid.b) * Dir.b;
869  Pt.a = (pPoints[pIndex[iPoint]].a - Mid.a) * Dir.a;
870 
871  float f;
872  f = Pt.r + Pt.g + Pt.b + Pt.a; fDir[0] += f * f;
873  f = Pt.r + Pt.g + Pt.b - Pt.a; fDir[1] += f * f;
874  f = Pt.r + Pt.g - Pt.b + Pt.a; fDir[2] += f * f;
875  f = Pt.r + Pt.g - Pt.b - Pt.a; fDir[3] += f * f;
876  f = Pt.r - Pt.g + Pt.b + Pt.a; fDir[4] += f * f;
877  f = Pt.r - Pt.g + Pt.b - Pt.a; fDir[5] += f * f;
878  f = Pt.r - Pt.g - Pt.b + Pt.a; fDir[6] += f * f;
879  f = Pt.r - Pt.g - Pt.b - Pt.a; fDir[7] += f * f;
880  }
881 
882  float fDirMax = fDir[0];
883  size_t iDirMax = 0;
884 
885  for(size_t iDir = 1; iDir < 8; iDir++)
886  {
887  if(fDir[iDir] > fDirMax)
888  {
889  fDirMax = fDir[iDir];
890  iDirMax = iDir;
891  }
892  }
893 
894  if(iDirMax & 4) std::swap(X.g, Y.g);
895  if(iDirMax & 2) std::swap(X.b, Y.b);
896  if(iDirMax & 1) std::swap(X.a, Y.a);
897 
898  // Two color block.. no need to root-find
899  if(fAB < 1.0f / 4096.0f)
900  {
901  *pX = X;
902  *pY = Y;
903  return 0.0f;
904  }
905 
906  // Use Newton's Method to find local minima of sum-of-squares error.
907  float fSteps = (float) (cSteps - 1);
908 
909  for(size_t iIteration = 0; iIteration < 8 && fError > 0.0f; iIteration++)
910  {
911  // Calculate new steps
912  HDRColorA pSteps[BC7_MAX_INDICES];
913 
914  LDRColorA aSteps[BC7_MAX_INDICES];
915  LDRColorA lX, lY;
916  lX = (X * 255.0f).ToLDRColorA();
917  lY = (Y * 255.0f).ToLDRColorA();
918 
919  for(size_t iStep = 0; iStep < cSteps; iStep++)
920  {
921  pSteps[iStep] = X * pC[iStep] + Y * pD[iStep];
922  //LDRColorA::Interpolate(lX, lY, i, i, wcprec, waprec, aSteps[i]);
923  }
924 
925  // Calculate color direction
926  Dir = Y - X;
927  float fLen = Dir * Dir;
928  if(fLen < (1.0f / 4096.0f))
929  break;
930 
931  float fScale = fSteps / fLen;
932  Dir *= fScale;
933 
934  // Evaluate function, and derivatives
935  float d2X = 0.0f, d2Y = 0.0f;
936  HDRColorA dX(0.0f, 0.0f, 0.0f, 0.0f), dY(0.0f, 0.0f, 0.0f, 0.0f);
937 
938  for(size_t iPoint = 0; iPoint < cPixels; ++iPoint)
939  {
940  float fDot = (pPoints[pIndex[iPoint]] - X) * Dir;
941  size_t iStep;
942  if(fDot <= 0.0f)
943  iStep = 0;
944  if(fDot >= fSteps)
945  iStep = cSteps - 1;
946  else
947  iStep = size_t(fDot + 0.5f);
948 
949  HDRColorA Diff = pSteps[iStep] - pPoints[pIndex[iPoint]];
950  float fC = pC[iStep] * (1.0f / 8.0f);
951  float fD = pD[iStep] * (1.0f / 8.0f);
952 
953  d2X += fC * pC[iStep];
954  dX += Diff * fC;
955 
956  d2Y += fD * pD[iStep];
957  dY += Diff * fD;
958  }
959 
960  // Move endpoints
961  if(d2X > 0.0f)
962  {
963  float f = -1.0f / d2X;
964  X += dX * f;
965  }
966 
967  if(d2Y > 0.0f)
968  {
969  float f = -1.0f / d2Y;
970  Y += dY * f;
971  }
972 
973  if((dX * dX < fEpsilon) && (dY * dY < fEpsilon))
974  break;
975  }
976 
977  *pX = X;
978  *pY = Y;
979  return fError;
980 }
981 
982 
983 //-------------------------------------------------------------------------------------
984 
985 static float ComputeError(_Inout_ const LDRColorA& pixel, _In_reads_(1 << uIndexPrec) const LDRColorA aPalette[],
986  _In_ uint8_t uIndexPrec, _In_ uint8_t uIndexPrec2, _Out_opt_ size_t* pBestIndex = nullptr, _Out_opt_ size_t* pBestIndex2 = nullptr)
987 {
988  const size_t uNumIndices = size_t(1) << uIndexPrec;
989  const size_t uNumIndices2 = size_t(1) << uIndexPrec2;
990  float fTotalErr = 0;
991  float fBestErr = FLT_MAX;
992 
993  if(pBestIndex)
994  *pBestIndex = 0;
995  if(pBestIndex2)
996  *pBestIndex2 = 0;
997 
998  XMVECTOR vpixel = XMLoadUByte4( reinterpret_cast<const XMUBYTE4*>( &pixel ) );
999 
1000  if(uIndexPrec2 == 0)
1001  {
1002  for(register size_t i = 0; i < uNumIndices && fBestErr > 0; i++)
1003  {
1004  XMVECTOR tpixel = XMLoadUByte4( reinterpret_cast<const XMUBYTE4*>( &aPalette[i] ) );
1005  // Compute ErrorMetric
1006  tpixel = XMVectorSubtract( vpixel, tpixel );
1007  float fErr = XMVectorGetX( XMVector4Dot( tpixel, tpixel ) );
1008  if(fErr > fBestErr) // error increased, so we're done searching
1009  break;
1010  if(fErr < fBestErr)
1011  {
1012  fBestErr = fErr;
1013  if(pBestIndex)
1014  *pBestIndex = i;
1015  }
1016  }
1017  fTotalErr += fBestErr;
1018  }
1019  else
1020  {
1021  for(register size_t i = 0; i < uNumIndices && fBestErr > 0; i++)
1022  {
1023  XMVECTOR tpixel = XMLoadUByte4( reinterpret_cast<const XMUBYTE4*>( &aPalette[i] ) );
1024  // Compute ErrorMetricRGB
1025  tpixel = XMVectorSubtract( vpixel, tpixel );
1026  float fErr = XMVectorGetX( XMVector3Dot( tpixel, tpixel ) );
1027  if(fErr > fBestErr) // error increased, so we're done searching
1028  break;
1029  if(fErr < fBestErr)
1030  {
1031  fBestErr = fErr;
1032  if(pBestIndex)
1033  *pBestIndex = i;
1034  }
1035  }
1036  fTotalErr += fBestErr;
1037  fBestErr = FLT_MAX;
1038  for(register size_t i = 0; i < uNumIndices2 && fBestErr > 0; i++)
1039  {
1040  // Compute ErrorMetricAlpha
1041  float ea = float(pixel.a) - float(aPalette[i].a);
1042  float fErr = ea*ea;
1043  if(fErr > fBestErr) // error increased, so we're done searching
1044  break;
1045  if(fErr < fBestErr)
1046  {
1047  fBestErr = fErr;
1048  if(pBestIndex2)
1049  *pBestIndex2 = i;
1050  }
1051  }
1052  fTotalErr += fBestErr;
1053  }
1054 
1055  return fTotalErr;
1056 }
1057 
1058 
1060 {
1061  for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1062  {
1063 #ifdef _DEBUG
1064  // Use Magenta in debug as a highly-visible error color
1065  pOut[i] = HDRColorA(1.0f, 0.0f, 1.0f, 1.0f);
1066 #else
1067  // In production use, default to black
1068  pOut[i] = HDRColorA(0.0f, 0.0f, 0.0f, 1.0f);
1069 #endif
1070  }
1071 }
1072 
1073 
1074 //-------------------------------------------------------------------------------------
1075 // BC6H Compression
1076 //-------------------------------------------------------------------------------------
1077 _Use_decl_annotations_
1078 void D3DX_BC6H::Decode(bool bSigned, HDRColorA* pOut) const
1079 {
1080  assert(pOut );
1081 
1082  size_t uStartBit = 0;
1083  uint8_t uMode = GetBits(uStartBit, 2);
1084  if(uMode != 0x00 && uMode != 0x01)
1085  {
1086  uMode = (GetBits(uStartBit, 3) << 2) | uMode;
1087  }
1088 
1089  assert( uMode < 32 );
1090  _Analysis_assume_( uMode < 32 );
1091 
1092  if ( ms_aModeToInfo[uMode] >= 0 )
1093  {
1094  assert(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aInfo));
1095  _Analysis_assume_(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aInfo));
1096  const ModeDescriptor* desc = ms_aDesc[ms_aModeToInfo[uMode]];
1097 
1098  assert(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aDesc));
1099  _Analysis_assume_(ms_aModeToInfo[uMode] < ARRAYSIZE(ms_aDesc));
1100  const ModeInfo& info = ms_aInfo[ms_aModeToInfo[uMode]];
1101 
1103  memset(aEndPts, 0, BC6H_MAX_REGIONS * 2 * sizeof(INTColor));
1104  uint32_t uShape = 0;
1105 
1106  // Read header
1107  const size_t uHeaderBits = info.uPartitions > 0 ? 82 : 65;
1108  while(uStartBit < uHeaderBits)
1109  {
1110  size_t uCurBit = uStartBit;
1111  if(GetBit(uStartBit))
1112  {
1113  switch(desc[uCurBit].m_eField)
1114  {
1115  case D: uShape |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1116  case RW: aEndPts[0].A.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1117  case RX: aEndPts[0].B.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1118  case RY: aEndPts[1].A.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1119  case RZ: aEndPts[1].B.r |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1120  case GW: aEndPts[0].A.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1121  case GX: aEndPts[0].B.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1122  case GY: aEndPts[1].A.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1123  case GZ: aEndPts[1].B.g |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1124  case BW: aEndPts[0].A.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1125  case BX: aEndPts[0].B.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1126  case BY: aEndPts[1].A.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1127  case BZ: aEndPts[1].B.b |= 1 << uint32_t(desc[uCurBit].m_uBit); break;
1128  default:
1129  {
1130 #ifdef _DEBUG
1131  OutputDebugStringA( "BC6H: Invalid header bits encountered during decoding\n" );
1132 #endif
1133  FillWithErrorColors( pOut );
1134  return;
1135  }
1136  }
1137  }
1138  }
1139 
1140  assert( uShape < 64 );
1141  _Analysis_assume_( uShape < 64 );
1142 
1143  // Sign extend necessary end points
1144  if(bSigned)
1145  {
1146  aEndPts[0].A.SignExtend(info.RGBAPrec[0][0]);
1147  }
1148  if(bSigned || info.bTransformed)
1149  {
1150  assert( info.uPartitions < BC6H_MAX_REGIONS );
1151  _Analysis_assume_( info.uPartitions < BC6H_MAX_REGIONS );
1152  for(size_t p = 0; p <= info.uPartitions; ++p)
1153  {
1154  if(p != 0)
1155  {
1156  aEndPts[p].A.SignExtend(info.RGBAPrec[p][0]);
1157  }
1158  aEndPts[p].B.SignExtend(info.RGBAPrec[p][1]);
1159  }
1160  }
1161 
1162  // Inverse transform the end points
1163  if(info.bTransformed)
1164  {
1165  TransformInverse(aEndPts, info.RGBAPrec[0][0], bSigned);
1166  }
1167 
1168  // Read indices
1169  for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1170  {
1171  size_t uNumBits = IsFixUpOffset(info.uPartitions, uShape, i) ? info.uIndexPrec-1 : info.uIndexPrec;
1172  if ( uStartBit + uNumBits > 128 )
1173  {
1174 #ifdef _DEBUG
1175  OutputDebugStringA( "BC6H: Invalid block encountered during decoding\n" );
1176 #endif
1177  FillWithErrorColors( pOut );
1178  return;
1179  }
1180  uint8_t uIndex = GetBits(uStartBit, uNumBits);
1181 
1182  if ( uIndex >= ((info.uPartitions > 0) ? 8 : 16) )
1183  {
1184 #ifdef _DEBUG
1185  OutputDebugStringA( "BC6H: Invalid index encountered during decoding\n" );
1186 #endif
1187  FillWithErrorColors( pOut );
1188  return;
1189  }
1190 
1191  size_t uRegion = g_aPartitionTable[info.uPartitions][uShape][i];
1192  assert( uRegion < BC6H_MAX_REGIONS );
1193  _Analysis_assume_( uRegion < BC6H_MAX_REGIONS );
1194 
1195  // Unquantize endpoints and interpolate
1196  int r1 = Unquantize(aEndPts[uRegion].A.r, info.RGBAPrec[0][0].r, bSigned);
1197  int g1 = Unquantize(aEndPts[uRegion].A.g, info.RGBAPrec[0][0].g, bSigned);
1198  int b1 = Unquantize(aEndPts[uRegion].A.b, info.RGBAPrec[0][0].b, bSigned);
1199  int r2 = Unquantize(aEndPts[uRegion].B.r, info.RGBAPrec[0][0].r, bSigned);
1200  int g2 = Unquantize(aEndPts[uRegion].B.g, info.RGBAPrec[0][0].g, bSigned);
1201  int b2 = Unquantize(aEndPts[uRegion].B.b, info.RGBAPrec[0][0].b, bSigned);
1202  const int* aWeights = info.uPartitions > 0 ? g_aWeights3 : g_aWeights4;
1203  INTColor fc;
1204  fc.r = FinishUnquantize((r1 * (BC67_WEIGHT_MAX - aWeights[uIndex]) + r2 * aWeights[uIndex] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT, bSigned);
1205  fc.g = FinishUnquantize((g1 * (BC67_WEIGHT_MAX - aWeights[uIndex]) + g2 * aWeights[uIndex] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT, bSigned);
1206  fc.b = FinishUnquantize((b1 * (BC67_WEIGHT_MAX - aWeights[uIndex]) + b2 * aWeights[uIndex] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT, bSigned);
1207 
1208  HALF rgb[3];
1209  fc.ToF16(rgb, bSigned);
1210 
1211  pOut[i].r = XMConvertHalfToFloat( rgb[0] );
1212  pOut[i].g = XMConvertHalfToFloat( rgb[1] );
1213  pOut[i].b = XMConvertHalfToFloat( rgb[2] );
1214  pOut[i].a = 1.0f;
1215  }
1216  }
1217  else
1218  {
1219 #ifdef _DEBUG
1220  const char* warnstr = "BC6H: Invalid mode encountered during decoding\n";
1221  switch( uMode )
1222  {
1223  case 0x13: warnstr = "BC6H: Reserved mode 10011 encountered during decoding\n"; break;
1224  case 0x17: warnstr = "BC6H: Reserved mode 10111 encountered during decoding\n"; break;
1225  case 0x1B: warnstr = "BC6H: Reserved mode 11011 encountered during decoding\n"; break;
1226  case 0x1F: warnstr = "BC6H: Reserved mode 11111 encountered during decoding\n"; break;
1227  }
1228  OutputDebugStringA( warnstr );
1229 #endif
1230  // Per the BC6H format spec, we must return opaque black
1231  for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1232  {
1233  pOut[i] = HDRColorA(0.0f, 0.0f, 0.0f, 1.0f);
1234  }
1235  }
1236 }
1237 
1238 _Use_decl_annotations_
1239 void D3DX_BC6H::Encode(bool bSigned, const HDRColorA* const pIn)
1240 {
1241  assert( pIn );
1242 
1243  EncodeParams EP(pIn, bSigned);
1244 
1245  for(EP.uMode = 0; EP.uMode < ARRAYSIZE(ms_aInfo) && EP.fBestErr > 0; ++EP.uMode)
1246  {
1247  const uint8_t uShapes = ms_aInfo[EP.uMode].uPartitions ? 32 : 1;
1248  // Number of rough cases to look at. reasonable values of this are 1, uShapes/4, and uShapes
1249  // uShapes/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
1250  const size_t uItems = std::max<size_t>(1, uShapes >> 2);
1251  float afRoughMSE[BC6H_MAX_SHAPES];
1252  uint8_t auShape[BC6H_MAX_SHAPES];
1253 
1254  // pick the best uItems shapes and refine these.
1255  for(EP.uShape = 0; EP.uShape < uShapes; ++EP.uShape)
1256  {
1257  size_t uShape = EP.uShape;
1258  afRoughMSE[uShape] = RoughMSE(&EP);
1259  auShape[uShape] = static_cast<uint8_t>(uShape);
1260  }
1261 
1262  // Bubble up the first uItems items
1263  for(register size_t i = 0; i < uItems; i++)
1264  {
1265  for(register size_t j = i + 1; j < uShapes; j++)
1266  {
1267  if(afRoughMSE[i] > afRoughMSE[j])
1268  {
1269  std::swap(afRoughMSE[i], afRoughMSE[j]);
1270  std::swap(auShape[i], auShape[j]);
1271  }
1272  }
1273  }
1274 
1275  for(size_t i = 0; i < uItems && EP.fBestErr > 0; i++)
1276  {
1277  EP.uShape = auShape[i];
1278  Refine(&EP);
1279  }
1280  }
1281 }
1282 
1283 
1284 //-------------------------------------------------------------------------------------
1285 _Use_decl_annotations_
1286 int D3DX_BC6H::Quantize(int iValue, int prec, bool bSigned)
1287 {
1288  assert(prec > 1); // didn't bother to make it work for 1
1289  int q, s = 0;
1290  if(bSigned)
1291  {
1292  assert(iValue >= -F16MAX && iValue <= F16MAX);
1293  if(iValue < 0)
1294  {
1295  s = 1;
1296  iValue = -iValue;
1297  }
1298  q = (prec >= 16) ? iValue : (iValue << (prec-1)) / (F16MAX+1);
1299  if(s)
1300  q = -q;
1301  assert (q > -(1 << (prec-1)) && q < (1 << (prec-1)));
1302  }
1303  else
1304  {
1305  assert(iValue >= 0 && iValue <= F16MAX);
1306  q = (prec >= 15) ? iValue : (iValue << prec) / (F16MAX+1);
1307  assert (q >= 0 && q < (1 << prec));
1308  }
1309 
1310  return q;
1311 }
1312 
1313 _Use_decl_annotations_
1314 int D3DX_BC6H::Unquantize(int comp, uint8_t uBitsPerComp, bool bSigned)
1315 {
1316  int unq = 0, s = 0;
1317  if(bSigned)
1318  {
1319  if(uBitsPerComp >= 16)
1320  {
1321  unq = comp;
1322  }
1323  else
1324  {
1325  if(comp < 0)
1326  {
1327  s = 1;
1328  comp = -comp;
1329  }
1330 
1331  if(comp == 0) unq = 0;
1332  else if(comp >= ((1 << (uBitsPerComp - 1)) - 1)) unq = 0x7FFF;
1333  else unq = ((comp << 15) + 0x4000) >> (uBitsPerComp-1);
1334 
1335  if(s) unq = -unq;
1336  }
1337  }
1338  else
1339  {
1340  if(uBitsPerComp >= 15) unq = comp;
1341  else if(comp == 0) unq = 0;
1342  else if(comp == ((1 << uBitsPerComp) - 1)) unq = 0xFFFF;
1343  else unq = ((comp << 16) + 0x8000) >> uBitsPerComp;
1344  }
1345 
1346  return unq;
1347 }
1348 
1349 _Use_decl_annotations_
1350 int D3DX_BC6H::FinishUnquantize(int comp, bool bSigned)
1351 {
1352  if(bSigned)
1353  {
1354  return (comp < 0) ? -(((-comp) * 31) >> 5) : (comp * 31) >> 5; // scale the magnitude by 31/32
1355  }
1356  else
1357  {
1358  return (comp * 31) >> 6; // scale the magnitude by 31/64
1359  }
1360 }
1361 
1362 
1363 //-------------------------------------------------------------------------------------
1364 _Use_decl_annotations_
1365 bool D3DX_BC6H::EndPointsFit(const EncodeParams* pEP, const INTEndPntPair aEndPts[])
1366 {
1367  assert( pEP );
1368  const bool bTransformed = ms_aInfo[pEP->uMode].bTransformed;
1369  const bool bIsSigned = pEP->bSigned;
1370  const LDRColorA& Prec0 = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
1371  const LDRColorA& Prec1 = ms_aInfo[pEP->uMode].RGBAPrec[0][1];
1372  const LDRColorA& Prec2 = ms_aInfo[pEP->uMode].RGBAPrec[1][0];
1373  const LDRColorA& Prec3 = ms_aInfo[pEP->uMode].RGBAPrec[1][1];
1374 
1375  INTColor aBits[4];
1376  aBits[0].r = NBits(aEndPts[0].A.r, bIsSigned);
1377  aBits[0].g = NBits(aEndPts[0].A.g, bIsSigned);
1378  aBits[0].b = NBits(aEndPts[0].A.b, bIsSigned);
1379  aBits[1].r = NBits(aEndPts[0].B.r, bTransformed || bIsSigned);
1380  aBits[1].g = NBits(aEndPts[0].B.g, bTransformed || bIsSigned);
1381  aBits[1].b = NBits(aEndPts[0].B.b, bTransformed || bIsSigned);
1382  if(aBits[0].r > Prec0.r || aBits[1].r > Prec1.r ||
1383  aBits[0].g > Prec0.g || aBits[1].g > Prec1.g ||
1384  aBits[0].b > Prec0.b || aBits[1].b > Prec1.b)
1385  return false;
1386 
1387  if(ms_aInfo[pEP->uMode].uPartitions)
1388  {
1389  aBits[2].r = NBits(aEndPts[1].A.r, bTransformed || bIsSigned);
1390  aBits[2].g = NBits(aEndPts[1].A.g, bTransformed || bIsSigned);
1391  aBits[2].b = NBits(aEndPts[1].A.b, bTransformed || bIsSigned);
1392  aBits[3].r = NBits(aEndPts[1].B.r, bTransformed || bIsSigned);
1393  aBits[3].g = NBits(aEndPts[1].B.g, bTransformed || bIsSigned);
1394  aBits[3].b = NBits(aEndPts[1].B.b, bTransformed || bIsSigned);
1395 
1396  if(aBits[2].r > Prec2.r || aBits[3].r > Prec3.r ||
1397  aBits[2].g > Prec2.g || aBits[3].g > Prec3.g ||
1398  aBits[2].b > Prec2.b || aBits[3].b > Prec3.b)
1399  return false;
1400  }
1401 
1402  return true;
1403 }
1404 
1405 _Use_decl_annotations_
1406 void D3DX_BC6H::GeneratePaletteQuantized(const EncodeParams* pEP, const INTEndPntPair& endPts, INTColor aPalette[]) const
1407 {
1408  assert( pEP );
1409  const size_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
1410  const size_t uNumIndices = size_t(1) << uIndexPrec;
1411  assert( uNumIndices > 0 );
1412  _Analysis_assume_( uNumIndices > 0 );
1413  const LDRColorA& Prec = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
1414 
1415  // scale endpoints
1416  INTEndPntPair unqEndPts;
1417  unqEndPts.A.r = Unquantize(endPts.A.r, Prec.r, pEP->bSigned);
1418  unqEndPts.A.g = Unquantize(endPts.A.g, Prec.g, pEP->bSigned);
1419  unqEndPts.A.b = Unquantize(endPts.A.b, Prec.b, pEP->bSigned);
1420  unqEndPts.B.r = Unquantize(endPts.B.r, Prec.r, pEP->bSigned);
1421  unqEndPts.B.g = Unquantize(endPts.B.g, Prec.g, pEP->bSigned);
1422  unqEndPts.B.b = Unquantize(endPts.B.b, Prec.b, pEP->bSigned);
1423 
1424  // interpolate
1425  const int* aWeights = nullptr;
1426  switch(uIndexPrec)
1427  {
1428  case 3: aWeights = g_aWeights3; assert(uNumIndices <= 8); _Analysis_assume_(uNumIndices <= 8); break;
1429  case 4: aWeights = g_aWeights4; assert(uNumIndices <= 16); _Analysis_assume_(uNumIndices <= 16); break;
1430  default:
1431  assert(false);
1432  for(size_t i = 0; i < uNumIndices; ++i)
1433  {
1434  #pragma prefast(suppress:22103, "writing blocks in two halves confuses tool")
1435  aPalette[i] = INTColor(0,0,0);
1436  }
1437  return;
1438  }
1439 
1440  for (size_t i = 0; i < uNumIndices; ++i)
1441  {
1442  aPalette[i].r = FinishUnquantize(
1443  (unqEndPts.A.r * (BC67_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.r * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT,
1444  pEP->bSigned);
1445  aPalette[i].g = FinishUnquantize(
1446  (unqEndPts.A.g * (BC67_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.g * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT,
1447  pEP->bSigned);
1448  aPalette[i].b = FinishUnquantize(
1449  (unqEndPts.A.b * (BC67_WEIGHT_MAX - aWeights[i]) + unqEndPts.B.b * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT,
1450  pEP->bSigned);
1451  }
1452 }
1453 
1454 // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
1455 _Use_decl_annotations_
1456 float D3DX_BC6H::MapColorsQuantized(const EncodeParams* pEP, const INTColor aColors[], size_t np, const INTEndPntPair &endPts) const
1457 {
1458  assert( pEP );
1459 
1460  const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
1461  const uint8_t uNumIndices = 1 << uIndexPrec;
1462  INTColor aPalette[BC6H_MAX_INDICES];
1463  GeneratePaletteQuantized(pEP, endPts, aPalette);
1464 
1465  float fTotErr = 0;
1466  for(size_t i = 0; i < np; ++i)
1467  {
1468  XMVECTOR vcolors = XMLoadSInt4( reinterpret_cast<const XMINT4*>( &aColors[i] ) );
1469 
1470  // Compute ErrorMetricRGB
1471  XMVECTOR tpal = XMLoadSInt4( reinterpret_cast<const XMINT4*>( &aPalette[0] ) );
1472  tpal = XMVectorSubtract( vcolors, tpal );
1473  float fBestErr = XMVectorGetX( XMVector3Dot( tpal, tpal ) );
1474 
1475  for(int j = 1; j < uNumIndices && fBestErr > 0; ++j)
1476  {
1477  // Compute ErrorMetricRGB
1478  tpal = XMLoadSInt4( reinterpret_cast<const XMINT4*>( &aPalette[j] ) );
1479  tpal = XMVectorSubtract( vcolors, tpal );
1480  float fErr = XMVectorGetX( XMVector3Dot( tpal, tpal ) );
1481  if(fErr > fBestErr) break; // error increased, so we're done searching
1482  if(fErr < fBestErr) fBestErr = fErr;
1483  }
1484  fTotErr += fBestErr;
1485  }
1486  return fTotErr;
1487 }
1488 
1489 _Use_decl_annotations_
1490 float D3DX_BC6H::PerturbOne(const EncodeParams* pEP, const INTColor aColors[], size_t np, uint8_t ch,
1491  const INTEndPntPair& oldEndPts, INTEndPntPair& newEndPts, float fOldErr, int do_b) const
1492 {
1493  assert( pEP );
1494  uint8_t uPrec;
1495  switch(ch)
1496  {
1497  case 0: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].r; break;
1498  case 1: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].g; break;
1499  case 2: uPrec = ms_aInfo[pEP->uMode].RGBAPrec[0][0].b; break;
1500  default: assert(false); newEndPts = oldEndPts; return FLT_MAX;
1501  }
1502  INTEndPntPair tmpEndPts;
1503  float fMinErr = fOldErr;
1504  int beststep = 0;
1505 
1506  // copy real endpoints so we can perturb them
1507  tmpEndPts = newEndPts = oldEndPts;
1508 
1509  // do a logarithmic search for the best error for this endpoint (which)
1510  for(int step = 1 << (uPrec-1); step; step >>= 1)
1511  {
1512  bool bImproved = false;
1513  for(int sign = -1; sign <= 1; sign += 2)
1514  {
1515  if(do_b == 0)
1516  {
1517  tmpEndPts.A[ch] = newEndPts.A[ch] + sign * step;
1518  if(tmpEndPts.A[ch] < 0 || tmpEndPts.A[ch] >= (1 << uPrec))
1519  continue;
1520  }
1521  else
1522  {
1523  tmpEndPts.B[ch] = newEndPts.B[ch] + sign * step;
1524  if(tmpEndPts.B[ch] < 0 || tmpEndPts.B[ch] >= (1 << uPrec))
1525  continue;
1526  }
1527 
1528  float fErr = MapColorsQuantized(pEP, aColors, np, tmpEndPts);
1529 
1530  if(fErr < fMinErr)
1531  {
1532  bImproved = true;
1533  fMinErr = fErr;
1534  beststep = sign * step;
1535  }
1536  }
1537  // if this was an improvement, move the endpoint and continue search from there
1538  if(bImproved)
1539  {
1540  if(do_b == 0)
1541  newEndPts.A[ch] += beststep;
1542  else
1543  newEndPts.B[ch] += beststep;
1544  }
1545  }
1546  return fMinErr;
1547 }
1548 
1549 _Use_decl_annotations_
1550 void D3DX_BC6H::OptimizeOne(const EncodeParams* pEP, const INTColor aColors[], size_t np, float aOrgErr,
1551  const INTEndPntPair &aOrgEndPts, INTEndPntPair &aOptEndPts) const
1552 {
1553  assert( pEP );
1554  float aOptErr = aOrgErr;
1555  aOptEndPts.A = aOrgEndPts.A;
1556  aOptEndPts.B = aOrgEndPts.B;
1557 
1558  INTEndPntPair new_a, new_b;
1559  INTEndPntPair newEndPts;
1560  int do_b;
1561 
1562  // now optimize each channel separately
1563  for(uint8_t ch = 0; ch < 3; ++ch)
1564  {
1565  // figure out which endpoint when perturbed gives the most improvement and start there
1566  // if we just alternate, we can easily end up in a local minima
1567  float fErr0 = PerturbOne(pEP, aColors, np, ch, aOptEndPts, new_a, aOptErr, 0); // perturb endpt A
1568  float fErr1 = PerturbOne(pEP, aColors, np, ch, aOptEndPts, new_b, aOptErr, 1); // perturb endpt B
1569 
1570  if(fErr0 < fErr1)
1571  {
1572  if(fErr0 >= aOptErr) continue;
1573  aOptEndPts.A[ch] = new_a.A[ch];
1574  aOptErr = fErr0;
1575  do_b = 1; // do B next
1576  }
1577  else
1578  {
1579  if(fErr1 >= aOptErr) continue;
1580  aOptEndPts.B[ch] = new_b.B[ch];
1581  aOptErr = fErr1;
1582  do_b = 0; // do A next
1583  }
1584 
1585  // now alternate endpoints and keep trying until there is no improvement
1586  for(;;)
1587  {
1588  float fErr = PerturbOne(pEP, aColors, np, ch, aOptEndPts, newEndPts, aOptErr, do_b);
1589  if(fErr >= aOptErr)
1590  break;
1591  if(do_b == 0)
1592  aOptEndPts.A[ch] = newEndPts.A[ch];
1593  else
1594  aOptEndPts.B[ch] = newEndPts.B[ch];
1595  aOptErr = fErr;
1596  do_b = 1 - do_b; // now move the other endpoint
1597  }
1598  }
1599 }
1600 
1601 _Use_decl_annotations_
1602 void D3DX_BC6H::OptimizeEndPoints(const EncodeParams* pEP, const float aOrgErr[], const INTEndPntPair aOrgEndPts[], INTEndPntPair aOptEndPts[]) const
1603 {
1604  assert( pEP );
1605  const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1606  assert( uPartitions < BC6H_MAX_REGIONS );
1607  _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS );
1608  INTColor aPixels[NUM_PIXELS_PER_BLOCK];
1609 
1610  for(size_t p = 0; p <= uPartitions; ++p)
1611  {
1612  // collect the pixels in the region
1613  size_t np = 0;
1614  for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1615  {
1616  if(g_aPartitionTable[p][pEP->uShape][i] == p)
1617  {
1618  aPixels[np++] = pEP->aIPixels[i];
1619  }
1620  }
1621 
1622  OptimizeOne(pEP, aPixels, np, aOrgErr[p], aOrgEndPts[p], aOptEndPts[p]);
1623  }
1624 }
1625 
1626 // Swap endpoints as needed to ensure that the indices at fix up have a 0 high-order bit
1627 _Use_decl_annotations_
1628 void D3DX_BC6H::SwapIndices(const EncodeParams* pEP, INTEndPntPair aEndPts[], size_t aIndices[])
1629 {
1630  assert( pEP );
1631  const size_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1632  const size_t uNumIndices = size_t(1) << ms_aInfo[pEP->uMode].uIndexPrec;
1633  const size_t uHighIndexBit = uNumIndices >> 1;
1634 
1635  assert( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1636  _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1637 
1638  for(size_t p = 0; p <= uPartitions; ++p)
1639  {
1640  size_t i = g_aFixUp[uPartitions][pEP->uShape][p];
1641  assert(g_aPartitionTable[uPartitions][pEP->uShape][i] == p);
1642  if(aIndices[i] & uHighIndexBit)
1643  {
1644  // high bit is set, swap the aEndPts and indices for this region
1645  std::swap(aEndPts[p].A, aEndPts[p].B);
1646 
1647  for(size_t j = 0; j < NUM_PIXELS_PER_BLOCK; ++j)
1648  if(g_aPartitionTable[uPartitions][pEP->uShape][j] == p)
1649  aIndices[j] = uNumIndices - 1 - aIndices[j];
1650  }
1651  }
1652 }
1653 
1654 // assign indices given a tile, shape, and quantized endpoints, return toterr for each region
1655 _Use_decl_annotations_
1656 void D3DX_BC6H::AssignIndices(const EncodeParams* pEP, const INTEndPntPair aEndPts[], size_t aIndices[], float aTotErr[]) const
1657 {
1658  assert( pEP );
1659  const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1660  const uint8_t uNumIndices = 1 << ms_aInfo[pEP->uMode].uIndexPrec;
1661 
1662  assert( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1663  _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1664 
1665  // build list of possibles
1666  INTColor aPalette[BC6H_MAX_REGIONS][BC6H_MAX_INDICES];
1667 
1668  for(size_t p = 0; p <= uPartitions; ++p)
1669  {
1670  GeneratePaletteQuantized(pEP, aEndPts[p], aPalette[p]);
1671  aTotErr[p] = 0;
1672  }
1673 
1674  for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1675  {
1676  const uint8_t uRegion = g_aPartitionTable[uPartitions][pEP->uShape][i];
1677  assert( uRegion < BC6H_MAX_REGIONS );
1678  _Analysis_assume_( uRegion < BC6H_MAX_REGIONS );
1679  float fBestErr = Norm(pEP->aIPixels[i], aPalette[uRegion][0]);
1680  aIndices[i] = 0;
1681 
1682  for(uint8_t j = 1; j < uNumIndices && fBestErr > 0; ++j)
1683  {
1684  float fErr = Norm(pEP->aIPixels[i], aPalette[uRegion][j]);
1685  if(fErr > fBestErr) break; // error increased, so we're done searching
1686  if(fErr < fBestErr)
1687  {
1688  fBestErr = fErr;
1689  aIndices[i] = j;
1690  }
1691  }
1692  aTotErr[uRegion] += fBestErr;
1693  }
1694 }
1695 
1696 _Use_decl_annotations_
1697 void D3DX_BC6H::QuantizeEndPts(const EncodeParams* pEP, INTEndPntPair* aQntEndPts) const
1698 {
1699  assert( pEP && aQntEndPts );
1700  const INTEndPntPair* aUnqEndPts = pEP->aUnqEndPts[pEP->uShape];
1701  const LDRColorA& Prec = ms_aInfo[pEP->uMode].RGBAPrec[0][0];
1702  const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1703  assert( uPartitions < BC6H_MAX_REGIONS );
1704  _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS );
1705 
1706  for(size_t p = 0; p <= uPartitions; ++p)
1707  {
1708  aQntEndPts[p].A.r = Quantize(aUnqEndPts[p].A.r, Prec.r, pEP->bSigned);
1709  aQntEndPts[p].A.g = Quantize(aUnqEndPts[p].A.g, Prec.g, pEP->bSigned);
1710  aQntEndPts[p].A.b = Quantize(aUnqEndPts[p].A.b, Prec.b, pEP->bSigned);
1711  aQntEndPts[p].B.r = Quantize(aUnqEndPts[p].B.r, Prec.r, pEP->bSigned);
1712  aQntEndPts[p].B.g = Quantize(aUnqEndPts[p].B.g, Prec.g, pEP->bSigned);
1713  aQntEndPts[p].B.b = Quantize(aUnqEndPts[p].B.b, Prec.b, pEP->bSigned);
1714  }
1715 }
1716 
1717 _Use_decl_annotations_
1718 void D3DX_BC6H::EmitBlock(const EncodeParams* pEP, const INTEndPntPair aEndPts[], const size_t aIndices[])
1719 {
1720  assert( pEP );
1721  const uint8_t uRealMode = ms_aInfo[pEP->uMode].uMode;
1722  const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1723  const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
1724  const size_t uHeaderBits = uPartitions > 0 ? 82 : 65;
1725  const ModeDescriptor* desc = ms_aDesc[pEP->uMode];
1726  size_t uStartBit = 0;
1727 
1728  while(uStartBit < uHeaderBits)
1729  {
1730  switch(desc[uStartBit].m_eField)
1731  {
1732  case M: SetBit(uStartBit, uint8_t(uRealMode >> desc[uStartBit].m_uBit) & 0x01); break;
1733  case D: SetBit(uStartBit, uint8_t(pEP->uShape >> desc[uStartBit].m_uBit) & 0x01); break;
1734  case RW: SetBit(uStartBit, uint8_t(aEndPts[0].A.r >> desc[uStartBit].m_uBit) & 0x01); break;
1735  case RX: SetBit(uStartBit, uint8_t(aEndPts[0].B.r >> desc[uStartBit].m_uBit) & 0x01); break;
1736  case RY: SetBit(uStartBit, uint8_t(aEndPts[1].A.r >> desc[uStartBit].m_uBit) & 0x01); break;
1737  case RZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.r >> desc[uStartBit].m_uBit) & 0x01); break;
1738  case GW: SetBit(uStartBit, uint8_t(aEndPts[0].A.g >> desc[uStartBit].m_uBit) & 0x01); break;
1739  case GX: SetBit(uStartBit, uint8_t(aEndPts[0].B.g >> desc[uStartBit].m_uBit) & 0x01); break;
1740  case GY: SetBit(uStartBit, uint8_t(aEndPts[1].A.g >> desc[uStartBit].m_uBit) & 0x01); break;
1741  case GZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.g >> desc[uStartBit].m_uBit) & 0x01); break;
1742  case BW: SetBit(uStartBit, uint8_t(aEndPts[0].A.b >> desc[uStartBit].m_uBit) & 0x01); break;
1743  case BX: SetBit(uStartBit, uint8_t(aEndPts[0].B.b >> desc[uStartBit].m_uBit) & 0x01); break;
1744  case BY: SetBit(uStartBit, uint8_t(aEndPts[1].A.b >> desc[uStartBit].m_uBit) & 0x01); break;
1745  case BZ: SetBit(uStartBit, uint8_t(aEndPts[1].B.b >> desc[uStartBit].m_uBit) & 0x01); break;
1746  default: assert(false);
1747  }
1748  }
1749 
1750  for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1751  {
1752  if(IsFixUpOffset(ms_aInfo[pEP->uMode].uPartitions, pEP->uShape, i))
1753  SetBits(uStartBit, uIndexPrec - 1, static_cast<uint8_t>( aIndices[i] ));
1754  else
1755  SetBits(uStartBit, uIndexPrec, static_cast<uint8_t>( aIndices[i] ));
1756  }
1757  assert(uStartBit == 128);
1758 }
1759 
1760 _Use_decl_annotations_
1761 void D3DX_BC6H::Refine(EncodeParams* pEP)
1762 {
1763  assert( pEP );
1764  const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1765  assert( uPartitions < BC6H_MAX_REGIONS );
1766  _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS );
1767 
1768  const bool bTransformed = ms_aInfo[pEP->uMode].bTransformed;
1769  float aOrgErr[BC6H_MAX_REGIONS], aOptErr[BC6H_MAX_REGIONS];
1770  INTEndPntPair aOrgEndPts[BC6H_MAX_REGIONS], aOptEndPts[BC6H_MAX_REGIONS];
1771  size_t aOrgIdx[NUM_PIXELS_PER_BLOCK], aOptIdx[NUM_PIXELS_PER_BLOCK];
1772 
1773  QuantizeEndPts(pEP, aOrgEndPts);
1774  AssignIndices(pEP, aOrgEndPts, aOrgIdx, aOrgErr);
1775  SwapIndices(pEP, aOrgEndPts, aOrgIdx);
1776 
1777  if(bTransformed) TransformForward(aOrgEndPts);
1778  if(EndPointsFit(pEP, aOrgEndPts))
1779  {
1780  if(bTransformed) TransformInverse(aOrgEndPts, ms_aInfo[pEP->uMode].RGBAPrec[0][0], pEP->bSigned);
1781  OptimizeEndPoints(pEP, aOrgErr, aOrgEndPts, aOptEndPts);
1782  AssignIndices(pEP, aOptEndPts, aOptIdx, aOptErr);
1783  SwapIndices(pEP, aOptEndPts, aOptIdx);
1784 
1785  float fOrgTotErr = 0.0f, fOptTotErr = 0.0f;
1786  for(size_t p = 0; p <= uPartitions; ++p)
1787  {
1788  fOrgTotErr += aOrgErr[p];
1789  fOptTotErr += aOptErr[p];
1790  }
1791 
1792  if(bTransformed) TransformForward(aOptEndPts);
1793  if(EndPointsFit(pEP, aOptEndPts) && fOptTotErr < fOrgTotErr && fOptTotErr < pEP->fBestErr)
1794  {
1795  pEP->fBestErr = fOptTotErr;
1796  EmitBlock(pEP, aOptEndPts, aOptIdx);
1797  }
1798  else if(fOrgTotErr < pEP->fBestErr)
1799  {
1800  // either it stopped fitting when we optimized it, or there was no improvement
1801  // so go back to the unoptimized endpoints which we know will fit
1802  if(bTransformed) TransformForward(aOrgEndPts);
1803  pEP->fBestErr = fOrgTotErr;
1804  EmitBlock(pEP, aOrgEndPts, aOrgIdx);
1805  }
1806  }
1807 }
1808 
1809 _Use_decl_annotations_
1810 void D3DX_BC6H::GeneratePaletteUnquantized(const EncodeParams* pEP, size_t uRegion, INTColor aPalette[])
1811 {
1812  assert( pEP );
1813  assert( uRegion < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1814  _Analysis_assume_( uRegion < BC6H_MAX_REGIONS && pEP->uShape < BC6H_MAX_SHAPES );
1815  const INTEndPntPair& endPts = pEP->aUnqEndPts[pEP->uShape][uRegion];
1816  const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
1817  const uint8_t uNumIndices = 1 << uIndexPrec;
1818  assert(uNumIndices > 0);
1819  _Analysis_assume_(uNumIndices > 0);
1820 
1821  const int* aWeights = nullptr;
1822  switch(uIndexPrec)
1823  {
1824  case 3: aWeights = g_aWeights3; assert(uNumIndices <= 8); _Analysis_assume_(uNumIndices <= 8); break;
1825  case 4: aWeights = g_aWeights4; assert(uNumIndices <= 16); _Analysis_assume_(uNumIndices <= 16); break;
1826  default:
1827  assert(false);
1828  for(size_t i = 0; i < uNumIndices; ++i)
1829  {
1830  #pragma prefast(suppress:22103, "writing blocks in two halves confuses tool")
1831  aPalette[i] = INTColor(0,0,0);
1832  }
1833  return;
1834  }
1835 
1836  for(register size_t i = 0; i < uNumIndices; ++i)
1837  {
1838  aPalette[i].r = (endPts.A.r * (BC67_WEIGHT_MAX - aWeights[i]) + endPts.B.r * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT;
1839  aPalette[i].g = (endPts.A.g * (BC67_WEIGHT_MAX - aWeights[i]) + endPts.B.g * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT;
1840  aPalette[i].b = (endPts.A.b * (BC67_WEIGHT_MAX - aWeights[i]) + endPts.B.b * aWeights[i] + BC67_WEIGHT_ROUND) >> BC67_WEIGHT_SHIFT;
1841  }
1842 }
1843 
1844 _Use_decl_annotations_
1845 float D3DX_BC6H::MapColors(const EncodeParams* pEP, size_t uRegion, size_t np, const size_t* auIndex) const
1846 {
1847  assert( pEP );
1848  const uint8_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
1849  const uint8_t uNumIndices = 1 << uIndexPrec;
1850  INTColor aPalette[BC6H_MAX_INDICES];
1851  GeneratePaletteUnquantized(pEP, uRegion, aPalette);
1852 
1853  float fTotalErr = 0.0f;
1854  for(size_t i = 0; i < np; ++i)
1855  {
1856  float fBestErr = Norm(pEP->aIPixels[auIndex[i]], aPalette[0]);
1857  for(uint8_t j = 1; j < uNumIndices && fBestErr > 0.0f; ++j)
1858  {
1859  float fErr = Norm(pEP->aIPixels[auIndex[i]], aPalette[j]);
1860  if(fErr > fBestErr) break; // error increased, so we're done searching
1861  if(fErr < fBestErr) fBestErr = fErr;
1862  }
1863  fTotalErr += fBestErr;
1864  }
1865 
1866  return fTotalErr;
1867 }
1868 
1869 _Use_decl_annotations_
1870 float D3DX_BC6H::RoughMSE(EncodeParams* pEP) const
1871 {
1872  assert( pEP );
1873  assert( pEP->uShape < BC6H_MAX_SHAPES);
1874  _Analysis_assume_( pEP->uShape < BC6H_MAX_SHAPES);
1875 
1876  INTEndPntPair* aEndPts = pEP->aUnqEndPts[pEP->uShape];
1877 
1878  const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
1879  assert( uPartitions < BC6H_MAX_REGIONS );
1880  _Analysis_assume_( uPartitions < BC6H_MAX_REGIONS );
1881 
1882  size_t auPixIdx[NUM_PIXELS_PER_BLOCK];
1883 
1884  float fError = 0.0f;
1885  for(size_t p = 0; p <= uPartitions; ++p)
1886  {
1887  size_t np = 0;
1888  for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
1889  {
1890  if(g_aPartitionTable[uPartitions][pEP->uShape][i] == p)
1891  {
1892  auPixIdx[np++] = i;
1893  }
1894  }
1895 
1896  // handle simple cases
1897  assert(np > 0);
1898  if(np == 1)
1899  {
1900  aEndPts[p].A = pEP->aIPixels[auPixIdx[0]];
1901  aEndPts[p].B = pEP->aIPixels[auPixIdx[0]];
1902  continue;
1903  }
1904  else if(np == 2)
1905  {
1906  aEndPts[p].A = pEP->aIPixels[auPixIdx[0]];
1907  aEndPts[p].B = pEP->aIPixels[auPixIdx[1]];
1908  continue;
1909  }
1910 
1911  HDRColorA epA, epB;
1912  OptimizeRGB(pEP->aHDRPixels, &epA, &epB, 4, np, auPixIdx);
1913  aEndPts[p].A.Set(epA, pEP->bSigned);
1914  aEndPts[p].B.Set(epB, pEP->bSigned);
1915  if(pEP->bSigned)
1916  {
1917  aEndPts[p].A.Clamp(-F16MAX, F16MAX);
1918  aEndPts[p].B.Clamp(-F16MAX, F16MAX);
1919  }
1920  else
1921  {
1922  aEndPts[p].A.Clamp(0, F16MAX);
1923  aEndPts[p].B.Clamp(0, F16MAX);
1924  }
1925 
1926  fError += MapColors(pEP, p, np, auPixIdx);
1927  }
1928 
1929  return fError;
1930 }
1931 
1932 
1933 
1934 //-------------------------------------------------------------------------------------
1935 // BC7 Compression
1936 //-------------------------------------------------------------------------------------
1937 _Use_decl_annotations_
1938 void D3DX_BC7::Decode(HDRColorA* pOut) const
1939 {
1940  assert( pOut );
1941 
1942  size_t uFirst = 0;
1943  while(uFirst < 128 && !GetBit(uFirst)) {}
1944  uint8_t uMode = uint8_t(uFirst - 1);
1945 
1946  if(uMode < 8)
1947  {
1948  const uint8_t uPartitions = ms_aInfo[uMode].uPartitions;
1949  assert( uPartitions < BC7_MAX_REGIONS );
1950  _Analysis_assume_( uPartitions < BC7_MAX_REGIONS );
1951 
1952  const uint8_t uNumEndPts = (uPartitions + 1) << 1;
1953  const uint8_t uIndexPrec = ms_aInfo[uMode].uIndexPrec;
1954  const uint8_t uIndexPrec2 = ms_aInfo[uMode].uIndexPrec2;
1955  register size_t i;
1956  size_t uStartBit = uMode + 1;
1957  uint8_t P[6];
1958  uint8_t uShape = GetBits(uStartBit, ms_aInfo[uMode].uPartitionBits);
1959  assert( uShape < BC7_MAX_SHAPES );
1960  _Analysis_assume_( uShape < BC7_MAX_SHAPES );
1961 
1962  uint8_t uRotation = GetBits(uStartBit, ms_aInfo[uMode].uRotationBits);
1963  assert( uRotation < 4 );
1964 
1965  uint8_t uIndexMode = GetBits(uStartBit, ms_aInfo[uMode].uIndexModeBits);
1966  assert( uIndexMode < 2 );
1967 
1968  LDRColorA c[BC7_MAX_REGIONS << 1];
1969  const LDRColorA RGBAPrec = ms_aInfo[uMode].RGBAPrec;
1970  const LDRColorA RGBAPrecWithP = ms_aInfo[uMode].RGBAPrecWithP;
1971 
1972  assert( uNumEndPts <= (BC7_MAX_REGIONS << 1) );
1973 
1974  // Red channel
1975  for(i = 0; i < uNumEndPts; i++)
1976  {
1977  if ( uStartBit + RGBAPrec.r > 128 )
1978  {
1979 #ifdef _DEBUG
1980  OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
1981 #endif
1982  FillWithErrorColors( pOut );
1983  return;
1984  }
1985 
1986  c[i].r = GetBits(uStartBit, RGBAPrec.r);
1987  }
1988 
1989  // Green channel
1990  for(i = 0; i < uNumEndPts; i++)
1991  {
1992  if ( uStartBit + RGBAPrec.g > 128 )
1993  {
1994 #ifdef _DEBUG
1995  OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
1996 #endif
1997  FillWithErrorColors( pOut );
1998  return;
1999  }
2000 
2001  c[i].g = GetBits(uStartBit, RGBAPrec.g);
2002  }
2003 
2004  // Blue channel
2005  for(i = 0; i < uNumEndPts; i++)
2006  {
2007  if ( uStartBit + RGBAPrec.b > 128 )
2008  {
2009 #ifdef _DEBUG
2010  OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
2011 #endif
2012  FillWithErrorColors( pOut );
2013  return;
2014  }
2015 
2016  c[i].b = GetBits(uStartBit, RGBAPrec.b);
2017  }
2018 
2019  // Alpha channel
2020  for(i = 0; i < uNumEndPts; i++)
2021  {
2022  if ( uStartBit + RGBAPrec.a > 128 )
2023  {
2024 #ifdef _DEBUG
2025  OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
2026 #endif
2027  FillWithErrorColors( pOut );
2028  return;
2029  }
2030 
2031  c[i].a = RGBAPrec.a ? GetBits(uStartBit, RGBAPrec.a) : 255;
2032  }
2033 
2034  // P-bits
2035  assert( ms_aInfo[uMode].uPBits <= 6 );
2036  _Analysis_assume_( ms_aInfo[uMode].uPBits <= 6 );
2037  for(i = 0; i < ms_aInfo[uMode].uPBits; i++)
2038  {
2039  if ( uStartBit > 127 )
2040  {
2041 #ifdef _DEBUG
2042  OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
2043 #endif
2044  FillWithErrorColors( pOut );
2045  return;
2046  }
2047 
2048  P[i] = GetBit(uStartBit);
2049  }
2050 
2051  if(ms_aInfo[uMode].uPBits)
2052  {
2053  for(i = 0; i < uNumEndPts; i++)
2054  {
2055  size_t pi = i * ms_aInfo[uMode].uPBits / uNumEndPts;
2056  for(register uint8_t ch = 0; ch < BC7_NUM_CHANNELS; ch++)
2057  {
2058  if(RGBAPrec[ch] != RGBAPrecWithP[ch])
2059  {
2060  c[i][ch] = (c[i][ch] << 1) | P[pi];
2061  }
2062  }
2063  }
2064  }
2065 
2066  for(i = 0; i < uNumEndPts; i++)
2067  {
2068  c[i] = Unquantize(c[i], RGBAPrecWithP);
2069  }
2070 
2072 
2073  // read color indices
2074  for(i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2075  {
2076  size_t uNumBits = IsFixUpOffset(ms_aInfo[uMode].uPartitions, uShape, i) ? uIndexPrec - 1 : uIndexPrec;
2077  if ( uStartBit + uNumBits > 128 )
2078  {
2079 #ifdef _DEBUG
2080  OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
2081 #endif
2082  FillWithErrorColors( pOut );
2083  return;
2084  }
2085  w1[i] = GetBits(uStartBit, uNumBits);
2086  }
2087 
2088  // read alpha indices
2089  if(uIndexPrec2)
2090  {
2091  for(i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2092  {
2093  size_t uNumBits = i ? uIndexPrec2 : uIndexPrec2 - 1;
2094  if ( uStartBit + uNumBits > 128 )
2095  {
2096 #ifdef _DEBUG
2097  OutputDebugStringA( "BC7: Invalid block encountered during decoding\n" );
2098 #endif
2099  FillWithErrorColors( pOut );
2100  return;
2101  }
2102  w2[i] = GetBits(uStartBit, uNumBits );
2103  }
2104  }
2105 
2106  for(i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
2107  {
2108  uint8_t uRegion = g_aPartitionTable[uPartitions][uShape][i];
2109  LDRColorA outPixel;
2110  if(uIndexPrec2 == 0)
2111  {
2112  LDRColorA::Interpolate(c[uRegion << 1], c[(uRegion << 1) + 1], w1[i], w1[i], uIndexPrec, uIndexPrec, outPixel);
2113  }
2114  else
2115  {
2116  if(uIndexMode == 0)
2117  {
2118  LDRColorA::Interpolate(c[uRegion << 1], c[(uRegion << 1) + 1], w1[i], w2[i], uIndexPrec, uIndexPrec2, outPixel);
2119  }
2120  else
2121  {
2122  LDRColorA::Interpolate(c[uRegion << 1], c[(uRegion << 1) + 1], w2[i], w1[i], uIndexPrec2, uIndexPrec, outPixel);
2123  }
2124  }
2125 
2126  switch(uRotation)
2127  {
2128  case 1: std::swap(outPixel.r, outPixel.a); break;
2129  case 2: std::swap(outPixel.g, outPixel.a); break;
2130  case 3: std::swap(outPixel.b, outPixel.a); break;
2131  }
2132 
2133  pOut[i] = HDRColorA(outPixel);
2134  }
2135  }
2136  else
2137  {
2138 #ifdef _DEBUG
2139  OutputDebugStringA( "BC7: Reserved mode 8 encountered during decoding\n" );
2140 #endif
2141  // Per the BC7 format spec, we must return transparent black
2142  memset( pOut, 0, sizeof(HDRColorA) * NUM_PIXELS_PER_BLOCK );
2143  }
2144 }
2145 
2146 _Use_decl_annotations_
2147 void D3DX_BC7::Encode(const HDRColorA* const pIn)
2148 {
2149  assert( pIn );
2150 
2151  D3DX_BC7 final = *this;
2152  EncodeParams EP(pIn);
2153  float fMSEBest = FLT_MAX;
2154 
2155  for(size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
2156  {
2157  EP.aLDRPixels[i].r = uint8_t( std::max<float>( 0.0f, std::min<float>( 255.0f, pIn[i].r * 255.0f + 0.01f ) ) );
2158  EP.aLDRPixels[i].g = uint8_t( std::max<float>( 0.0f, std::min<float>( 255.0f, pIn[i].g * 255.0f + 0.01f ) ) );
2159  EP.aLDRPixels[i].b = uint8_t( std::max<float>( 0.0f, std::min<float>( 255.0f, pIn[i].b * 255.0f + 0.01f ) ) );
2160  EP.aLDRPixels[i].a = uint8_t( std::max<float>( 0.0f, std::min<float>( 255.0f, pIn[i].a * 255.0f + 0.01f ) ) );
2161  }
2162 
2163  for(EP.uMode = 0; EP.uMode < 8 && fMSEBest > 0; ++EP.uMode)
2164  {
2165  const size_t uShapes = size_t(1) << ms_aInfo[EP.uMode].uPartitionBits;
2166  assert( uShapes <= BC7_MAX_SHAPES );
2167  _Analysis_assume_( uShapes <= BC7_MAX_SHAPES );
2168 
2169  const size_t uNumRots = size_t(1) << ms_aInfo[EP.uMode].uRotationBits;
2170  const size_t uNumIdxMode = size_t(1) << ms_aInfo[EP.uMode].uIndexModeBits;
2171  // Number of rough cases to look at. reasonable values of this are 1, uShapes/4, and uShapes
2172  // uShapes/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
2173  const size_t uItems = std::max<size_t>(1, uShapes >> 2);
2174  float afRoughMSE[BC7_MAX_SHAPES];
2175  size_t auShape[BC7_MAX_SHAPES];
2176 
2177  for(size_t r = 0; r < uNumRots && fMSEBest > 0; ++r)
2178  {
2179  switch(r)
2180  {
2181  case 1: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].r, EP.aLDRPixels[i].a); break;
2182  case 2: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].g, EP.aLDRPixels[i].a); break;
2183  case 3: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].b, EP.aLDRPixels[i].a); break;
2184  }
2185 
2186  for(size_t im = 0; im < uNumIdxMode && fMSEBest > 0; ++im)
2187  {
2188  // pick the best uItems shapes and refine these.
2189  for(size_t s = 0; s < uShapes; s++)
2190  {
2191  afRoughMSE[s] = RoughMSE(&EP, s, im);
2192  auShape[s] = s;
2193  }
2194 
2195  // Bubble up the first uItems items
2196  for(size_t i = 0; i < uItems; i++)
2197  {
2198  for(size_t j = i + 1; j < uShapes; j++)
2199  {
2200  if(afRoughMSE[i] > afRoughMSE[j])
2201  {
2202  std::swap(afRoughMSE[i], afRoughMSE[j]);
2203  std::swap(auShape[i], auShape[j]);
2204  }
2205  }
2206  }
2207 
2208  for(size_t i = 0; i < uItems && fMSEBest > 0; i++)
2209  {
2210  float fMSE = Refine(&EP, auShape[i], r, im);
2211  if(fMSE < fMSEBest)
2212  {
2213  final = *this;
2214  fMSEBest = fMSE;
2215  }
2216  }
2217  }
2218 
2219  switch(r)
2220  {
2221  case 1: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].r, EP.aLDRPixels[i].a); break;
2222  case 2: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].g, EP.aLDRPixels[i].a); break;
2223  case 3: for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++) std::swap(EP.aLDRPixels[i].b, EP.aLDRPixels[i].a); break;
2224  }
2225  }
2226  }
2227 
2228  *this = final;
2229 }
2230 
2231 
2232 //-------------------------------------------------------------------------------------
2233 _Use_decl_annotations_
2234 void D3DX_BC7::GeneratePaletteQuantized(const EncodeParams* pEP, size_t uIndexMode, const LDREndPntPair& endPts, LDRColorA aPalette[]) const
2235 {
2236  assert( pEP );
2237  const size_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec;
2238  const size_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2;
2239  const size_t uNumIndices = size_t(1) << uIndexPrec;
2240  const size_t uNumIndices2 = size_t(1) << uIndexPrec2;
2241  assert( uNumIndices > 0 && uNumIndices2 > 0 );
2242  _Analysis_assume_( uNumIndices > 0 && uNumIndices2 > 0 );
2243  assert( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) );
2244  _Analysis_assume_( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) );
2245 
2246  LDRColorA a = Unquantize(endPts.A, ms_aInfo[pEP->uMode].RGBAPrecWithP);
2247  LDRColorA b = Unquantize(endPts.B, ms_aInfo[pEP->uMode].RGBAPrecWithP);
2248  if(uIndexPrec2 == 0)
2249  {
2250  for(register size_t i = 0; i < uNumIndices; i++)
2251  LDRColorA::Interpolate(a, b, i, i, uIndexPrec, uIndexPrec, aPalette[i]);
2252  }
2253  else
2254  {
2255  for(register size_t i = 0; i < uNumIndices; i++)
2256  LDRColorA::InterpolateRGB(a, b, i, uIndexPrec, aPalette[i]);
2257  for(register size_t i = 0; i < uNumIndices2; i++)
2258  LDRColorA::InterpolateA(a, b, i, uIndexPrec2, aPalette[i]);
2259  }
2260 }
2261 
2262 _Use_decl_annotations_
2263 float D3DX_BC7::PerturbOne(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode, size_t ch,
2264  const LDREndPntPair &oldEndPts, LDREndPntPair &newEndPts, float fOldErr, uint8_t do_b) const
2265 {
2266  assert( pEP );
2267  const int prec = ms_aInfo[pEP->uMode].RGBAPrecWithP[ch];
2268  LDREndPntPair tmp_endPts = newEndPts = oldEndPts;
2269  float fMinErr = fOldErr;
2270  uint8_t* pnew_c = (do_b ? &newEndPts.B[ch] : &newEndPts.A[ch]);
2271  uint8_t* ptmp_c = (do_b ? &tmp_endPts.B[ch] : &tmp_endPts.A[ch]);
2272 
2273  // do a logarithmic search for the best error for this endpoint (which)
2274  for(int step = 1 << (prec-1); step; step >>= 1)
2275  {
2276  bool bImproved = false;
2277  int beststep = 0;
2278  for(int sign = -1; sign <= 1; sign += 2)
2279  {
2280  int tmp = int(*pnew_c) + sign * step;
2281  if(tmp < 0 || tmp >= (1 << prec))
2282  continue;
2283  else
2284  *ptmp_c = (uint8_t) tmp;
2285 
2286  float fTotalErr = MapColors(pEP, aColors, np, uIndexMode, tmp_endPts, fMinErr);
2287  if(fTotalErr < fMinErr)
2288  {
2289  bImproved = true;
2290  fMinErr = fTotalErr;
2291  beststep = sign * step;
2292  }
2293  }
2294 
2295  // if this was an improvement, move the endpoint and continue search from there
2296  if(bImproved)
2297  *pnew_c = uint8_t(int(*pnew_c) + beststep);
2298  }
2299  return fMinErr;
2300 }
2301 
2302 // perturb the endpoints at least -3 to 3.
2303 // always ensure endpoint ordering is preserved (no need to overlap the scan)
2304 _Use_decl_annotations_
2305 void D3DX_BC7::Exhaustive(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode, size_t ch,
2306  float& fOrgErr, LDREndPntPair& optEndPt) const
2307 {
2308  assert( pEP );
2309  const uint8_t uPrec = ms_aInfo[pEP->uMode].RGBAPrecWithP[ch];
2310  LDREndPntPair tmpEndPt;
2311  if(fOrgErr == 0)
2312  return;
2313 
2314  int delta = 5;
2315 
2316  // ok figure out the range of A and B
2317  tmpEndPt = optEndPt;
2318  int alow = std::max<int>(0, int(optEndPt.A[ch]) - delta);
2319  int ahigh = std::min<int>((1 << uPrec) - 1, int(optEndPt.A[ch]) + delta);
2320  int blow = std::max<int>(0, int(optEndPt.B[ch]) - delta);
2321  int bhigh = std::min<int>((1 << uPrec) - 1, int(optEndPt.B[ch]) + delta);
2322  int amin = 0;
2323  int bmin = 0;
2324 
2325  float fBestErr = fOrgErr;
2326  if(optEndPt.A[ch] <= optEndPt.B[ch])
2327  {
2328  // keep a <= b
2329  for(int a = alow; a <= ahigh; ++a)
2330  {
2331  for(int b = std::max<int>(a, blow); b < bhigh; ++b)
2332  {
2333  tmpEndPt.A[ch] = (uint8_t) a;
2334  tmpEndPt.B[ch] = (uint8_t) b;
2335 
2336  float fErr = MapColors(pEP, aColors, np, uIndexMode, tmpEndPt, fBestErr);
2337  if(fErr < fBestErr)
2338  {
2339  amin = a;
2340  bmin = b;
2341  fBestErr = fErr;
2342  }
2343  }
2344  }
2345  }
2346  else
2347  {
2348  // keep b <= a
2349  for(int b = blow; b < bhigh; ++b)
2350  {
2351  for(int a = std::max<int>(b, alow); a <= ahigh; ++a)
2352  {
2353  tmpEndPt.A[ch] = (uint8_t) a;
2354  tmpEndPt.B[ch] = (uint8_t) b;
2355 
2356  float fErr = MapColors(pEP, aColors, np, uIndexMode, tmpEndPt, fBestErr);
2357  if(fErr < fBestErr)
2358  {
2359  amin = a;
2360  bmin = b;
2361  fBestErr = fErr;
2362  }
2363  }
2364  }
2365  }
2366 
2367  if(fBestErr < fOrgErr)
2368  {
2369  optEndPt.A[ch] = (uint8_t) amin;
2370  optEndPt.B[ch] = (uint8_t) bmin;
2371  fOrgErr = fBestErr;
2372  }
2373 }
2374 
2375 _Use_decl_annotations_
2376 void D3DX_BC7::OptimizeOne(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode,
2377  float fOrgErr, const LDREndPntPair& org, LDREndPntPair& opt) const
2378 {
2379  assert( pEP );
2380 
2381  float fOptErr = fOrgErr;
2382  opt = org;
2383 
2384  LDREndPntPair new_a, new_b;
2385  LDREndPntPair newEndPts;
2386  uint8_t do_b;
2387 
2388  // now optimize each channel separately
2389  for(size_t ch = 0; ch < BC7_NUM_CHANNELS; ++ch)
2390  {
2391  if(ms_aInfo[pEP->uMode].RGBAPrecWithP[ch] == 0)
2392  continue;
2393 
2394  // figure out which endpoint when perturbed gives the most improvement and start there
2395  // if we just alternate, we can easily end up in a local minima
2396  float fErr0 = PerturbOne(pEP, aColors, np, uIndexMode, ch, opt, new_a, fOptErr, 0); // perturb endpt A
2397  float fErr1 = PerturbOne(pEP, aColors, np, uIndexMode, ch, opt, new_b, fOptErr, 1); // perturb endpt B
2398 
2399  uint8_t& copt_a = opt.A[ch];
2400  uint8_t& copt_b = opt.B[ch];
2401  uint8_t& cnew_a = new_a.A[ch];
2402  uint8_t& cnew_b = new_a.B[ch];
2403 
2404  if(fErr0 < fErr1)
2405  {
2406  if(fErr0 >= fOptErr)
2407  continue;
2408  copt_a = cnew_a;
2409  fOptErr = fErr0;
2410  do_b = 1; // do B next
2411  }
2412  else
2413  {
2414  if(fErr1 >= fOptErr)
2415  continue;
2416  copt_b = cnew_b;
2417  fOptErr = fErr1;
2418  do_b = 0; // do A next
2419  }
2420 
2421  // now alternate endpoints and keep trying until there is no improvement
2422  for( ; ; )
2423  {
2424  float fErr = PerturbOne(pEP, aColors, np, uIndexMode, ch, opt, newEndPts, fOptErr, do_b);
2425  if(fErr >= fOptErr)
2426  break;
2427  if(do_b == 0)
2428  copt_a = cnew_a;
2429  else
2430  copt_b = cnew_b;
2431  fOptErr = fErr;
2432  do_b = 1 - do_b; // now move the other endpoint
2433  }
2434  }
2435 
2436  // finally, do a small exhaustive search around what we think is the global minima to be sure
2437  for(size_t ch = 0; ch < BC7_NUM_CHANNELS; ch++)
2438  Exhaustive(pEP, aColors, np, uIndexMode, ch, fOptErr, opt);
2439 }
2440 
2441 _Use_decl_annotations_
2442 void D3DX_BC7::OptimizeEndPoints(const EncodeParams* pEP, size_t uShape, size_t uIndexMode, const float afOrgErr[],
2443  const LDREndPntPair aOrgEndPts[], LDREndPntPair aOptEndPts[]) const
2444 {
2445  assert( pEP );
2446  const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
2447  assert( uPartitions < BC7_MAX_REGIONS && uShape < BC7_MAX_SHAPES );
2448  _Analysis_assume_( uPartitions < BC7_MAX_REGIONS && uShape < BC7_MAX_SHAPES );
2449 
2450  LDRColorA aPixels[NUM_PIXELS_PER_BLOCK];
2451 
2452  for(size_t p = 0; p <= uPartitions; ++p)
2453  {
2454  // collect the pixels in the region
2455  size_t np = 0;
2456  for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
2457  if(g_aPartitionTable[uPartitions][uShape][i] == p)
2458  aPixels[np++] = pEP->aLDRPixels[i];
2459 
2460  OptimizeOne(pEP, aPixels, np, uIndexMode, afOrgErr[p], aOrgEndPts[p], aOptEndPts[p]);
2461  }
2462 }
2463 
2464 _Use_decl_annotations_
2465 void D3DX_BC7::AssignIndices(const EncodeParams* pEP, size_t uShape, size_t uIndexMode, LDREndPntPair endPts[], size_t aIndices[], size_t aIndices2[],
2466  float afTotErr[]) const
2467 {
2468  assert( pEP );
2469  assert( uShape < BC7_MAX_SHAPES );
2470  _Analysis_assume_( uShape < BC7_MAX_SHAPES );
2471 
2472  const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
2473  assert( uPartitions < BC7_MAX_REGIONS );
2474  _Analysis_assume_( uPartitions < BC7_MAX_REGIONS );
2475 
2476  const uint8_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec;
2477  const uint8_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2;
2478  const uint8_t uNumIndices = 1 << uIndexPrec;
2479  const uint8_t uNumIndices2 = 1 << uIndexPrec2;
2480 
2481  assert( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) );
2482  _Analysis_assume_( (uNumIndices <= BC7_MAX_INDICES) && (uNumIndices2 <= BC7_MAX_INDICES) );
2483 
2484  const uint8_t uHighestIndexBit = uNumIndices >> 1;
2485  const uint8_t uHighestIndexBit2 = uNumIndices2 >> 1;
2486  LDRColorA aPalette[BC7_MAX_REGIONS][BC7_MAX_INDICES];
2487 
2488  // build list of possibles
2489  LDREndPntPair adjusted_endPts;
2490  for(size_t p = 0; p <= uPartitions; p++)
2491  {
2492  GeneratePaletteQuantized(pEP, uIndexMode, endPts[p], aPalette[p]);
2493  afTotErr[p] = 0;
2494  }
2495 
2496  for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2497  {
2498  uint8_t uRegion = g_aPartitionTable[uPartitions][uShape][i];
2499  assert( uRegion < BC7_MAX_REGIONS );
2500  _Analysis_assume_( uRegion < BC7_MAX_REGIONS );
2501  afTotErr[uRegion] += ComputeError(pEP->aLDRPixels[i], aPalette[uRegion], uIndexPrec, uIndexPrec2, &(aIndices[i]), &(aIndices2[i]));
2502  }
2503 
2504  // swap endpoints as needed to ensure that the indices at index_positions have a 0 high-order bit
2505  if(uIndexPrec2 == 0)
2506  {
2507  for(register size_t p = 0; p <= uPartitions; p++)
2508  {
2509  if(aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit)
2510  {
2511  std::swap(endPts[p].A, endPts[p].B);
2512  for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2513  if(g_aPartitionTable[uPartitions][uShape][i] == p)
2514  aIndices[i] = uNumIndices - 1 - aIndices[i];
2515  }
2516  assert((aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit) == 0);
2517  }
2518  }
2519  else
2520  {
2521  for(register size_t p = 0; p <= uPartitions; p++)
2522  {
2523  if(aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit)
2524  {
2525  std::swap(endPts[p].A.r, endPts[p].B.r);
2526  std::swap(endPts[p].A.g, endPts[p].B.g);
2527  std::swap(endPts[p].A.b, endPts[p].B.b);
2528  for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2529  if(g_aPartitionTable[uPartitions][uShape][i] == p)
2530  aIndices[i] = uNumIndices - 1 - aIndices[i];
2531  }
2532  assert((aIndices[g_aFixUp[uPartitions][uShape][p]] & uHighestIndexBit) == 0);
2533 
2534  if(aIndices2[0] & uHighestIndexBit2)
2535  {
2536  std::swap(endPts[p].A.a, endPts[p].B.a);
2537  for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2538  aIndices2[i] = uNumIndices2 - 1 - aIndices2[i];
2539  }
2540  assert((aIndices2[0] & uHighestIndexBit2) == 0);
2541  }
2542  }
2543 }
2544 
2545 _Use_decl_annotations_
2546 void D3DX_BC7::EmitBlock(const EncodeParams* pEP, size_t uShape, size_t uRotation, size_t uIndexMode, const LDREndPntPair aEndPts[], const size_t aIndex[], const size_t aIndex2[])
2547 {
2548  assert( pEP );
2549  const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
2550  assert( uPartitions < BC7_MAX_REGIONS );
2551  _Analysis_assume_( uPartitions < BC7_MAX_REGIONS );
2552 
2553  const size_t uPBits = ms_aInfo[pEP->uMode].uPBits;
2554  const size_t uIndexPrec = ms_aInfo[pEP->uMode].uIndexPrec;
2555  const size_t uIndexPrec2 = ms_aInfo[pEP->uMode].uIndexPrec2;
2556  const LDRColorA RGBAPrec = ms_aInfo[pEP->uMode].RGBAPrec;
2557  const LDRColorA RGBAPrecWithP = ms_aInfo[pEP->uMode].RGBAPrecWithP;
2558  register size_t i;
2559  size_t uStartBit = 0;
2560  SetBits(uStartBit, pEP->uMode, 0);
2561  SetBits(uStartBit, 1, 1);
2562  SetBits(uStartBit, ms_aInfo[pEP->uMode].uRotationBits, static_cast<uint8_t>( uRotation ));
2563  SetBits(uStartBit, ms_aInfo[pEP->uMode].uIndexModeBits, static_cast<uint8_t>( uIndexMode ));
2564  SetBits(uStartBit, ms_aInfo[pEP->uMode].uPartitionBits, static_cast<uint8_t>( uShape ));
2565 
2566  if(uPBits)
2567  {
2568  const size_t uNumEP = size_t(1 + uPartitions) << 1;
2569  uint8_t aPVote[BC7_MAX_REGIONS << 1] = {0,0,0,0,0,0};
2570  uint8_t aCount[BC7_MAX_REGIONS << 1] = {0,0,0,0,0,0};
2571  for(uint8_t ch = 0; ch < BC7_NUM_CHANNELS; ch++)
2572  {
2573  uint8_t ep = 0;
2574  for(i = 0; i <= uPartitions; i++)
2575  {
2576  if(RGBAPrec[ch] == RGBAPrecWithP[ch])
2577  {
2578  SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].A[ch]);
2579  SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].B[ch]);
2580  }
2581  else
2582  {
2583  SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].A[ch] >> 1);
2584  SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].B[ch] >> 1);
2585  size_t idx = ep++ * uPBits / uNumEP;
2586  assert(idx < (BC7_MAX_REGIONS << 1));
2587  _Analysis_assume_(idx < (BC7_MAX_REGIONS << 1));
2588  aPVote[idx] += aEndPts[i].A[ch] & 0x01;
2589  aCount[idx]++;
2590  idx = ep++ * uPBits / uNumEP;
2591  assert(idx < (BC7_MAX_REGIONS << 1));
2592  _Analysis_assume_(idx < (BC7_MAX_REGIONS << 1));
2593  aPVote[idx] += aEndPts[i].B[ch] & 0x01;
2594  aCount[idx]++;
2595  }
2596  }
2597  }
2598 
2599  for(i = 0; i < uPBits; i++)
2600  {
2601  SetBits(uStartBit, 1, aPVote[i] > (aCount[i] >> 1) ? 1 : 0);
2602  }
2603  }
2604  else
2605  {
2606  for(size_t ch = 0; ch < BC7_NUM_CHANNELS; ch++)
2607  {
2608  for(i = 0; i <= uPartitions; i++)
2609  {
2610  SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].A[ch] );
2611  SetBits(uStartBit, RGBAPrec[ch], aEndPts[i].B[ch] );
2612  }
2613  }
2614  }
2615 
2616  const size_t* aI1 = uIndexMode ? aIndex2 : aIndex;
2617  const size_t* aI2 = uIndexMode ? aIndex : aIndex2;
2618  for(i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2619  {
2620  if(IsFixUpOffset(ms_aInfo[pEP->uMode].uPartitions, uShape, i))
2621  SetBits(uStartBit, uIndexPrec - 1, static_cast<uint8_t>( aI1[i] ));
2622  else
2623  SetBits(uStartBit, uIndexPrec, static_cast<uint8_t>( aI1[i] ));
2624  }
2625  if(uIndexPrec2)
2626  for(i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2627  SetBits(uStartBit, i ? uIndexPrec2 : uIndexPrec2 - 1, static_cast<uint8_t>( aI2[i] ));
2628 
2629  assert(uStartBit == 128);
2630 }
2631 
2632 _Use_decl_annotations_
2633 float D3DX_BC7::Refine(const EncodeParams* pEP, size_t uShape, size_t uRotation, size_t uIndexMode)
2634 {
2635  assert( pEP );
2636  assert( uShape < BC7_MAX_SHAPES );
2637  _Analysis_assume_( uShape < BC7_MAX_SHAPES );
2638  const LDREndPntPair* aEndPts = pEP->aEndPts[uShape];
2639 
2640  const size_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
2641  assert( uPartitions < BC7_MAX_REGIONS );
2642  _Analysis_assume_( uPartitions < BC7_MAX_REGIONS );
2643 
2644  LDREndPntPair aOrgEndPts[BC7_MAX_REGIONS];
2645  LDREndPntPair aOptEndPts[BC7_MAX_REGIONS];
2646  size_t aOrgIdx[NUM_PIXELS_PER_BLOCK];
2647  size_t aOrgIdx2[NUM_PIXELS_PER_BLOCK];
2648  size_t aOptIdx[NUM_PIXELS_PER_BLOCK];
2649  size_t aOptIdx2[NUM_PIXELS_PER_BLOCK];
2650  float aOrgErr[BC7_MAX_REGIONS];
2651  float aOptErr[BC7_MAX_REGIONS];
2652 
2653  for(register size_t p = 0; p <= uPartitions; p++)
2654  {
2655  aOrgEndPts[p].A = Quantize(aEndPts[p].A, ms_aInfo[pEP->uMode].RGBAPrecWithP);
2656  aOrgEndPts[p].B = Quantize(aEndPts[p].B, ms_aInfo[pEP->uMode].RGBAPrecWithP);
2657  }
2658 
2659  AssignIndices(pEP, uShape, uIndexMode, aOrgEndPts, aOrgIdx, aOrgIdx2, aOrgErr);
2660  OptimizeEndPoints(pEP, uShape, uIndexMode, aOrgErr, aOrgEndPts, aOptEndPts);
2661  AssignIndices(pEP, uShape, uIndexMode, aOptEndPts, aOptIdx, aOptIdx2, aOptErr);
2662 
2663  float fOrgTotErr = 0, fOptTotErr = 0;
2664  for(register size_t p = 0; p <= uPartitions; p++)
2665  {
2666  fOrgTotErr += aOrgErr[p];
2667  fOptTotErr += aOptErr[p];
2668  }
2669  if(fOptTotErr < fOrgTotErr)
2670  {
2671  EmitBlock(pEP, uShape, uRotation, uIndexMode, aOptEndPts, aOptIdx, aOptIdx2);
2672  return fOptTotErr;
2673  }
2674  else
2675  {
2676  EmitBlock(pEP, uShape, uRotation, uIndexMode, aOrgEndPts, aOrgIdx, aOrgIdx2);
2677  return fOrgTotErr;
2678  }
2679 }
2680 
2681 _Use_decl_annotations_
2682 float D3DX_BC7::MapColors(const EncodeParams* pEP, const LDRColorA aColors[], size_t np, size_t uIndexMode, const LDREndPntPair& endPts, float fMinErr) const
2683 {
2684  assert( pEP );
2685  const uint8_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec;
2686  const uint8_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2;
2687  LDRColorA aPalette[BC7_MAX_INDICES];
2688  float fTotalErr = 0;
2689 
2690  GeneratePaletteQuantized(pEP, uIndexMode, endPts, aPalette);
2691  for(register size_t i = 0; i < np; ++i)
2692  {
2693  fTotalErr += ComputeError(aColors[i], aPalette, uIndexPrec, uIndexPrec2);
2694  if(fTotalErr > fMinErr) // check for early exit
2695  {
2696  fTotalErr = FLT_MAX;
2697  break;
2698  }
2699  }
2700 
2701  return fTotalErr;
2702 }
2703 
2704 _Use_decl_annotations_
2705 float D3DX_BC7::RoughMSE(EncodeParams* pEP, size_t uShape, size_t uIndexMode)
2706 {
2707  assert( pEP );
2708  assert( uShape < BC7_MAX_SHAPES );
2709  _Analysis_assume_( uShape < BC7_MAX_SHAPES );
2710  LDREndPntPair* aEndPts = pEP->aEndPts[uShape];
2711 
2712  const uint8_t uPartitions = ms_aInfo[pEP->uMode].uPartitions;
2713  assert( uPartitions < BC7_MAX_REGIONS );
2714  _Analysis_assume_( uPartitions < BC7_MAX_REGIONS );
2715 
2716  const uint8_t uIndexPrec = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec2 : ms_aInfo[pEP->uMode].uIndexPrec;
2717  const uint8_t uIndexPrec2 = uIndexMode ? ms_aInfo[pEP->uMode].uIndexPrec : ms_aInfo[pEP->uMode].uIndexPrec2;
2718  const uint8_t uNumIndices = 1 << uIndexPrec;
2719  const uint8_t uNumIndices2 = 1 << uIndexPrec2;
2720  size_t auPixIdx[NUM_PIXELS_PER_BLOCK];
2721  LDRColorA aPalette[BC7_MAX_REGIONS][BC7_MAX_INDICES];
2722 
2723  for(size_t p = 0; p <= uPartitions; p++)
2724  {
2725  size_t np = 0;
2726  for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2727  {
2728  if (g_aPartitionTable[uPartitions][uShape][i] == p)
2729  {
2730  auPixIdx[np++] = i;
2731  }
2732  }
2733 
2734  // handle simple cases
2735  assert(np > 0);
2736  if(np == 1)
2737  {
2738  aEndPts[p].A = pEP->aLDRPixels[auPixIdx[0]];
2739  aEndPts[p].B = pEP->aLDRPixels[auPixIdx[0]];
2740  continue;
2741  }
2742  else if(np == 2)
2743  {
2744  aEndPts[p].A = pEP->aLDRPixels[auPixIdx[0]];
2745  aEndPts[p].B = pEP->aLDRPixels[auPixIdx[1]];
2746  continue;
2747  }
2748 
2749  if(uIndexPrec2 == 0)
2750  {
2751  HDRColorA epA, epB;
2752  OptimizeRGBA(pEP->aHDRPixels, &epA, &epB, 4, np, auPixIdx);
2753  epA.Clamp(0.0f, 1.0f);
2754  epB.Clamp(0.0f, 1.0f);
2755  epA *= 255.0f;
2756  epB *= 255.0f;
2757  aEndPts[p].A = epA.ToLDRColorA();
2758  aEndPts[p].B = epB.ToLDRColorA();
2759  }
2760  else
2761  {
2762  uint8_t uMinAlpha = 255, uMaxAlpha = 0;
2763  for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; ++i)
2764  {
2765  uMinAlpha = std::min<uint8_t>(uMinAlpha, pEP->aLDRPixels[auPixIdx[i]].a);
2766  uMaxAlpha = std::max<uint8_t>(uMaxAlpha, pEP->aLDRPixels[auPixIdx[i]].a);
2767  }
2768 
2769  HDRColorA epA, epB;
2770  OptimizeRGB(pEP->aHDRPixels, &epA, &epB, 4, np, auPixIdx);
2771  epA.Clamp(0.0f, 1.0f);
2772  epB.Clamp(0.0f, 1.0f);
2773  epA *= 255.0f;
2774  epB *= 255.0f;
2775  aEndPts[p].A = epA.ToLDRColorA();
2776  aEndPts[p].B = epB.ToLDRColorA();
2777  aEndPts[p].A.a = uMinAlpha;
2778  aEndPts[p].B.a = uMaxAlpha;
2779  }
2780  }
2781 
2782  if(uIndexPrec2 == 0)
2783  {
2784  for(size_t p = 0; p <= uPartitions; p++)
2785  for(register size_t i = 0; i < uNumIndices; i++)
2786  LDRColorA::Interpolate(aEndPts[p].A, aEndPts[p].B, i, i, uIndexPrec, uIndexPrec, aPalette[p][i]);
2787  }
2788  else
2789  {
2790  for(size_t p = 0; p <= uPartitions; p++)
2791  {
2792  for(register size_t i = 0; i < uNumIndices; i++)
2793  LDRColorA::InterpolateRGB(aEndPts[p].A, aEndPts[p].B, i, uIndexPrec, aPalette[p][i]);
2794  for(register size_t i = 0; i < uNumIndices2; i++)
2795  LDRColorA::InterpolateA(aEndPts[p].A, aEndPts[p].B, i, uIndexPrec2, aPalette[p][i]);
2796  }
2797  }
2798 
2799  float fTotalErr = 0;
2800  for(register size_t i = 0; i < NUM_PIXELS_PER_BLOCK; i++)
2801  {
2802  uint8_t uRegion = g_aPartitionTable[uPartitions][uShape][i];
2803  fTotalErr += ComputeError(pEP->aLDRPixels[i], aPalette[uRegion], uIndexPrec, uIndexPrec2);
2804  }
2805 
2806  return fTotalErr;
2807 }
2808 
2809 //=====================================================================================
2810 // Entry points
2811 //=====================================================================================
2812 
2813 //-------------------------------------------------------------------------------------
2814 // BC6H Compression
2815 //-------------------------------------------------------------------------------------
2816 _Use_decl_annotations_
2817 void D3DXDecodeBC6HU(XMVECTOR *pColor, const uint8_t *pBC)
2818 {
2819  assert( pColor && pBC );
2820  static_assert( sizeof(D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes" );
2821  reinterpret_cast< const D3DX_BC6H* >( pBC )->Decode(false, reinterpret_cast<HDRColorA*>(pColor));
2822 }
2823 
2824 _Use_decl_annotations_
2825 void D3DXDecodeBC6HS(XMVECTOR *pColor, const uint8_t *pBC)
2826 {
2827  assert( pColor && pBC );
2828  static_assert( sizeof(D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes" );
2829  reinterpret_cast< const D3DX_BC6H* >( pBC )->Decode(true, reinterpret_cast<HDRColorA*>(pColor));
2830 }
2831 
2832 _Use_decl_annotations_
2833 void D3DXEncodeBC6HU(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags)
2834 {
2835  UNREFERENCED_PARAMETER(flags);
2836  assert( pBC && pColor );
2837  static_assert( sizeof(D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes" );
2838  reinterpret_cast< D3DX_BC6H* >( pBC )->Encode(false, reinterpret_cast<const HDRColorA*>(pColor));
2839 }
2840 
2841 _Use_decl_annotations_
2842 void D3DXEncodeBC6HS(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags)
2843 {
2844  UNREFERENCED_PARAMETER(flags);
2845  assert( pBC && pColor );
2846  static_assert( sizeof(D3DX_BC6H) == 16, "D3DX_BC6H should be 16 bytes" );
2847  reinterpret_cast< D3DX_BC6H* >( pBC )->Encode(true, reinterpret_cast<const HDRColorA*>(pColor));
2848 }
2849 
2850 
2851 //-------------------------------------------------------------------------------------
2852 // BC7 Compression
2853 //-------------------------------------------------------------------------------------
2854 _Use_decl_annotations_
2855 void D3DXDecodeBC7(XMVECTOR *pColor, const uint8_t *pBC)
2856 {
2857  assert( pColor && pBC );
2858  static_assert( sizeof(D3DX_BC7) == 16, "D3DX_BC7 should be 16 bytes" );
2859  reinterpret_cast< const D3DX_BC7* >( pBC )->Decode(reinterpret_cast<HDRColorA*>(pColor));
2860 }
2861 
2862 _Use_decl_annotations_
2863 void D3DXEncodeBC7(uint8_t *pBC, const XMVECTOR *pColor, DWORD flags)
2864 {
2865  UNREFERENCED_PARAMETER(flags);
2866  assert( pBC && pColor );
2867  static_assert( sizeof(D3DX_BC7) == 16, "D3DX_BC7 should be 16 bytes" );
2868  reinterpret_cast< D3DX_BC7* >( pBC )->Encode(reinterpret_cast<const HDRColorA*>(pColor));
2869 }
2870 
2871 } // namespace
const size_t BC7_NUM_CHANNELS
Definition: BC.h:47
void D3DXDecodeBC6HS(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC)
static const float pD4[]
Definition: BC6HBC7.cpp:33
function b
#define NUM_PIXELS_PER_BLOCK
Definition: BC.h:38
static bool IsFixUpOffset(_In_range_(0, 2) size_t uPartitions, _In_range_(0, 63) size_t uShape, _In_range_(0, 15) size_t uOffset)
Definition: BC6HBC7.cpp:561
LDRColorA A
Definition: BC.h:262
function a
uint8_t r
Definition: BC.h:74
_In_ size_t _In_ DXGI_FORMAT _In_ size_t _In_ DXGI_FORMAT _In_ DWORD flags
Definition: DirectXTexP.h:170
INTColor & SignExtend(_In_ const LDRColorA &Prec)
Definition: BC.h:372
static float ComputeError(_Inout_ const LDRColorA &pixel, _In_reads_(1<< uIndexPrec) const LDRColorA aPalette[], _In_ uint8_t uIndexPrec, _In_ uint8_t uIndexPrec2, _Out_opt_ size_t *pBestIndex=nullptr, _Out_opt_ size_t *pBestIndex2=nullptr)
Definition: BC6HBC7.cpp:985
size_t _In_ DXGI_FORMAT size_t _In_ TEXP_LEGACY_FORMAT _In_ DWORD flags assert(pDestination &&outSize > 0)
uint8_t a
Definition: BC.h:74
const size_t BC6H_MAX_SHAPES
Definition: BC.h:45
uint8_t b
Definition: BC.h:74
const int32_t BC67_WEIGHT_MAX
Definition: BC.h:50
static const float pC4[]
Definition: BC6HBC7.cpp:32
void ToF16(_Out_writes_(3) PackedVector::HALF aF16[3], _In_ bool bSigned) const
Definition: BC.h:380
void D3DXEncodeBC6HU(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags)
const int g_aWeights3[8]
Definition: BC6HBC7.cpp:36
void D3DXDecodeBC6HU(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC)
LDRColorA B
Definition: BC.h:263
static void InterpolateRGB(_In_ const LDRColorA &c0, _In_ const LDRColorA &c1, _In_ size_t wc, _In_ _In_range_(2, 4) size_t wcprec, _Out_ LDRColorA &out)
Definition: BC.h:105
static void Interpolate(_In_ const LDRColorA &c0, _In_ const LDRColorA &c1, _In_ size_t wc, _In_ size_t wa, _In_ _In_range_(2, 4) size_t wcprec, _In_ _In_range_(2, 4) size_t waprec, _Out_ LDRColorA &out)
Definition: BC.h:133
static void OptimizeRGB(_Out_ HDRColorA *pX, _Out_ HDRColorA *pY, _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA *pPoints, _In_ size_t cSteps, _In_ DWORD flags)
Definition: BC.cpp:67
#define BC6H_MAX_INDICES
Definition: BC.h:40
INTColor A
Definition: BC.h:437
void Decode(_In_ bool bSigned, _Out_writes_(NUM_PIXELS_PER_BLOCK) HDRColorA *pOut) const
Definition: BC6HBC7.cpp:1078
const int32_t BC67_WEIGHT_ROUND
Definition: BC.h:52
static void InterpolateA(_In_ const LDRColorA &c0, _In_ const LDRColorA &c1, _In_ size_t wa, _In_range_(2, 4) _In_ size_t waprec, _Out_ LDRColorA &out)
Definition: BC.h:120
INTColor B
Definition: BC.h:438
static const float pD3[]
Definition: BC6HBC7.cpp:31
static const uint8_t g_aFixUp[3][64][3]
Definition: BC6HBC7.cpp:247
void D3DXDecodeBC7(_Out_writes_(NUM_PIXELS_PER_BLOCK) XMVECTOR *pColor, _In_reads_(16) const uint8_t *pBC)
uint8_t g
Definition: BC.h:74
void D3DXEncodeBC6HS(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags)
#define BC7_MAX_INDICES
Definition: BC.h:42
_In_ size_t _In_ DXGI_FORMAT _Inout_updates_all_(count) XMVECTOR *pSource
_In_ size_t _In_ const TexMetadata _In_ DWORD _Out_writes_(nImages) Image *images
void Encode(_In_ bool bSigned, _In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA *const pIn)
Definition: BC6HBC7.cpp:1239
const uint16_t F16MAX
Definition: BC.h:33
const size_t BC7_MAX_SHAPES
Definition: BC.h:48
function s(a)
static const uint8_t g_aPartitionTable[3][64][16]
Definition: BC6HBC7.cpp:40
_In_ size_t _In_ DXGI_FORMAT _In_reads_(count) const XMVECTOR *pSource
#define BC6H_MAX_REGIONS
Definition: BC.h:39
static void TransformInverse(_Inout_updates_all_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[], _In_ const LDRColorA &Prec, _In_ bool bSigned)
Definition: BC6HBC7.cpp:582
void Encode(_In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA *const pIn)
Definition: BC6HBC7.cpp:2147
static const float pC3[]
Definition: BC6HBC7.cpp:30
const int g_aWeights2[4]
Definition: BC6HBC7.cpp:35
static int NBits(_In_ int n, _In_ bool bIsSigned)
Definition: BC6HBC7.cpp:605
const uint32_t BC67_WEIGHT_SHIFT
Definition: BC.h:51
static void FillWithErrorColors(_Out_writes_(NUM_PIXELS_PER_BLOCK) HDRColorA *pOut)
Definition: BC6HBC7.cpp:1059
static const float fEpsilon
Definition: BC6HBC7.cpp:29
static float OptimizeRGBA(_In_reads_(NUM_PIXELS_PER_BLOCK) const HDRColorA *const pPoints, _Out_ HDRColorA *pX, _Out_ HDRColorA *pY, _In_ size_t cSteps, _In_ size_t cPixels, _In_reads_(cPixels) const size_t *pIndex)
Definition: BC6HBC7.cpp:819
const int g_aWeights4[16]
Definition: BC6HBC7.cpp:37
#define BC7_MAX_REGIONS
Definition: BC.h:41
void Decode(_Out_writes_(NUM_PIXELS_PER_BLOCK) HDRColorA *pOut) const
Definition: BC6HBC7.cpp:1938
void D3DXEncodeBC7(_Out_writes_(16) uint8_t *pBC, _In_reads_(NUM_PIXELS_PER_BLOCK) const XMVECTOR *pColor, _In_ DWORD flags)
static void TransformForward(_Inout_updates_all_(BC6H_MAX_REGIONS) INTEndPntPair aEndPts[])
Definition: BC6HBC7.cpp:575
static float Norm(_In_ const INTColor &a, _In_ const INTColor &b)
Definition: BC6HBC7.cpp:596