[Mesa-dev] [PATCH v3 1/4] mesa: add header for shared bptc decompress functions
Denis Pauk
pauk.denis at gmail.com
Tue Jun 26 20:30:49 UTC 2018
Move shared bptc functions to texcompress_bptc_tmp.h:
* fetch_rgba_unorm_from_block
* fetch_rgb_float_from_block
* compress_rgba_unorm
* compress_rgb_float
Create decompress functions:
* decompress_rgba_unorm
* decompress_rgb_float
These functions will be reused by the gallium/auxiliary code.
v2: Add block decompress function
v3: Move all shared code to header
Suggested-by: Marek Olšák <maraeo at gmail.com>
Signed-off-by: Denis Pauk <pauk.denis at gmail.com>
CC: Marek Olšák <maraeo at gmail.com>
---
src/mesa/Makefile.sources | 1 +
src/mesa/main/texcompress_bptc.c | 1451 +--------------------
src/mesa/main/texcompress_bptc_tmp.h | 1743 ++++++++++++++++++++++++++
3 files changed, 1761 insertions(+), 1434 deletions(-)
create mode 100644 src/mesa/main/texcompress_bptc_tmp.h
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 00aba0a2f7..63f3734c32 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -216,6 +216,7 @@ MAIN_FILES = \
main/texcompress.c \
main/texcompress_bptc.c \
main/texcompress_bptc.h \
+ main/texcompress_bptc_tmp.h \
main/texcompress_cpal.c \
main/texcompress_cpal.h \
main/texcompress_etc.c \
diff --git a/src/mesa/main/texcompress_bptc.c b/src/mesa/main/texcompress_bptc.c
index fd37be97f3..46279f144f 100644
--- a/src/mesa/main/texcompress_bptc.c
+++ b/src/mesa/main/texcompress_bptc.c
@@ -29,632 +29,38 @@
#include <stdbool.h>
#include "texcompress.h"
#include "texcompress_bptc.h"
-#include "util/format_srgb.h"
-#include "util/half_float.h"
+#include "texcompress_bptc_tmp.h"
#include "texstore.h"
-#include "macros.h"
#include "image.h"
#include "mtypes.h"
-#define BLOCK_SIZE 4
-#define N_PARTITIONS 64
-#define BLOCK_BYTES 16
-
-struct bptc_unorm_mode {
- int n_subsets;
- int n_partition_bits;
- bool has_rotation_bits;
- bool has_index_selection_bit;
- int n_color_bits;
- int n_alpha_bits;
- bool has_endpoint_pbits;
- bool has_shared_pbits;
- int n_index_bits;
- int n_secondary_index_bits;
-};
-
-struct bptc_float_bitfield {
- int8_t endpoint;
- uint8_t component;
- uint8_t offset;
- uint8_t n_bits;
- bool reverse;
-};
-
-struct bptc_float_mode {
- bool reserved;
- bool transformed_endpoints;
- int n_partition_bits;
- int n_endpoint_bits;
- int n_index_bits;
- int n_delta_bits[3];
- struct bptc_float_bitfield bitfields[24];
-};
-
-struct bit_writer {
- uint8_t buf;
- int pos;
- uint8_t *dst;
-};
-
-static const struct bptc_unorm_mode
-bptc_unorm_modes[] = {
- /* 0 */ { 3, 4, false, false, 4, 0, true, false, 3, 0 },
- /* 1 */ { 2, 6, false, false, 6, 0, false, true, 3, 0 },
- /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
- /* 3 */ { 2, 6, false, false, 7, 0, true, false, 2, 0 },
- /* 4 */ { 1, 0, true, true, 5, 6, false, false, 2, 3 },
- /* 5 */ { 1, 0, true, false, 7, 8, false, false, 2, 2 },
- /* 6 */ { 1, 0, false, false, 7, 7, true, false, 4, 0 },
- /* 7 */ { 2, 6, false, false, 5, 5, true, false, 2, 0 }
-};
-
-static const struct bptc_float_mode
-bptc_float_modes[] = {
- /* 00 */
- { false, true, 5, 10, 3, { 5, 5, 5 },
- { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
- { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
- { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
- { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
- { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
- { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
- { 3, 2, 3, 1, false },
- { -1 } }
- },
- /* 01 */
- { false, true, 5, 7, 3, { 6, 6, 6 },
- { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
- { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
- { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
- { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
- { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
- { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
- { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
- { 2, 0, 0, 6, false },
- { 3, 0, 0, 6, false },
- { -1 } }
- },
- /* 00010 */
- { false, true, 5, 11, 3, { 5, 4, 4 },
- { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
- { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
- { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
- { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
- { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
- { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
- { -1 } }
- },
- /* 00011 */
- { false, false, 0, 10, 4, { 10, 10, 10 },
- { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
- { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
- { -1 } }
- },
- /* 00110 */
- { false, true, 5, 11, 3, { 4, 5, 4 },
- { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
- { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
- { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
- { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
- { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
- { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
- { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
- { -1 } }
- },
- /* 00111 */
- { false, true, 0, 11, 4, { 9, 9, 9 },
- { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
- { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
- { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
- { -1 } }
- },
- /* 01010 */
- { false, true, 5, 11, 3, { 4, 4, 5 },
- { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
- { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
- { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
- { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
- { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
- { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
- { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
- { -1 } }
- },
- /* 01011 */
- { false, true, 0, 12, 4, { 8, 8, 8 },
- { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
- { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
- { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
- { -1 } }
- },
- /* 01110 */
- { false, true, 5, 9, 3, { 5, 5, 5 },
- { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
- { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
- { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
- { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
- { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
- { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
- { 3, 2, 3, 1, false },
- { -1 } }
- },
- /* 01111 */
- { false, true, 0, 16, 4, { 4, 4, 4 },
- { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
- { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
- { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
- { -1 } }
- },
- /* 10010 */
- { false, true, 5, 8, 3, { 6, 5, 5 },
- { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
- { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
- { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
- { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
- { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
- { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
- { 3, 0, 0, 6, false },
- { -1 } }
- },
- /* 10011 */
- { true /* reserved */ },
- /* 10110 */
- { false, true, 5, 8, 3, { 5, 6, 5 },
- { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
- { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
- { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
- { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
- { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
- { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
- { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
- { -1 } }
- },
- /* 10111 */
- { true /* reserved */ },
- /* 11010 */
- { false, true, 5, 8, 3, { 5, 5, 6 },
- { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
- { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
- { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
- { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
- { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
- { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
- { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
- { -1 } }
- },
- /* 11011 */
- { true /* reserved */ },
- /* 11110 */
- { false, false, 5, 6, 3, { 6, 6, 6 },
- { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
- { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
- { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
- { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
- { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
- { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
- { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
- { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
- { -1 } }
- },
- /* 11111 */
- { true /* reserved */ },
-};
-
-/* This partition table is used when the mode has two subsets. Each
- * partition is represented by a 32-bit value which gives 2 bits per texel
- * within the block. The value of the two bits represents which subset to use
- * (0 or 1).
- */
-static const uint32_t
-partition_table1[N_PARTITIONS] = {
- 0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
- 0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
- 0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
- 0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
- 0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
- 0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
- 0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
- 0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
- 0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
- 0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
- 0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
- 0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
- 0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
- 0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
- 0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
- 0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
-};
-
-/* This partition table is used when the mode has three subsets. In this case
- * the values can be 0, 1 or 2.
- */
-static const uint32_t
-partition_table2[N_PARTITIONS] = {
- 0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
- 0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
- 0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
- 0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
- 0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
- 0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
- 0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
- 0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
- 0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
- 0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
- 0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
- 0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
- 0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
- 0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
- 0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
- 0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
-};
-
-static const uint8_t
-anchor_indices[][N_PARTITIONS] = {
- /* Anchor index values for the second subset of two-subset partitioning */
- {
- 0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
- 0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
- 0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
- 0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
- },
-
- /* Anchor index values for the second subset of three-subset partitioning */
- {
- 0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
- 0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
- 0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
- 0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
- },
-
- /* Anchor index values for the third subset of three-subset
- * partitioning
- */
- {
- 0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
- 0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
- 0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
- 0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
- }
-};
-
-static int
-extract_bits(const uint8_t *block,
- int offset,
- int n_bits)
-{
- int byte_index = offset / 8;
- int bit_index = offset % 8;
- int n_bits_in_byte = MIN2(n_bits, 8 - bit_index);
- int result = 0;
- int bit = 0;
-
- while (true) {
- result |= ((block[byte_index] >> bit_index) &
- ((1 << n_bits_in_byte) - 1)) << bit;
-
- n_bits -= n_bits_in_byte;
-
- if (n_bits <= 0)
- return result;
-
- bit += n_bits_in_byte;
- byte_index++;
- bit_index = 0;
- n_bits_in_byte = MIN2(n_bits, 8);
- }
-}
-
-static uint8_t
-expand_component(uint8_t byte,
- int n_bits)
-{
- /* Expands a n-bit quantity into a byte by copying the most-significant
- * bits into the unused least-significant bits.
- */
- return byte << (8 - n_bits) | (byte >> (2 * n_bits - 8));
-}
-
-static int
-extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
- const uint8_t *block,
- int bit_offset,
- uint8_t endpoints[][4])
-{
- int component;
- int subset;
- int endpoint;
- int pbit;
- int n_components;
-
- /* Extract each color component */
- for (component = 0; component < 3; component++) {
- for (subset = 0; subset < mode->n_subsets; subset++) {
- for (endpoint = 0; endpoint < 2; endpoint++) {
- endpoints[subset * 2 + endpoint][component] =
- extract_bits(block, bit_offset, mode->n_color_bits);
- bit_offset += mode->n_color_bits;
- }
- }
- }
-
- /* Extract the alpha values */
- if (mode->n_alpha_bits > 0) {
- for (subset = 0; subset < mode->n_subsets; subset++) {
- for (endpoint = 0; endpoint < 2; endpoint++) {
- endpoints[subset * 2 + endpoint][3] =
- extract_bits(block, bit_offset, mode->n_alpha_bits);
- bit_offset += mode->n_alpha_bits;
- }
- }
-
- n_components = 4;
- } else {
- for (subset = 0; subset < mode->n_subsets; subset++)
- for (endpoint = 0; endpoint < 2; endpoint++)
- endpoints[subset * 2 + endpoint][3] = 255;
-
- n_components = 3;
- }
-
- /* Add in the p-bits */
- if (mode->has_endpoint_pbits) {
- for (subset = 0; subset < mode->n_subsets; subset++) {
- for (endpoint = 0; endpoint < 2; endpoint++) {
- pbit = extract_bits(block, bit_offset, 1);
- bit_offset += 1;
-
- for (component = 0; component < n_components; component++) {
- endpoints[subset * 2 + endpoint][component] <<= 1;
- endpoints[subset * 2 + endpoint][component] |= pbit;
- }
- }
- }
- } else if (mode->has_shared_pbits) {
- for (subset = 0; subset < mode->n_subsets; subset++) {
- pbit = extract_bits(block, bit_offset, 1);
- bit_offset += 1;
-
- for (endpoint = 0; endpoint < 2; endpoint++) {
- for (component = 0; component < n_components; component++) {
- endpoints[subset * 2 + endpoint][component] <<= 1;
- endpoints[subset * 2 + endpoint][component] |= pbit;
- }
- }
- }
- }
-
- /* Expand the n-bit values to a byte */
- for (subset = 0; subset < mode->n_subsets; subset++) {
- for (endpoint = 0; endpoint < 2; endpoint++) {
- for (component = 0; component < 3; component++) {
- endpoints[subset * 2 + endpoint][component] =
- expand_component(endpoints[subset * 2 + endpoint][component],
- mode->n_color_bits +
- mode->has_endpoint_pbits +
- mode->has_shared_pbits);
- }
-
- if (mode->n_alpha_bits > 0) {
- endpoints[subset * 2 + endpoint][3] =
- expand_component(endpoints[subset * 2 + endpoint][3],
- mode->n_alpha_bits +
- mode->has_endpoint_pbits +
- mode->has_shared_pbits);
- }
- }
- }
-
- return bit_offset;
-}
-
-static bool
-is_anchor(int n_subsets,
- int partition_num,
- int texel)
-{
- if (texel == 0)
- return true;
-
- switch (n_subsets) {
- case 1:
- return false;
- case 2:
- return anchor_indices[0][partition_num] == texel;
- case 3:
- return (anchor_indices[1][partition_num] == texel ||
- anchor_indices[2][partition_num] == texel);
- default:
- assert(false);
- return false;
- }
-}
-
-static int
-count_anchors_before_texel(int n_subsets,
- int partition_num,
- int texel)
-{
- int count = 1;
-
- if (texel == 0)
- return 0;
-
- switch (n_subsets) {
- case 1:
- break;
- case 2:
- if (texel > anchor_indices[0][partition_num])
- count++;
- break;
- case 3:
- if (texel > anchor_indices[1][partition_num])
- count++;
- if (texel > anchor_indices[2][partition_num])
- count++;
- break;
- default:
- assert(false);
- return 0;
- }
-
- return count;
-}
-
-static int32_t
-interpolate(int32_t a, int32_t b,
- int index,
- int index_bits)
+static void
+fetch_bptc_rgb_float(const GLubyte *map,
+ GLint rowStride, GLint i, GLint j,
+ GLfloat *texel,
+ bool is_signed)
{
- static const uint8_t weights2[] = { 0, 21, 43, 64 };
- static const uint8_t weights3[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
- static const uint8_t weights4[] =
- { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
- static const uint8_t *weights[] = {
- NULL, NULL, weights2, weights3, weights4
- };
- int weight;
+ const GLubyte *block;
- weight = weights[index_bits][index];
+ block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
- return ((64 - weight) * a + weight * b + 32) >> 6;
+ fetch_rgb_float_from_block(block, texel, (i % 4) + (j % 4) * 4, is_signed);
}
static void
-apply_rotation(int rotation,
- uint8_t *result)
+fetch_bptc_rgb_signed_float(const GLubyte *map,
+ GLint rowStride, GLint i, GLint j,
+ GLfloat *texel)
{
- uint8_t t;
-
- if (rotation == 0)
- return;
-
- rotation--;
-
- t = result[rotation];
- result[rotation] = result[3];
- result[3] = t;
+ fetch_bptc_rgb_float(map, rowStride, i, j, texel, true);
}
static void
-fetch_rgba_unorm_from_block(const uint8_t *block,
- uint8_t *result,
- int texel)
+fetch_bptc_rgb_unsigned_float(const GLubyte *map,
+ GLint rowStride, GLint i, GLint j,
+ GLfloat *texel)
{
- int mode_num = ffs(block[0]);
- const struct bptc_unorm_mode *mode;
- int bit_offset, secondary_bit_offset;
- int partition_num;
- int subset_num;
- int rotation;
- int index_selection;
- int index_bits;
- int indices[2];
- int index;
- int anchors_before_texel;
- bool anchor;
- uint8_t endpoints[3 * 2][4];
- uint32_t subsets;
- int component;
-
- if (mode_num == 0) {
- /* According to the spec this mode is reserved and shouldn't be used. */
- memset(result, 0, 3);
- result[3] = 0xff;
- return;
- }
-
- mode = bptc_unorm_modes + mode_num - 1;
- bit_offset = mode_num;
-
- partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
- bit_offset += mode->n_partition_bits;
-
- switch (mode->n_subsets) {
- case 1:
- subsets = 0;
- break;
- case 2:
- subsets = partition_table1[partition_num];
- break;
- case 3:
- subsets = partition_table2[partition_num];
- break;
- default:
- assert(false);
- return;
- }
-
- if (mode->has_rotation_bits) {
- rotation = extract_bits(block, bit_offset, 2);
- bit_offset += 2;
- } else {
- rotation = 0;
- }
-
- if (mode->has_index_selection_bit) {
- index_selection = extract_bits(block, bit_offset, 1);
- bit_offset++;
- } else {
- index_selection = 0;
- }
-
- bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
-
- anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
- partition_num, texel);
-
- /* Calculate the offset to the secondary index */
- secondary_bit_offset = (bit_offset +
- BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
- mode->n_subsets +
- mode->n_secondary_index_bits * texel -
- anchors_before_texel);
-
- /* Calculate the offset to the primary index for this texel */
- bit_offset += mode->n_index_bits * texel - anchors_before_texel;
-
- subset_num = (subsets >> (texel * 2)) & 3;
-
- anchor = is_anchor(mode->n_subsets, partition_num, texel);
-
- index_bits = mode->n_index_bits;
- if (anchor)
- index_bits--;
- indices[0] = extract_bits(block, bit_offset, index_bits);
-
- if (mode->n_secondary_index_bits) {
- index_bits = mode->n_secondary_index_bits;
- if (anchor)
- index_bits--;
- indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
- }
-
- index = indices[index_selection];
- index_bits = (index_selection ?
- mode->n_secondary_index_bits :
- mode->n_index_bits);
-
- for (component = 0; component < 3; component++)
- result[component] = interpolate(endpoints[subset_num * 2][component],
- endpoints[subset_num * 2 + 1][component],
- index,
- index_bits);
-
- /* Alpha uses the opposite index from the color components */
- if (mode->n_secondary_index_bits && !index_selection) {
- index = indices[1];
- index_bits = mode->n_secondary_index_bits;
- } else {
- index = indices[0];
- index_bits = mode->n_index_bits;
- }
-
- result[3] = interpolate(endpoints[subset_num * 2][3],
- endpoints[subset_num * 2 + 1][3],
- index,
- index_bits);
-
- apply_rotation(rotation, result);
+ fetch_bptc_rgb_float(map, rowStride, i, j, texel, false);
}
static void
@@ -699,257 +105,6 @@ fetch_bptc_srgb_alpha_unorm(const GLubyte *map,
texel[ACOMP] = UBYTE_TO_FLOAT(texel_bytes[3]);
}
-static int32_t
-sign_extend(int32_t value,
- int n_bits)
-{
- if ((value & (1 << (n_bits - 1)))) {
- value |= (~(int32_t) 0) << n_bits;
- }
-
- return value;
-}
-
-static int
-signed_unquantize(int value, int n_endpoint_bits)
-{
- bool sign;
-
- if (n_endpoint_bits >= 16)
- return value;
-
- if (value == 0)
- return 0;
-
- sign = false;
-
- if (value < 0) {
- sign = true;
- value = -value;
- }
-
- if (value >= (1 << (n_endpoint_bits - 1)) - 1)
- value = 0x7fff;
- else
- value = ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
-
- if (sign)
- value = -value;
-
- return value;
-}
-
-static int
-unsigned_unquantize(int value, int n_endpoint_bits)
-{
- if (n_endpoint_bits >= 15)
- return value;
-
- if (value == 0)
- return 0;
-
- if (value == (1 << n_endpoint_bits) - 1)
- return 0xffff;
-
- return ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
-}
-
-static int
-extract_float_endpoints(const struct bptc_float_mode *mode,
- const uint8_t *block,
- int bit_offset,
- int32_t endpoints[][3],
- bool is_signed)
-{
- const struct bptc_float_bitfield *bitfield;
- int endpoint, component;
- int n_endpoints;
- int value;
- int i;
-
- if (mode->n_partition_bits)
- n_endpoints = 4;
- else
- n_endpoints = 2;
-
- memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
-
- for (bitfield = mode->bitfields; bitfield->endpoint != -1; bitfield++) {
- value = extract_bits(block, bit_offset, bitfield->n_bits);
- bit_offset += bitfield->n_bits;
-
- if (bitfield->reverse) {
- for (i = 0; i < bitfield->n_bits; i++) {
- if (value & (1 << i))
- endpoints[bitfield->endpoint][bitfield->component] |=
- 1 << ((bitfield->n_bits - 1 - i) + bitfield->offset);
- }
- } else {
- endpoints[bitfield->endpoint][bitfield->component] |=
- value << bitfield->offset;
- }
- }
-
- if (mode->transformed_endpoints) {
- /* The endpoints are specified as signed offsets from e0 */
- for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
- for (component = 0; component < 3; component++) {
- value = sign_extend(endpoints[endpoint][component],
- mode->n_delta_bits[component]);
- endpoints[endpoint][component] =
- ((endpoints[0][component] + value) &
- ((1 << mode->n_endpoint_bits) - 1));
- }
- }
- }
-
- if (is_signed) {
- for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
- for (component = 0; component < 3; component++) {
- value = sign_extend(endpoints[endpoint][component],
- mode->n_endpoint_bits);
- endpoints[endpoint][component] =
- signed_unquantize(value, mode->n_endpoint_bits);
- }
- }
- } else {
- for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
- for (component = 0; component < 3; component++) {
- endpoints[endpoint][component] =
- unsigned_unquantize(endpoints[endpoint][component],
- mode->n_endpoint_bits);
- }
- }
- }
-
- return bit_offset;
-}
-
-static int32_t
-finish_unsigned_unquantize(int32_t value)
-{
- return value * 31 / 64;
-}
-
-static int32_t
-finish_signed_unquantize(int32_t value)
-{
- if (value < 0)
- return (-value * 31 / 32) | 0x8000;
- else
- return value * 31 / 32;
-}
-
-static void
-fetch_rgb_float_from_block(const uint8_t *block,
- float *result,
- int texel,
- bool is_signed)
-{
- int mode_num;
- const struct bptc_float_mode *mode;
- int bit_offset;
- int partition_num;
- int subset_num;
- int index_bits;
- int index;
- int anchors_before_texel;
- int32_t endpoints[2 * 2][3];
- uint32_t subsets;
- int n_subsets;
- int component;
- int32_t value;
-
- if (block[0] & 0x2) {
- mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
- bit_offset = 5;
- } else {
- mode_num = block[0] & 3;
- bit_offset = 2;
- }
-
- mode = bptc_float_modes + mode_num;
-
- if (mode->reserved) {
- memset(result, 0, sizeof result[0] * 3);
- result[3] = 1.0f;
- return;
- }
-
- bit_offset = extract_float_endpoints(mode, block, bit_offset,
- endpoints, is_signed);
-
- if (mode->n_partition_bits) {
- partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
- bit_offset += mode->n_partition_bits;
-
- subsets = partition_table1[partition_num];
- n_subsets = 2;
- } else {
- partition_num = 0;
- subsets = 0;
- n_subsets = 1;
- }
-
- anchors_before_texel =
- count_anchors_before_texel(n_subsets, partition_num, texel);
-
- /* Calculate the offset to the primary index for this texel */
- bit_offset += mode->n_index_bits * texel - anchors_before_texel;
-
- subset_num = (subsets >> (texel * 2)) & 3;
-
- index_bits = mode->n_index_bits;
- if (is_anchor(n_subsets, partition_num, texel))
- index_bits--;
- index = extract_bits(block, bit_offset, index_bits);
-
- for (component = 0; component < 3; component++) {
- value = interpolate(endpoints[subset_num * 2][component],
- endpoints[subset_num * 2 + 1][component],
- index,
- mode->n_index_bits);
-
- if (is_signed)
- value = finish_signed_unquantize(value);
- else
- value = finish_unsigned_unquantize(value);
-
- result[component] = _mesa_half_to_float(value);
- }
-
- result[3] = 1.0f;
-}
-
-static void
-fetch_bptc_rgb_float(const GLubyte *map,
- GLint rowStride, GLint i, GLint j,
- GLfloat *texel,
- bool is_signed)
-{
- const GLubyte *block;
-
- block = map + (((rowStride + 3) / 4) * (j / 4) + (i / 4)) * 16;
-
- fetch_rgb_float_from_block(block, texel, (i % 4) + (j % 4) * 4, is_signed);
-}
-
-static void
-fetch_bptc_rgb_signed_float(const GLubyte *map,
- GLint rowStride, GLint i, GLint j,
- GLfloat *texel)
-{
- fetch_bptc_rgb_float(map, rowStride, i, j, texel, true);
-}
-
-static void
-fetch_bptc_rgb_unsigned_float(const GLubyte *map,
- GLint rowStride, GLint i, GLint j,
- GLfloat *texel)
-{
- fetch_bptc_rgb_float(map, rowStride, i, j, texel, false);
-}
-
compressed_fetch_func
_mesa_get_bptc_fetch_func(mesa_format format)
{
@@ -967,312 +122,6 @@ _mesa_get_bptc_fetch_func(mesa_format format)
}
}
-static void
-write_bits(struct bit_writer *writer, int n_bits, int value)
-{
- do {
- if (n_bits + writer->pos >= 8) {
- *(writer->dst++) = writer->buf | (value << writer->pos);
- writer->buf = 0;
- value >>= (8 - writer->pos);
- n_bits -= (8 - writer->pos);
- writer->pos = 0;
- } else {
- writer->buf |= value << writer->pos;
- writer->pos += n_bits;
- break;
- }
- } while (n_bits > 0);
-}
-
-static void
-get_average_luminance_alpha_unorm(int width, int height,
- const uint8_t *src, int src_rowstride,
- int *average_luminance, int *average_alpha)
-{
- int luminance_sum = 0, alpha_sum = 0;
- int y, x;
-
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++) {
- luminance_sum += src[0] + src[1] + src[2];
- alpha_sum += src[3];
- src += 4;
- }
- src += src_rowstride - width * 4;
- }
-
- *average_luminance = luminance_sum / (width * height);
- *average_alpha = alpha_sum / (width * height);
-}
-
-static void
-get_rgba_endpoints_unorm(int width, int height,
- const uint8_t *src, int src_rowstride,
- int average_luminance, int average_alpha,
- uint8_t endpoints[][4])
-{
- int endpoint_luminances[2];
- int midpoint;
- int sums[2][4];
- int endpoint;
- int luminance;
- uint8_t temp[3];
- const uint8_t *p = src;
- int rgb_left_endpoint_count = 0;
- int alpha_left_endpoint_count = 0;
- int y, x, i;
-
- memset(sums, 0, sizeof sums);
-
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++) {
- luminance = p[0] + p[1] + p[2];
- if (luminance < average_luminance) {
- endpoint = 0;
- rgb_left_endpoint_count++;
- } else {
- endpoint = 1;
- }
- for (i = 0; i < 3; i++)
- sums[endpoint][i] += p[i];
-
- if (p[2] < average_alpha) {
- endpoint = 0;
- alpha_left_endpoint_count++;
- } else {
- endpoint = 1;
- }
- sums[endpoint][3] += p[3];
-
- p += 4;
- }
-
- p += src_rowstride - width * 4;
- }
-
- if (rgb_left_endpoint_count == 0 ||
- rgb_left_endpoint_count == width * height) {
- for (i = 0; i < 3; i++)
- endpoints[0][i] = endpoints[1][i] =
- (sums[0][i] + sums[1][i]) / (width * height);
- } else {
- for (i = 0; i < 3; i++) {
- endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
- endpoints[1][i] = (sums[1][i] /
- (width * height - rgb_left_endpoint_count));
- }
- }
-
- if (alpha_left_endpoint_count == 0 ||
- alpha_left_endpoint_count == width * height) {
- endpoints[0][3] = endpoints[1][3] =
- (sums[0][3] + sums[1][3]) / (width * height);
- } else {
- endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
- endpoints[1][3] = (sums[1][3] /
- (width * height - alpha_left_endpoint_count));
- }
-
- /* We may need to swap the endpoints to ensure the most-significant bit of
- * the first index is zero */
-
- for (endpoint = 0; endpoint < 2; endpoint++) {
- endpoint_luminances[endpoint] =
- endpoints[endpoint][0] +
- endpoints[endpoint][1] +
- endpoints[endpoint][2];
- }
- midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;
-
- if ((src[0] + src[1] + src[2] <= midpoint) !=
- (endpoint_luminances[0] <= midpoint)) {
- memcpy(temp, endpoints[0], 3);
- memcpy(endpoints[0], endpoints[1], 3);
- memcpy(endpoints[1], temp, 3);
- }
-
- /* Same for the alpha endpoints */
-
- midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;
-
- if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
- temp[0] = endpoints[0][3];
- endpoints[0][3] = endpoints[1][3];
- endpoints[1][3] = temp[0];
- }
-}
-
-static void
-write_rgb_indices_unorm(struct bit_writer *writer,
- int src_width, int src_height,
- const uint8_t *src, int src_rowstride,
- uint8_t endpoints[][4])
-{
- int luminance;
- int endpoint_luminances[2];
- int endpoint;
- int index;
- int y, x;
-
- for (endpoint = 0; endpoint < 2; endpoint++) {
- endpoint_luminances[endpoint] =
- endpoints[endpoint][0] +
- endpoints[endpoint][1] +
- endpoints[endpoint][2];
- }
-
- /* If the endpoints have the same luminance then we'll just use index 0 for
- * all of the texels */
- if (endpoint_luminances[0] == endpoint_luminances[1]) {
- write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
- return;
- }
-
- for (y = 0; y < src_height; y++) {
- for (x = 0; x < src_width; x++) {
- luminance = src[0] + src[1] + src[2];
-
- index = ((luminance - endpoint_luminances[0]) * 3 /
- (endpoint_luminances[1] - endpoint_luminances[0]));
- if (index < 0)
- index = 0;
- else if (index > 3)
- index = 3;
-
- assert(x != 0 || y != 0 || index < 2);
-
- write_bits(writer, (x == 0 && y == 0) ? 1 : 2, index);
-
- src += 4;
- }
-
- /* Pad the indices out to the block size */
- if (src_width < BLOCK_SIZE)
- write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
-
- src += src_rowstride - src_width * 4;
- }
-
- /* Pad the indices out to the block size */
- if (src_height < BLOCK_SIZE)
- write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
-}
-
-static void
-write_alpha_indices_unorm(struct bit_writer *writer,
- int src_width, int src_height,
- const uint8_t *src, int src_rowstride,
- uint8_t endpoints[][4])
-{
- int index;
- int y, x;
-
- /* If the endpoints have the same alpha then we'll just use index 0 for
- * all of the texels */
- if (endpoints[0][3] == endpoints[1][3]) {
- write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
- return;
- }
-
- for (y = 0; y < src_height; y++) {
- for (x = 0; x < src_width; x++) {
- index = (((int) src[3] - (int) endpoints[0][3]) * 7 /
- ((int) endpoints[1][3] - endpoints[0][3]));
- if (index < 0)
- index = 0;
- else if (index > 7)
- index = 7;
-
- assert(x != 0 || y != 0 || index < 4);
-
- /* The first index has one less bit */
- write_bits(writer, (x == 0 && y == 0) ? 2 : 3, index);
-
- src += 4;
- }
-
- /* Pad the indices out to the block size */
- if (src_width < BLOCK_SIZE)
- write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
-
- src += src_rowstride - src_width * 4;
- }
-
- /* Pad the indices out to the block size */
- if (src_height < BLOCK_SIZE)
- write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
-}
-
-static void
-compress_rgba_unorm_block(int src_width, int src_height,
- const uint8_t *src, int src_rowstride,
- uint8_t *dst)
-{
- int average_luminance, average_alpha;
- uint8_t endpoints[2][4];
- struct bit_writer writer;
- int component, endpoint;
-
- get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
- &average_luminance, &average_alpha);
- get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
- average_luminance, average_alpha,
- endpoints);
-
- writer.dst = dst;
- writer.pos = 0;
- writer.buf = 0;
-
- write_bits(&writer, 5, 0x10); /* mode 4 */
- write_bits(&writer, 2, 0); /* rotation 0 */
- write_bits(&writer, 1, 0); /* index selection bit */
-
- /* Write the color endpoints */
- for (component = 0; component < 3; component++)
- for (endpoint = 0; endpoint < 2; endpoint++)
- write_bits(&writer, 5, endpoints[endpoint][component] >> 3);
-
- /* Write the alpha endpoints */
- for (endpoint = 0; endpoint < 2; endpoint++)
- write_bits(&writer, 6, endpoints[endpoint][3] >> 2);
-
- write_rgb_indices_unorm(&writer,
- src_width, src_height,
- src, src_rowstride,
- endpoints);
- write_alpha_indices_unorm(&writer,
- src_width, src_height,
- src, src_rowstride,
- endpoints);
-}
-
-static void
-compress_rgba_unorm(int width, int height,
- const uint8_t *src, int src_rowstride,
- uint8_t *dst, int dst_rowstride)
-{
- int dst_row_diff;
- int y, x;
-
- if (dst_rowstride >= width * 4)
- dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
- else
- dst_row_diff = 0;
-
- for (y = 0; y < height; y += BLOCK_SIZE) {
- for (x = 0; x < width; x += BLOCK_SIZE) {
- compress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
- MIN2(height - y, BLOCK_SIZE),
- src + x * 4 + y * src_rowstride,
- src_rowstride,
- dst);
- dst += BLOCK_BYTES;
- }
- dst += dst_row_diff;
- }
-}
-
GLboolean
_mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS)
{
@@ -1318,272 +167,6 @@ _mesa_texstore_bptc_rgba_unorm(TEXSTORE_PARAMS)
return GL_TRUE;
}
-static float
-get_average_luminance_float(int width, int height,
- const float *src, int src_rowstride)
-{
- float luminance_sum = 0;
- int y, x;
-
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++) {
- luminance_sum += src[0] + src[1] + src[2];
- src += 3;
- }
- src += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
- }
-
- return luminance_sum / (width * height);
-}
-
-static float
-clamp_value(float value, bool is_signed)
-{
- if (value > 65504.0f)
- return 65504.0f;
-
- if (is_signed) {
- if (value < -65504.0f)
- return -65504.0f;
- else
- return value;
- }
-
- if (value < 0.0f)
- return 0.0f;
-
- return value;
-}
-
-static void
-get_endpoints_float(int width, int height,
- const float *src, int src_rowstride,
- float average_luminance, float endpoints[][3],
- bool is_signed)
-{
- float endpoint_luminances[2];
- float midpoint;
- float sums[2][3];
- int endpoint, component;
- float luminance;
- float temp[3];
- const float *p = src;
- int left_endpoint_count = 0;
- int y, x, i;
-
- memset(sums, 0, sizeof sums);
-
- for (y = 0; y < height; y++) {
- for (x = 0; x < width; x++) {
- luminance = p[0] + p[1] + p[2];
- if (luminance < average_luminance) {
- endpoint = 0;
- left_endpoint_count++;
- } else {
- endpoint = 1;
- }
- for (i = 0; i < 3; i++)
- sums[endpoint][i] += p[i];
-
- p += 3;
- }
-
- p += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
- }
-
- if (left_endpoint_count == 0 ||
- left_endpoint_count == width * height) {
- for (i = 0; i < 3; i++)
- endpoints[0][i] = endpoints[1][i] =
- (sums[0][i] + sums[1][i]) / (width * height);
- } else {
- for (i = 0; i < 3; i++) {
- endpoints[0][i] = sums[0][i] / left_endpoint_count;
- endpoints[1][i] = sums[1][i] / (width * height - left_endpoint_count);
- }
- }
-
- /* Clamp the endpoints to the range of a half float and strip out
- * infinities */
- for (endpoint = 0; endpoint < 2; endpoint++) {
- for (component = 0; component < 3; component++) {
- endpoints[endpoint][component] =
- clamp_value(endpoints[endpoint][component], is_signed);
- }
- }
-
- /* We may need to swap the endpoints to ensure the most-significant bit of
- * the first index is zero */
-
- for (endpoint = 0; endpoint < 2; endpoint++) {
- endpoint_luminances[endpoint] =
- endpoints[endpoint][0] +
- endpoints[endpoint][1] +
- endpoints[endpoint][2];
- }
- midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2.0f;
-
- if ((src[0] + src[1] + src[2] <= midpoint) !=
- (endpoint_luminances[0] <= midpoint)) {
- memcpy(temp, endpoints[0], sizeof temp);
- memcpy(endpoints[0], endpoints[1], sizeof temp);
- memcpy(endpoints[1], temp, sizeof temp);
- }
-}
-
-static void
-write_rgb_indices_float(struct bit_writer *writer,
- int src_width, int src_height,
- const float *src, int src_rowstride,
- float endpoints[][3])
-{
- float luminance;
- float endpoint_luminances[2];
- int endpoint;
- int index;
- int y, x;
-
- for (endpoint = 0; endpoint < 2; endpoint++) {
- endpoint_luminances[endpoint] =
- endpoints[endpoint][0] +
- endpoints[endpoint][1] +
- endpoints[endpoint][2];
- }
-
- /* If the endpoints have the same luminance then we'll just use index 0 for
- * all of the texels */
- if (endpoint_luminances[0] == endpoint_luminances[1]) {
- write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
- return;
- }
-
- for (y = 0; y < src_height; y++) {
- for (x = 0; x < src_width; x++) {
- luminance = src[0] + src[1] + src[2];
-
- index = ((luminance - endpoint_luminances[0]) * 15 /
- (endpoint_luminances[1] - endpoint_luminances[0]));
- if (index < 0)
- index = 0;
- else if (index > 15)
- index = 15;
-
- assert(x != 0 || y != 0 || index < 8);
-
- write_bits(writer, (x == 0 && y == 0) ? 3 : 4, index);
-
- src += 3;
- }
-
- /* Pad the indices out to the block size */
- if (src_width < BLOCK_SIZE)
- write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
-
- src += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
- }
-
- /* Pad the indices out to the block size */
- if (src_height < BLOCK_SIZE)
- write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
-}
-
-static int
-get_endpoint_value(float value, bool is_signed)
-{
- bool sign = false;
- int half;
-
- if (is_signed) {
- half = _mesa_float_to_half(value);
-
- if (half & 0x8000) {
- half &= 0x7fff;
- sign = true;
- }
-
- half = (32 * half / 31) >> 6;
-
- if (sign)
- half = -half & ((1 << 10) - 1);
-
- return half;
- } else {
- if (value <= 0.0f)
- return 0;
-
- half = _mesa_float_to_half(value);
-
- return (64 * half / 31) >> 6;
- }
-}
-
-static void
-compress_rgb_float_block(int src_width, int src_height,
- const float *src, int src_rowstride,
- uint8_t *dst,
- bool is_signed)
-{
- float average_luminance;
- float endpoints[2][3];
- struct bit_writer writer;
- int component, endpoint;
- int endpoint_value;
-
- average_luminance =
- get_average_luminance_float(src_width, src_height, src, src_rowstride);
- get_endpoints_float(src_width, src_height, src, src_rowstride,
- average_luminance, endpoints, is_signed);
-
- writer.dst = dst;
- writer.pos = 0;
- writer.buf = 0;
-
- write_bits(&writer, 5, 3); /* mode 3 */
-
- /* Write the endpoints */
- for (endpoint = 0; endpoint < 2; endpoint++) {
- for (component = 0; component < 3; component++) {
- endpoint_value =
- get_endpoint_value(endpoints[endpoint][component], is_signed);
- write_bits(&writer, 10, endpoint_value);
- }
- }
-
- write_rgb_indices_float(&writer,
- src_width, src_height,
- src, src_rowstride,
- endpoints);
-}
-
-static void
-compress_rgb_float(int width, int height,
- const float *src, int src_rowstride,
- uint8_t *dst, int dst_rowstride,
- bool is_signed)
-{
- int dst_row_diff;
- int y, x;
-
- if (dst_rowstride >= width * 4)
- dst_row_diff = dst_rowstride - ((width + 3) & ~3) * 4;
- else
- dst_row_diff = 0;
-
- for (y = 0; y < height; y += BLOCK_SIZE) {
- for (x = 0; x < width; x += BLOCK_SIZE) {
- compress_rgb_float_block(MIN2(width - x, BLOCK_SIZE),
- MIN2(height - y, BLOCK_SIZE),
- src + x * 3 +
- y * src_rowstride / sizeof (float),
- src_rowstride,
- dst,
- is_signed);
- dst += BLOCK_BYTES;
- }
- dst += dst_row_diff;
- }
-}
-
static GLboolean
texstore_bptc_rgb_float(TEXSTORE_PARAMS,
bool is_signed)
diff --git a/src/mesa/main/texcompress_bptc_tmp.h b/src/mesa/main/texcompress_bptc_tmp.h
new file mode 100644
index 0000000000..3c4ea2c013
--- /dev/null
+++ b/src/mesa/main/texcompress_bptc_tmp.h
@@ -0,0 +1,1743 @@
+/*
+ * Copyright (C) 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Included by texcompress_bptc and gallium to define the shared BPTC
+ * compression and decompression routines.
+ */
+
+#include "util/format_srgb.h"
+#include "util/half_float.h"
+#include "macros.h"
+
+#define BLOCK_SIZE 4
+#define N_PARTITIONS 64
+#define BLOCK_BYTES 16
+
+/* Describes the bit layout of one BPTC UNORM block mode. */
+struct bptc_unorm_mode {
+ int n_subsets; /* endpoint pairs in the block: 1, 2 or 3 */
+ int n_partition_bits; /* width of the partition number field */
+ bool has_rotation_bits; /* a 2-bit rotation field is present */
+ bool has_index_selection_bit; /* a 1-bit index selection field is present */
+ int n_color_bits; /* stored bits per colour endpoint component */
+ int n_alpha_bits; /* stored bits per alpha endpoint, 0 when alpha is fixed at 255 */
+ bool has_endpoint_pbits; /* one extra low bit per endpoint */
+ bool has_shared_pbits; /* one extra low bit per subset, used by both endpoints */
+ int n_index_bits; /* bits per primary texel index */
+ int n_secondary_index_bits; /* bits per secondary texel index, 0 if none */
+};
+
+/* One run of consecutive block bits and where it lands in the endpoints. */
+struct bptc_float_bitfield {
+ int8_t endpoint; /* endpoint the run belongs to; -1 terminates the list */
+ uint8_t component; /* colour component index within that endpoint */
+ uint8_t offset; /* bit position in the component where the run is ORed in */
+ uint8_t n_bits; /* number of bits in the run */
+ bool reverse; /* the run is stored with its bit order reversed */
+};
+
+/* Describes the bit layout of one BPTC float block mode. */
+struct bptc_float_mode {
+ bool reserved; /* set on table entries for reserved mode encodings */
+ bool transformed_endpoints; /* endpoints after the first are signed deltas from endpoint 0 */
+ int n_partition_bits; /* non-zero selects four endpoints instead of two */
+ int n_endpoint_bits; /* width of a fully assembled endpoint component */
+ int n_index_bits; /* NOTE(review): presumably bits per texel index - its user is not in view */
+ int n_delta_bits[3]; /* per-component width used when sign-extending the deltas */
+ struct bptc_float_bitfield bitfields[24]; /* layout, terminated by endpoint == -1 */
+};
+
+/* State passed to write_bits() while a compressed block is emitted. The
+ * compressors start each block with buf and pos cleared and dst pointing at
+ * the block's output.
+ */
+struct bit_writer {
+ uint8_t buf; /* NOTE(review): presumably the partially filled output byte - confirm in write_bits() */
+ int pos; /* NOTE(review): presumably the number of bits held in buf - confirm in write_bits() */
+ uint8_t *dst; /* where the compressed block is written */
+};
+
+/* UNORM mode table, indexed by mode number (ffs() of the first block byte,
+ * minus one). Columns follow the field order of struct bptc_unorm_mode:
+ * n_subsets, n_partition_bits, has_rotation_bits, has_index_selection_bit,
+ * n_color_bits, n_alpha_bits, has_endpoint_pbits, has_shared_pbits,
+ * n_index_bits, n_secondary_index_bits.
+ */
+static const struct bptc_unorm_mode
+bptc_unorm_modes[] = {
+ /* 0 */ { 3, 4, false, false, 4, 0, true, false, 3, 0 },
+ /* 1 */ { 2, 6, false, false, 6, 0, false, true, 3, 0 },
+ /* 2 */ { 3, 6, false, false, 5, 0, false, false, 2, 0 },
+ /* 3 */ { 2, 6, false, false, 7, 0, true, false, 2, 0 },
+ /* 4 */ { 1, 0, true, true, 5, 6, false, false, 2, 3 },
+ /* 5 */ { 1, 0, true, false, 7, 8, false, false, 2, 2 },
+ /* 6 */ { 1, 0, false, false, 7, 7, true, false, 4, 0 },
+ /* 7 */ { 2, 6, false, false, 5, 5, true, false, 2, 0 }
+};
+
+/* Float mode table. Each entry lists, in struct bptc_float_mode field order:
+ * reserved, transformed_endpoints, n_partition_bits, n_endpoint_bits,
+ * n_index_bits, n_delta_bits[3], and then the bit layout as a list of
+ * { endpoint, component, offset, n_bits, reverse } runs ended by { -1 }.
+ * The runs are consumed in the order written. The comment above each entry
+ * appears to be the mode's bit pattern (NOTE(review): confirm against the
+ * code that indexes this table, which is not in view here).
+ */
+static const struct bptc_float_mode
+bptc_float_modes[] = {
+ /* 00 */
+ { false, true, 5, 10, 3, { 5, 5, 5 },
+ { { 2, 1, 4, 1, false }, { 2, 2, 4, 1, false }, { 3, 2, 4, 1, false },
+ { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
+ { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
+ { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
+ { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
+ { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
+ { 3, 2, 3, 1, false },
+ { -1 } }
+ },
+ /* 01 */
+ { false, true, 5, 7, 3, { 6, 6, 6 },
+ { { 2, 1, 5, 1, false }, { 3, 1, 4, 1, false }, { 3, 1, 5, 1, false },
+ { 0, 0, 0, 7, false }, { 3, 2, 0, 1, false }, { 3, 2, 1, 1, false },
+ { 2, 2, 4, 1, false }, { 0, 1, 0, 7, false }, { 2, 2, 5, 1, false },
+ { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false }, { 0, 2, 0, 7, false },
+ { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
+ { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
+ { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
+ { 2, 0, 0, 6, false },
+ { 3, 0, 0, 6, false },
+ { -1 } }
+ },
+ /* 00010 */
+ { false, true, 5, 11, 3, { 5, 4, 4 },
+ { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
+ { 1, 0, 0, 5, false }, { 0, 0, 10, 1, false }, { 2, 1, 0, 4, false },
+ { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false }, { 3, 2, 0, 1, false },
+ { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
+ { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
+ { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
+ { -1 } }
+ },
+ /* 00011 */
+ { false, false, 0, 10, 4, { 10, 10, 10 },
+ { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
+ { 1, 0, 0, 10, false }, { 1, 1, 0, 10, false }, { 1, 2, 0, 10, false },
+ { -1 } }
+ },
+ /* 00110 */
+ { false, true, 5, 11, 3, { 4, 5, 4 },
+ { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
+ { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 3, 1, 4, 1, false },
+ { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false }, { 0, 1, 10, 1, false },
+ { 3, 1, 0, 4, false }, { 1, 2, 0, 4, false }, { 0, 2, 10, 1, false },
+ { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
+ { 3, 2, 0, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
+ { 2, 1, 4, 1, false }, { 3, 2, 3, 1, false },
+ { -1 } }
+ },
+ /* 00111 */
+ { false, true, 0, 11, 4, { 9, 9, 9 },
+ { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
+ { 1, 0, 0, 9, false }, { 0, 0, 10, 1, false }, { 1, 1, 0, 9, false },
+ { 0, 1, 10, 1, false }, { 1, 2, 0, 9, false }, { 0, 2, 10, 1, false },
+ { -1 } }
+ },
+ /* 01010 */
+ { false, true, 5, 11, 3, { 4, 4, 5 },
+ { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
+ { 1, 0, 0, 4, false }, { 0, 0, 10, 1, false }, { 2, 2, 4, 1, false },
+ { 2, 1, 0, 4, false }, { 1, 1, 0, 4, false }, { 0, 1, 10, 1, false },
+ { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
+ { 0, 2, 10, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 4, false },
+ { 3, 2, 1, 1, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 4, false },
+ { 3, 2, 4, 1, false }, { 3, 2, 3, 1, false },
+ { -1 } }
+ },
+ /* 01011 */
+ { false, true, 0, 12, 4, { 8, 8, 8 },
+ { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
+ { 1, 0, 0, 8, false }, { 0, 0, 10, 2, true }, { 1, 1, 0, 8, false },
+ { 0, 1, 10, 2, true }, { 1, 2, 0, 8, false }, { 0, 2, 10, 2, true },
+ { -1 } }
+ },
+ /* 01110 */
+ { false, true, 5, 9, 3, { 5, 5, 5 },
+ { { 0, 0, 0, 9, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 9, false },
+ { 2, 1, 4, 1, false }, { 0, 2, 0, 9, false }, { 3, 2, 4, 1, false },
+ { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
+ { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
+ { 1, 2, 0, 5, false }, { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false },
+ { 2, 0, 0, 5, false }, { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false },
+ { 3, 2, 3, 1, false },
+ { -1 } }
+ },
+ /* 01111 */
+ { false, true, 0, 16, 4, { 4, 4, 4 },
+ { { 0, 0, 0, 10, false }, { 0, 1, 0, 10, false }, { 0, 2, 0, 10, false },
+ { 1, 0, 0, 4, false }, { 0, 0, 10, 6, true }, { 1, 1, 0, 4, false },
+ { 0, 1, 10, 6, true }, { 1, 2, 0, 4, false }, { 0, 2, 10, 6, true },
+ { -1 } }
+ },
+ /* 10010 */
+ { false, true, 5, 8, 3, { 6, 5, 5 },
+ { { 0, 0, 0, 8, false }, { 3, 1, 4, 1, false }, { 2, 2, 4, 1, false },
+ { 0, 1, 0, 8, false }, { 3, 2, 2, 1, false }, { 2, 1, 4, 1, false },
+ { 0, 2, 0, 8, false }, { 3, 2, 3, 1, false }, { 3, 2, 4, 1, false },
+ { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 5, false },
+ { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
+ { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 6, false },
+ { 3, 0, 0, 6, false },
+ { -1 } }
+ },
+ /* 10011 */
+ { true /* reserved */ },
+ /* 10110 */
+ { false, true, 5, 8, 3, { 5, 6, 5 },
+ { { 0, 0, 0, 8, false }, { 3, 2, 0, 1, false }, { 2, 2, 4, 1, false },
+ { 0, 1, 0, 8, false }, { 2, 1, 5, 1, false }, { 2, 1, 4, 1, false },
+ { 0, 2, 0, 8, false }, { 3, 1, 5, 1, false }, { 3, 2, 4, 1, false },
+ { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
+ { 1, 1, 0, 6, false }, { 3, 1, 0, 4, false }, { 1, 2, 0, 5, false },
+ { 3, 2, 1, 1, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
+ { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
+ { -1 } }
+ },
+ /* 10111 */
+ { true /* reserved */ },
+ /* 11010 */
+ { false, true, 5, 8, 3, { 5, 5, 6 },
+ { { 0, 0, 0, 8, false }, { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false },
+ { 0, 1, 0, 8, false }, { 2, 2, 5, 1, false }, { 2, 1, 4, 1, false },
+ { 0, 2, 0, 8, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
+ { 1, 0, 0, 5, false }, { 3, 1, 4, 1, false }, { 2, 1, 0, 4, false },
+ { 1, 1, 0, 5, false }, { 3, 2, 0, 1, false }, { 3, 1, 0, 4, false },
+ { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false }, { 2, 0, 0, 5, false },
+ { 3, 2, 2, 1, false }, { 3, 0, 0, 5, false }, { 3, 2, 3, 1, false },
+ { -1 } }
+ },
+ /* 11011 */
+ { true /* reserved */ },
+ /* 11110 */
+ { false, false, 5, 6, 3, { 6, 6, 6 },
+ { { 0, 0, 0, 6, false }, { 3, 1, 4, 1, false }, { 3, 2, 0, 1, false },
+ { 3, 2, 1, 1, false }, { 2, 2, 4, 1, false }, { 0, 1, 0, 6, false },
+ { 2, 1, 5, 1, false }, { 2, 2, 5, 1, false }, { 3, 2, 2, 1, false },
+ { 2, 1, 4, 1, false }, { 0, 2, 0, 6, false }, { 3, 1, 5, 1, false },
+ { 3, 2, 3, 1, false }, { 3, 2, 5, 1, false }, { 3, 2, 4, 1, false },
+ { 1, 0, 0, 6, false }, { 2, 1, 0, 4, false }, { 1, 1, 0, 6, false },
+ { 3, 1, 0, 4, false }, { 1, 2, 0, 6, false }, { 2, 2, 0, 4, false },
+ { 2, 0, 0, 6, false }, { 3, 0, 0, 6, false },
+ { -1 } }
+ },
+ /* 11111 */
+ { true /* reserved */ },
+};
+
+/* This partition table is used when the mode has two subsets. Each
+ * partition is represented by a 32-bit value which gives 2 bits per texel
+ * within the block. The value of the two bits represents which subset to use
+ * (0 or 1). Texel t occupies bits 2 * t and 2 * t + 1, so texel 0 sits in the
+ * least-significant bits. The table is indexed by the block's partition
+ * number.
+ */
+static const uint32_t
+partition_table1[N_PARTITIONS] = {
+ 0x50505050U, 0x40404040U, 0x54545454U, 0x54505040U,
+ 0x50404000U, 0x55545450U, 0x55545040U, 0x54504000U,
+ 0x50400000U, 0x55555450U, 0x55544000U, 0x54400000U,
+ 0x55555440U, 0x55550000U, 0x55555500U, 0x55000000U,
+ 0x55150100U, 0x00004054U, 0x15010000U, 0x00405054U,
+ 0x00004050U, 0x15050100U, 0x05010000U, 0x40505054U,
+ 0x00404050U, 0x05010100U, 0x14141414U, 0x05141450U,
+ 0x01155440U, 0x00555500U, 0x15014054U, 0x05414150U,
+ 0x44444444U, 0x55005500U, 0x11441144U, 0x05055050U,
+ 0x05500550U, 0x11114444U, 0x41144114U, 0x44111144U,
+ 0x15055054U, 0x01055040U, 0x05041050U, 0x05455150U,
+ 0x14414114U, 0x50050550U, 0x41411414U, 0x00141400U,
+ 0x00041504U, 0x00105410U, 0x10541000U, 0x04150400U,
+ 0x50410514U, 0x41051450U, 0x05415014U, 0x14054150U,
+ 0x41050514U, 0x41505014U, 0x40011554U, 0x54150140U,
+ 0x50505500U, 0x00555050U, 0x15151010U, 0x54540404U,
+};
+
+/* This partition table is used when the mode has three subsets. In this case
+ * the values can be 0, 1 or 2. Each entry packs 2 bits per texel with texel 0
+ * in the least-significant bits, and the table is indexed by the block's
+ * partition number.
+ */
+static const uint32_t
+partition_table2[N_PARTITIONS] = {
+ 0xaa685050U, 0x6a5a5040U, 0x5a5a4200U, 0x5450a0a8U,
+ 0xa5a50000U, 0xa0a05050U, 0x5555a0a0U, 0x5a5a5050U,
+ 0xaa550000U, 0xaa555500U, 0xaaaa5500U, 0x90909090U,
+ 0x94949494U, 0xa4a4a4a4U, 0xa9a59450U, 0x2a0a4250U,
+ 0xa5945040U, 0x0a425054U, 0xa5a5a500U, 0x55a0a0a0U,
+ 0xa8a85454U, 0x6a6a4040U, 0xa4a45000U, 0x1a1a0500U,
+ 0x0050a4a4U, 0xaaa59090U, 0x14696914U, 0x69691400U,
+ 0xa08585a0U, 0xaa821414U, 0x50a4a450U, 0x6a5a0200U,
+ 0xa9a58000U, 0x5090a0a8U, 0xa8a09050U, 0x24242424U,
+ 0x00aa5500U, 0x24924924U, 0x24499224U, 0x50a50a50U,
+ 0x500aa550U, 0xaaaa4444U, 0x66660000U, 0xa5a0a5a0U,
+ 0x50a050a0U, 0x69286928U, 0x44aaaa44U, 0x66666600U,
+ 0xaa444444U, 0x54a854a8U, 0x95809580U, 0x96969600U,
+ 0xa85454a8U, 0x80959580U, 0xaa141414U, 0x96960000U,
+ 0xaaaa1414U, 0xa05050a0U, 0xa0a5a5a0U, 0x96000000U,
+ 0x40804080U, 0xa9a8a9a8U, 0xaaaaaa44U, 0x2a4a5254U
+};
+
+/* Texel numbers of the anchor texels, indexed by [row][partition number].
+ * Texel 0 is always an anchor and therefore has no row here. Row 0 serves
+ * two-subset modes; rows 1 and 2 serve three-subset modes. The decoder reads
+ * an anchor texel's index with one bit fewer than the other texels.
+ */
+static const uint8_t
+anchor_indices[][N_PARTITIONS] = {
+ /* Anchor index values for the second subset of two-subset partitioning */
+ {
+ 0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,
+ 0xf,0x2,0x8,0x2,0x2,0x8,0x8,0xf,0x2,0x8,0x2,0x2,0x8,0x8,0x2,0x2,
+ 0xf,0xf,0x6,0x8,0x2,0x8,0xf,0xf,0x2,0x8,0x2,0x2,0x2,0xf,0xf,0x6,
+ 0x6,0x2,0x6,0x8,0xf,0xf,0x2,0x2,0xf,0xf,0xf,0xf,0xf,0x2,0x2,0xf
+ },
+
+ /* Anchor index values for the second subset of three-subset partitioning */
+ {
+ 0x3,0x3,0xf,0xf,0x8,0x3,0xf,0xf,0x8,0x8,0x6,0x6,0x6,0x5,0x3,0x3,
+ 0x3,0x3,0x8,0xf,0x3,0x3,0x6,0xa,0x5,0x8,0x8,0x6,0x8,0x5,0xf,0xf,
+ 0x8,0xf,0x3,0x5,0x6,0xa,0x8,0xf,0xf,0x3,0xf,0x5,0xf,0xf,0xf,0xf,
+ 0x3,0xf,0x5,0x5,0x5,0x8,0x5,0xa,0x5,0xa,0x8,0xd,0xf,0xc,0x3,0x3
+ },
+
+ /* Anchor index values for the third subset of three-subset
+ * partitioning
+ */
+ {
+ 0xf,0x8,0x8,0x3,0xf,0xf,0x3,0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,
+ 0xf,0x8,0xf,0x3,0xf,0x8,0xf,0x8,0x3,0xf,0x6,0xa,0xf,0xf,0xa,0x8,
+ 0xf,0x3,0xf,0xa,0xa,0x8,0x9,0xa,0x6,0xf,0x8,0xf,0x3,0x6,0x6,0x8,
+ 0xf,0x3,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x3,0xf,0xf,0x8
+ }
+};
+
+/* Reads an n_bits wide field from block, starting offset bits into it.
+ *
+ * Bit 0 of each byte is its least-significant bit, a field may straddle byte
+ * boundaries, and the first bit read ends up as bit 0 of the return value.
+ */
+static int
+extract_bits(const uint8_t *block,
+ int offset,
+ int n_bits)
+{
+ int byte_index = offset / 8;
+ int bit_index = offset % 8;
+ int n_bits_in_byte = MIN2(n_bits, 8 - bit_index); /* bits taken from the first byte */
+ int result = 0;
+ int bit = 0; /* position in result where the next chunk is placed */
+
+ while (true) {
+ result |= ((block[byte_index] >> bit_index) &
+ ((1 << n_bits_in_byte) - 1)) << bit;
+
+ n_bits -= n_bits_in_byte;
+
+ if (n_bits <= 0)
+ return result;
+
+ /* Every chunk after the first starts at bit 0 of the next byte */
+ bit += n_bits_in_byte;
+ byte_index++;
+ bit_index = 0;
+ n_bits_in_byte = MIN2(n_bits, 8);
+ }
+}
+
+/* Widens an n_bits wide endpoint component, held in byte, to 8 bits. */
+static uint8_t
+expand_component(uint8_t byte,
+ int n_bits)
+{
+ /* Expands a n-bit quantity into a byte by copying the most-significant
+ * bits into the unused least-significant bits. The two shift counts are
+ * 8 - n_bits and 2 * n_bits - 8, so n_bits has to lie between 4 and 8 for
+ * both to be non-negative.
+ */
+ return byte << (8 - n_bits) | (byte >> (2 * n_bits - 8));
+}
+
+/* Decodes every endpoint of a UNORM block into 8-bit RGBA values.
+ *
+ * Reading starts bit_offset bits into block. For each of the mode's subsets
+ * the two endpoints are stored in endpoints[subset * 2] and
+ * endpoints[subset * 2 + 1]. Modes without alpha bits get an alpha of 255.
+ * Returns the bit offset just past the last bit consumed.
+ */
+static int
+extract_unorm_endpoints(const struct bptc_unorm_mode *mode,
+ const uint8_t *block,
+ int bit_offset,
+ uint8_t endpoints[][4])
+{
+ int component;
+ int subset;
+ int endpoint;
+ int pbit;
+ int n_components;
+
+ /* Extract each color component. The data is stored component-major: the
+ * first component of every endpoint comes first, then the second, and so on.
+ */
+ for (component = 0; component < 3; component++) {
+ for (subset = 0; subset < mode->n_subsets; subset++) {
+ for (endpoint = 0; endpoint < 2; endpoint++) {
+ endpoints[subset * 2 + endpoint][component] =
+ extract_bits(block, bit_offset, mode->n_color_bits);
+ bit_offset += mode->n_color_bits;
+ }
+ }
+ }
+
+ /* Extract the alpha values */
+ if (mode->n_alpha_bits > 0) {
+ for (subset = 0; subset < mode->n_subsets; subset++) {
+ for (endpoint = 0; endpoint < 2; endpoint++) {
+ endpoints[subset * 2 + endpoint][3] =
+ extract_bits(block, bit_offset, mode->n_alpha_bits);
+ bit_offset += mode->n_alpha_bits;
+ }
+ }
+
+ n_components = 4;
+ } else {
+ for (subset = 0; subset < mode->n_subsets; subset++)
+ for (endpoint = 0; endpoint < 2; endpoint++)
+ endpoints[subset * 2 + endpoint][3] = 255;
+
+ /* Keep the p-bits below away from the constant alpha */
+ n_components = 3;
+ }
+
+ /* Add in the p-bits */
+ if (mode->has_endpoint_pbits) {
+ /* One p-bit per endpoint, appended as the new lowest bit */
+ for (subset = 0; subset < mode->n_subsets; subset++) {
+ for (endpoint = 0; endpoint < 2; endpoint++) {
+ pbit = extract_bits(block, bit_offset, 1);
+ bit_offset += 1;
+
+ for (component = 0; component < n_components; component++) {
+ endpoints[subset * 2 + endpoint][component] <<= 1;
+ endpoints[subset * 2 + endpoint][component] |= pbit;
+ }
+ }
+ }
+ } else if (mode->has_shared_pbits) {
+ /* One p-bit per subset, applied to both of its endpoints */
+ for (subset = 0; subset < mode->n_subsets; subset++) {
+ pbit = extract_bits(block, bit_offset, 1);
+ bit_offset += 1;
+
+ for (endpoint = 0; endpoint < 2; endpoint++) {
+ for (component = 0; component < n_components; component++) {
+ endpoints[subset * 2 + endpoint][component] <<= 1;
+ endpoints[subset * 2 + endpoint][component] |= pbit;
+ }
+ }
+ }
+ }
+
+ /* Expand the n-bit values to a byte. The two bool p-bit flags each add
+ * 0 or 1 to the stored width.
+ */
+ for (subset = 0; subset < mode->n_subsets; subset++) {
+ for (endpoint = 0; endpoint < 2; endpoint++) {
+ for (component = 0; component < 3; component++) {
+ endpoints[subset * 2 + endpoint][component] =
+ expand_component(endpoints[subset * 2 + endpoint][component],
+ mode->n_color_bits +
+ mode->has_endpoint_pbits +
+ mode->has_shared_pbits);
+ }
+
+ if (mode->n_alpha_bits > 0) {
+ endpoints[subset * 2 + endpoint][3] =
+ expand_component(endpoints[subset * 2 + endpoint][3],
+ mode->n_alpha_bits +
+ mode->has_endpoint_pbits +
+ mode->has_shared_pbits);
+ }
+ }
+ }
+
+ return bit_offset;
+}
+
+/* Tells whether texel is an anchor texel for the given partition. Texel 0
+ * always is; two- and three-subset modes add one or two more, looked up in
+ * anchor_indices by partition_num.
+ */
+static bool
+is_anchor(int n_subsets,
+ int partition_num,
+ int texel)
+{
+ if (texel == 0)
+ return true;
+
+ switch (n_subsets) {
+ case 1:
+ return false;
+ case 2:
+ return anchor_indices[0][partition_num] == texel;
+ case 3:
+ return (anchor_indices[1][partition_num] == texel ||
+ anchor_indices[2][partition_num] == texel);
+ default:
+ assert(false);
+ return false;
+ }
+}
+
+/* Counts the anchor texels whose texel number is lower than texel.
+ *
+ * The decoder subtracts this count from texel * index width to find where a
+ * texel's index starts, because each anchor index is read one bit short.
+ */
+static int
+count_anchors_before_texel(int n_subsets,
+ int partition_num,
+ int texel)
+{
+ int count = 1; /* texel 0 is an anchor and precedes every other texel */
+
+ if (texel == 0)
+ return 0;
+
+ switch (n_subsets) {
+ case 1:
+ break;
+ case 2:
+ /* Strictly greater: an anchor does not count towards its own offset */
+ if (texel > anchor_indices[0][partition_num])
+ count++;
+ break;
+ case 3:
+ if (texel > anchor_indices[1][partition_num])
+ count++;
+ if (texel > anchor_indices[2][partition_num])
+ count++;
+ break;
+ default:
+ assert(false);
+ return 0;
+ }
+
+ return count;
+}
+
+/* Blends the endpoint values a and b using the weight selected by index.
+ *
+ * index_bits picks the weight table and must be 2, 3 or 4; the entries for
+ * 0 and 1 are NULL. Weights run from 0 to 64, so index 0 returns a and the
+ * highest index returns b. The result is rounded to nearest.
+ */
+static int32_t
+interpolate(int32_t a, int32_t b,
+ int index,
+ int index_bits)
+{
+ static const uint8_t weights2[] = { 0, 21, 43, 64 };
+ static const uint8_t weights3[] = { 0, 9, 18, 27, 37, 46, 55, 64 };
+ static const uint8_t weights4[] =
+ { 0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64 };
+ static const uint8_t *weights[] = {
+ NULL, NULL, weights2, weights3, weights4
+ };
+ int weight;
+
+ weight = weights[index_bits][index];
+
+ /* Weights are in 64ths; adding 32 rounds before the shift by 6 */
+ return ((64 - weight) * a + weight * b + 32) >> 6;
+}
+
+/* Applies a block's rotation field to one decoded texel. A rotation of 0
+ * leaves result untouched; 1, 2 or 3 swaps result[rotation - 1] with
+ * result[3].
+ */
+static void
+apply_rotation(int rotation,
+ uint8_t *result)
+{
+ uint8_t t;
+
+ if (rotation == 0)
+ return;
+
+ /* Turn the field value into the index of the component to swap */
+ rotation--;
+
+ t = result[rotation];
+ result[rotation] = result[3];
+ result[3] = t;
+}
+
+/* Decodes a single texel from a 16-byte BPTC UNORM block.
+ *
+ * block points at the compressed block and texel is the texel number within
+ * it (0 to 15). result receives four bytes: three colour components followed
+ * by alpha. A block whose first byte is zero has no mode bit set and decodes
+ * to opaque black.
+ */
+static void
+fetch_rgba_unorm_from_block(const uint8_t *block,
+ uint8_t *result,
+ int texel)
+{
+ int mode_num = ffs(block[0]); /* 1-based position of the lowest set bit */
+ const struct bptc_unorm_mode *mode;
+ int bit_offset, secondary_bit_offset;
+ int partition_num;
+ int subset_num;
+ int rotation;
+ int index_selection;
+ int index_bits;
+ int indices[2];
+ int index;
+ int anchors_before_texel;
+ bool anchor;
+ uint8_t endpoints[3 * 2][4];
+ uint32_t subsets;
+ int component;
+
+ if (mode_num == 0) {
+ /* According to the spec this mode is reserved and shouldn't be used. */
+ memset(result, 0, 3);
+ result[3] = 0xff;
+ return;
+ }
+
+ mode = bptc_unorm_modes + mode_num - 1;
+ /* The mode field itself takes up mode_num bits */
+ bit_offset = mode_num;
+
+ partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
+ bit_offset += mode->n_partition_bits;
+
+ switch (mode->n_subsets) {
+ case 1:
+ subsets = 0;
+ break;
+ case 2:
+ subsets = partition_table1[partition_num];
+ break;
+ case 3:
+ subsets = partition_table2[partition_num];
+ break;
+ default:
+ assert(false);
+ return;
+ }
+
+ if (mode->has_rotation_bits) {
+ rotation = extract_bits(block, bit_offset, 2);
+ bit_offset += 2;
+ } else {
+ rotation = 0;
+ }
+
+ if (mode->has_index_selection_bit) {
+ index_selection = extract_bits(block, bit_offset, 1);
+ bit_offset++;
+ } else {
+ index_selection = 0;
+ }
+
+ /* After this call bit_offset is the start of the primary index data */
+ bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
+
+ anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
+ partition_num, texel);
+
+ /* Calculate the offset to the secondary index. The primary index data
+ * spans 16 indices less one bit for each of the n_subsets anchors.
+ */
+ secondary_bit_offset = (bit_offset +
+ BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
+ mode->n_subsets +
+ mode->n_secondary_index_bits * texel -
+ anchors_before_texel);
+
+ /* Calculate the offset to the primary index for this texel */
+ bit_offset += mode->n_index_bits * texel - anchors_before_texel;
+
+ subset_num = (subsets >> (texel * 2)) & 3;
+
+ anchor = is_anchor(mode->n_subsets, partition_num, texel);
+
+ /* An anchor's index is stored without its top bit */
+ index_bits = mode->n_index_bits;
+ if (anchor)
+ index_bits--;
+ indices[0] = extract_bits(block, bit_offset, index_bits);
+
+ if (mode->n_secondary_index_bits) {
+ index_bits = mode->n_secondary_index_bits;
+ if (anchor)
+ index_bits--;
+ indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
+ }
+
+ /* Interpolation uses the full index width, even for an anchor */
+ index = indices[index_selection];
+ index_bits = (index_selection ?
+ mode->n_secondary_index_bits :
+ mode->n_index_bits);
+
+ for (component = 0; component < 3; component++)
+ result[component] = interpolate(endpoints[subset_num * 2][component],
+ endpoints[subset_num * 2 + 1][component],
+ index,
+ index_bits);
+
+ /* Alpha uses the opposite index from the color components */
+ if (mode->n_secondary_index_bits && !index_selection) {
+ index = indices[1];
+ index_bits = mode->n_secondary_index_bits;
+ } else {
+ index = indices[0];
+ index_bits = mode->n_index_bits;
+ }
+
+ result[3] = interpolate(endpoints[subset_num * 2][3],
+ endpoints[subset_num * 2 + 1][3],
+ index,
+ index_bits);
+
+ apply_rotation(rotation, result);
+}
+
+#ifdef BPTC_BLOCK_DECODE
+/*
+ * Decodes one 16-byte BPTC UNORM block into 8-bit RGBA texels.
+ *
+ * src_width and src_height give how many columns and rows of the 4x4 block
+ * are written, so blocks on the image edge can be partial. block points at
+ * the compressed block, dst_row at the first output texel, and dst_rowstride
+ * is the distance in bytes between output rows. A block whose first byte is
+ * zero has no mode bit set and decodes to opaque black.
+ */
+static void
+decompress_rgba_unorm_block(int src_width, int src_height,
+                            const uint8_t *block,
+                            uint8_t *dst_row, int dst_rowstride)
+{
+   int mode_num = ffs(block[0]);
+   const struct bptc_unorm_mode *mode;
+   int bit_offset, index_bit_offset, secondary_bit_offset;
+   int partition_num;
+   int subset_num;
+   int rotation;
+   int index_selection;
+   int index_bits;
+   int indices[2];
+   int index;
+   int anchors_before_texel;
+   bool anchor;
+   uint8_t endpoints[3 * 2][4];
+   uint32_t subsets;
+   int component;
+   int x, y;
+
+   if (mode_num == 0) {
+      /* According to the spec this mode is reserved and shouldn't be used. */
+      for (y = 0; y < src_height; y += 1) {
+         uint8_t *result = dst_row;
+         memset(result, 0, 4 * src_width);
+         for (x = 0; x < src_width; x += 1) {
+            result[3] = 0xff;
+            result += 4;
+         }
+         dst_row += dst_rowstride;
+      }
+      return;
+   }
+
+   mode = bptc_unorm_modes + mode_num - 1;
+   /* The mode field itself takes up mode_num bits */
+   bit_offset = mode_num;
+
+   partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
+   bit_offset += mode->n_partition_bits;
+
+   switch (mode->n_subsets) {
+   case 1:
+      subsets = 0;
+      break;
+   case 2:
+      subsets = partition_table1[partition_num];
+      break;
+   case 3:
+      subsets = partition_table2[partition_num];
+      break;
+   default:
+      assert(false);
+      return;
+   }
+
+   if (mode->has_rotation_bits) {
+      rotation = extract_bits(block, bit_offset, 2);
+      bit_offset += 2;
+   } else {
+      rotation = 0;
+   }
+
+   if (mode->has_index_selection_bit) {
+      index_selection = extract_bits(block, bit_offset, 1);
+      bit_offset++;
+   } else {
+      index_selection = 0;
+   }
+
+   /* From here on bit_offset is the start of the index data. It is the base
+    * for every texel and must stay unchanged inside the loops below.
+    */
+   bit_offset = extract_unorm_endpoints(mode, block, bit_offset, endpoints);
+
+   for (y = 0; y < src_height; y += 1) {
+      uint8_t *result = dst_row;
+      for (x = 0; x < src_width; x += 1) {
+         int texel;
+         texel = x + y * 4;
+
+         anchors_before_texel = count_anchors_before_texel(mode->n_subsets,
+                                                           partition_num,
+                                                           texel);
+
+         /* Calculate the offset to the secondary index */
+         secondary_bit_offset = (bit_offset +
+                                 BLOCK_SIZE * BLOCK_SIZE * mode->n_index_bits -
+                                 mode->n_subsets +
+                                 mode->n_secondary_index_bits * texel -
+                                 anchors_before_texel);
+
+         /* Calculate the offset to the primary index for this texel. This is
+          * kept in its own variable: accumulating it into bit_offset would
+          * shift the base used by every following texel.
+          */
+         index_bit_offset = (bit_offset +
+                             mode->n_index_bits * texel -
+                             anchors_before_texel);
+
+         subset_num = (subsets >> (texel * 2)) & 3;
+
+         anchor = is_anchor(mode->n_subsets, partition_num, texel);
+
+         /* An anchor's index is stored without its top bit */
+         index_bits = mode->n_index_bits;
+         if (anchor)
+            index_bits--;
+         indices[0] = extract_bits(block, index_bit_offset, index_bits);
+
+         if (mode->n_secondary_index_bits) {
+            index_bits = mode->n_secondary_index_bits;
+            if (anchor)
+               index_bits--;
+            indices[1] = extract_bits(block, secondary_bit_offset, index_bits);
+         }
+
+         index = indices[index_selection];
+         index_bits = (index_selection ?
+                       mode->n_secondary_index_bits :
+                       mode->n_index_bits);
+
+         for (component = 0; component < 3; component++)
+            result[component] = interpolate(endpoints[subset_num * 2][component],
+                                            endpoints[subset_num * 2 + 1][component],
+                                            index,
+                                            index_bits);
+
+         /* Alpha uses the opposite index from the color components */
+         if (mode->n_secondary_index_bits && !index_selection) {
+            index = indices[1];
+            index_bits = mode->n_secondary_index_bits;
+         } else {
+            index = indices[0];
+            index_bits = mode->n_index_bits;
+         }
+
+         result[3] = interpolate(endpoints[subset_num * 2][3],
+                                 endpoints[subset_num * 2 + 1][3],
+                                 index,
+                                 index_bits);
+
+         apply_rotation(rotation, result);
+         result += 4;
+      }
+      dst_row += dst_rowstride;
+   }
+}
+
+/* Decompresses a width x height image of BPTC UNORM blocks to 8-bit RGBA.
+ *
+ * src holds 16-byte blocks, one row of blocks after another, with
+ * src_rowstride bytes per row of blocks. dst receives 4 bytes per texel with
+ * dst_rowstride bytes per row. Blocks on the right and bottom edges are
+ * clipped to the image size.
+ */
+static void
+decompress_rgba_unorm(int width, int height,
+ const uint8_t *src, int src_rowstride,
+ uint8_t *dst, int dst_rowstride)
+{
+ int src_row_diff; /* bytes to skip after the last block of each row */
+ int y, x;
+
+ /* ((width + 3) & ~3) * 4 is 16 bytes for every block in a row.
+ * NOTE(review): the guard compares against width * 4 rather than that
+ * padded size, so a stride between the two gives a negative skip - confirm
+ * that callers never pass one.
+ */
+ if (src_rowstride >= width * 4)
+ src_row_diff = src_rowstride - ((width + 3) & ~3) * 4;
+ else
+ src_row_diff = 0;
+
+ for (y = 0; y < height; y += BLOCK_SIZE) {
+ for (x = 0; x < width; x += BLOCK_SIZE) {
+ decompress_rgba_unorm_block(MIN2(width - x, BLOCK_SIZE),
+ MIN2(height - y, BLOCK_SIZE),
+ src,
+ dst + x * 4 + y * dst_rowstride,
+ dst_rowstride);
+ src += BLOCK_BYTES;
+ }
+ src += src_row_diff;
+ }
+}
+#endif // BPTC_BLOCK_DECODE
+
+/*
+ * Treats the low n_bits bits of value as a two's complement number and
+ * returns it widened to 32 bits. When bit n_bits - 1 is clear the value is
+ * returned unchanged. n_bits must be between 1 and 30.
+ */
+static int32_t
+sign_extend(int32_t value,
+            int n_bits)
+{
+   if ((value & (1 << (n_bits - 1)))) {
+      /* Set every bit from n_bits upwards. The mask is built from a positive
+       * constant because left-shifting a negative value is undefined in C.
+       */
+      value |= ~(((int32_t) 1 << n_bits) - 1);
+   }
+
+   return value;
+}
+
+/* Scales a signed endpoint value of n_endpoint_bits bits up to the range
+ * -0x7fff to 0x7fff.
+ *
+ * Values that are already 16 bits wide or more are returned unchanged, zero
+ * stays zero, and the largest magnitude maps to 0x7fff exactly.
+ */
+static int
+signed_unquantize(int value, int n_endpoint_bits)
+{
+ bool sign;
+
+ if (n_endpoint_bits >= 16)
+ return value;
+
+ if (value == 0)
+ return 0;
+
+ sign = false;
+
+ /* Work on the magnitude and put the sign back at the end */
+ if (value < 0) {
+ sign = true;
+ value = -value;
+ }
+
+ if (value >= (1 << (n_endpoint_bits - 1)) - 1)
+ value = 0x7fff;
+ else
+ value = ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
+
+ if (sign)
+ value = -value;
+
+ return value;
+}
+
+/* Scales an unsigned endpoint value of n_endpoint_bits bits up to the range
+ * 0 to 0xffff.
+ *
+ * Values that are already 15 bits wide or more are returned unchanged, zero
+ * stays zero, and the all-ones value maps to 0xffff exactly.
+ */
+static int
+unsigned_unquantize(int value, int n_endpoint_bits)
+{
+ if (n_endpoint_bits >= 15)
+ return value;
+
+ if (value == 0)
+ return 0;
+
+ if (value == (1 << n_endpoint_bits) - 1)
+ return 0xffff;
+
+ return ((value << 15) + 0x4000) >> (n_endpoint_bits - 1);
+}
+
+/*
+ * Read the endpoints of a float block.
+ *
+ * The endpoint bits are gathered from the bitfield list of the mode,
+ * starting at bit_offset.  Modes with partition bits carry four endpoints,
+ * the others two.  Transformed endpoints are then resolved relative to
+ * endpoint 0 and finally every component is unquantized (signed or
+ * unsigned, depending on is_signed).
+ *
+ * Returns the bit offset just past the endpoint data.
+ */
+static int
+extract_float_endpoints(const struct bptc_float_mode *mode,
+                        const uint8_t *block,
+                        int bit_offset,
+                        int32_t endpoints[][3],
+                        bool is_signed)
+{
+   const int n_endpoints = mode->n_partition_bits ? 4 : 2;
+   const struct bptc_float_bitfield *field;
+   int endpoint, component;
+
+   memset(endpoints, 0, sizeof endpoints[0][0] * n_endpoints * 3);
+
+   for (field = mode->bitfields; field->endpoint != -1; field++) {
+      int32_t *slot = &endpoints[field->endpoint][field->component];
+      const int raw = extract_bits(block, bit_offset, field->n_bits);
+      int bit;
+
+      bit_offset += field->n_bits;
+
+      if (!field->reverse) {
+         *slot |= raw << field->offset;
+         continue;
+      }
+
+      /* Reversed fields: mirror the bit order within the field */
+      for (bit = 0; bit < field->n_bits; bit++) {
+         if (raw & (1 << bit))
+            *slot |= 1 << ((field->n_bits - 1 - bit) + field->offset);
+      }
+   }
+
+   if (mode->transformed_endpoints) {
+      /* Every endpoint after the first is a signed offset from e0,
+       * wrapped to the endpoint width */
+      for (endpoint = 1; endpoint < n_endpoints; endpoint++) {
+         for (component = 0; component < 3; component++) {
+            const int delta = sign_extend(endpoints[endpoint][component],
+                                          mode->n_delta_bits[component]);
+
+            endpoints[endpoint][component] =
+               ((endpoints[0][component] + delta) &
+                ((1 << mode->n_endpoint_bits) - 1));
+         }
+      }
+   }
+
+   for (endpoint = 0; endpoint < n_endpoints; endpoint++) {
+      for (component = 0; component < 3; component++) {
+         int32_t *slot = &endpoints[endpoint][component];
+
+         if (is_signed) {
+            const int extended = sign_extend(*slot, mode->n_endpoint_bits);
+
+            *slot = signed_unquantize(extended, mode->n_endpoint_bits);
+         } else {
+            *slot = unsigned_unquantize(*slot, mode->n_endpoint_bits);
+         }
+      }
+   }
+
+   return bit_offset;
+}
+
+/*
+ * Final scaling step for an interpolated unsigned value: multiply by
+ * 31 / 64 using integer arithmetic.
+ */
+static int32_t
+finish_unsigned_unquantize(int32_t value)
+{
+   const int32_t scaled = value * 31;
+
+   return scaled / 64;
+}
+
+/*
+ * Final scaling step for an interpolated signed value: the magnitude is
+ * multiplied by 31 / 32 and, for negative inputs, bit 15 is set as the
+ * sign bit of the result.
+ */
+static int32_t
+finish_signed_unquantize(int32_t value)
+{
+   const int32_t magnitude = value < 0 ? -value : value;
+   const int32_t scaled = magnitude * 31 / 32;
+
+   return value < 0 ? (scaled | 0x8000) : scaled;
+}
+
+/*
+ * Decode a single texel of a float block.
+ *
+ * texel is the index of the texel inside the block and result receives
+ * four floats; the fourth is always 1.0.  Blocks using a reserved mode
+ * decode to (0, 0, 0, 1).
+ */
+static void
+fetch_rgb_float_from_block(const uint8_t *block,
+                           float *result,
+                           int texel,
+                           bool is_signed)
+{
+   const struct bptc_float_mode *mode;
+   int32_t endpoints[2 * 2][3];
+   uint32_t subsets = 0;
+   int partition_num = 0;
+   int n_subsets = 1;
+   int mode_num;
+   int bit_offset;
+   int subset_num;
+   int index_bits;
+   int index;
+   int component;
+
+   /* The mode number takes either two or five bits */
+   if (block[0] & 0x2) {
+      mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
+      bit_offset = 5;
+   } else {
+      mode_num = block[0] & 3;
+      bit_offset = 2;
+   }
+
+   mode = &bptc_float_modes[mode_num];
+
+   if (mode->reserved) {
+      memset(result, 0, sizeof result[0] * 3);
+      result[3] = 1.0f;
+      return;
+   }
+
+   bit_offset = extract_float_endpoints(mode, block, bit_offset,
+                                        endpoints, is_signed);
+
+   /* Only partitioned modes have a second subset */
+   if (mode->n_partition_bits) {
+      partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
+      bit_offset += mode->n_partition_bits;
+
+      subsets = partition_table1[partition_num];
+      n_subsets = 2;
+   }
+
+   /* Skip to the index of this texel; each anchor before it is one bit
+    * shorter */
+   bit_offset += mode->n_index_bits * texel -
+      count_anchors_before_texel(n_subsets, partition_num, texel);
+
+   subset_num = (subsets >> (texel * 2)) & 3;
+
+   index_bits = is_anchor(n_subsets, partition_num, texel) ?
+      mode->n_index_bits - 1 : mode->n_index_bits;
+   index = extract_bits(block, bit_offset, index_bits);
+
+   for (component = 0; component < 3; component++) {
+      int32_t value = interpolate(endpoints[subset_num * 2][component],
+                                  endpoints[subset_num * 2 + 1][component],
+                                  index,
+                                  mode->n_index_bits);
+
+      value = is_signed ?
+         finish_signed_unquantize(value) :
+         finish_unsigned_unquantize(value);
+
+      result[component] = _mesa_half_to_float(value);
+   }
+
+   result[3] = 1.0f;
+}
+
+#ifdef BPTC_BLOCK_DECODE
+/*
+ * Decode the top-left src_width x src_height texels of one float block.
+ *
+ * Each texel is written as four floats (the fourth is always 1.0) starting
+ * at dst_row; dst_rowstride is the distance between output rows in bytes.
+ * Blocks using a reserved mode decode to (0, 0, 0, 1) for every texel.
+ */
+static void
+decompress_rgb_float_block(unsigned src_width, unsigned src_height,
+                           const uint8_t *block,
+                           float *dst_row, unsigned dst_rowstride,
+                           bool is_signed)
+{
+   int mode_num;
+   const struct bptc_float_mode *mode;
+   int bit_offset;
+   int index_offset;
+   int partition_num;
+   int subset_num;
+   int index_bits;
+   int index;
+   int anchors_before_texel;
+   int32_t endpoints[2 * 2][3];
+   uint32_t subsets;
+   int n_subsets;
+   int component;
+   int32_t value;
+   unsigned x, y;
+
+   /* The mode number takes either two or five bits */
+   if (block[0] & 0x2) {
+      mode_num = (((block[0] >> 1) & 0xe) | (block[0] & 1)) + 2;
+      bit_offset = 5;
+   } else {
+      mode_num = block[0] & 3;
+      bit_offset = 2;
+   }
+
+   mode = bptc_float_modes + mode_num;
+
+   if (mode->reserved) {
+      for (y = 0; y < src_height; y += 1) {
+         float *result = dst_row;
+
+         memset(result, 0, sizeof result[0] * 4 * src_width);
+         for (x = 0; x < src_width; x += 1) {
+            result[3] = 1.0f;
+            result += 4;
+         }
+         dst_row += dst_rowstride / sizeof dst_row[0];
+      }
+      return;
+   }
+
+   bit_offset = extract_float_endpoints(mode, block, bit_offset,
+                                        endpoints, is_signed);
+
+   if (mode->n_partition_bits) {
+      partition_num = extract_bits(block, bit_offset, mode->n_partition_bits);
+      bit_offset += mode->n_partition_bits;
+
+      subsets = partition_table1[partition_num];
+      n_subsets = 2;
+   } else {
+      partition_num = 0;
+      subsets = 0;
+      n_subsets = 1;
+   }
+
+   /* From here on bit_offset marks the start of the index data and must
+    * not change: every texel derives its own offset from it. */
+   for (y = 0; y < src_height; y += 1) {
+      float *result = dst_row;
+
+      for (x = 0; x < src_width; x += 1) {
+         int texel;
+
+         texel = x + y * 4;
+
+         anchors_before_texel =
+            count_anchors_before_texel(n_subsets, partition_num, texel);
+
+         /* Calculate the offset to the primary index for this texel.
+          * This is kept in a separate variable; adding it to bit_offset
+          * would accumulate the offsets of all earlier texels. */
+         index_offset = bit_offset +
+            mode->n_index_bits * texel - anchors_before_texel;
+
+         subset_num = (subsets >> (texel * 2)) & 3;
+
+         /* Anchor indices are stored with one bit less */
+         index_bits = mode->n_index_bits;
+         if (is_anchor(n_subsets, partition_num, texel))
+            index_bits--;
+         index = extract_bits(block, index_offset, index_bits);
+
+         for (component = 0; component < 3; component++) {
+            value = interpolate(endpoints[subset_num * 2][component],
+                                endpoints[subset_num * 2 + 1][component],
+                                index,
+                                mode->n_index_bits);
+
+            if (is_signed)
+               value = finish_signed_unquantize(value);
+            else
+               value = finish_unsigned_unquantize(value);
+
+            result[component] = _mesa_half_to_float(value);
+         }
+
+         result[3] = 1.0f;
+         result += 4;
+      }
+      dst_row += dst_rowstride / sizeof dst_row[0];
+   }
+}
+
+/*
+ * Decompress a whole image of BPTC float blocks.
+ *
+ * The image is walked in BLOCK_SIZE x BLOCK_SIZE tiles.  Every tile
+ * consumes BLOCK_BYTES of src and is handed to
+ * decompress_rgb_float_block(); tiles on the right and bottom edges are
+ * clipped to the image size.  The output uses four floats per texel and
+ * dst_rowstride is measured in bytes.
+ */
+static void
+decompress_rgb_float(int width, int height,
+                     const uint8_t *src, int src_rowstride,
+                     float *dst, int dst_rowstride, bool is_signed)
+{
+   /* Bytes left over after the last block of each row of blocks.  A stride
+    * smaller than width * 4 is treated as tightly packed. */
+   const int src_row_diff = (src_rowstride >= width * 4) ?
+      src_rowstride - ((width + 3) & ~3) * 4 : 0;
+   const uint8_t *block = src;
+   int block_y, block_x;
+
+   for (block_y = 0; block_y < height; block_y += BLOCK_SIZE) {
+      const int rows = MIN2(height - block_y, BLOCK_SIZE);
+      float *out_row = dst + (block_y * dst_rowstride / sizeof dst[0]);
+
+      for (block_x = 0; block_x < width; block_x += BLOCK_SIZE) {
+         const int cols = MIN2(width - block_x, BLOCK_SIZE);
+
+         decompress_rgb_float_block(cols, rows,
+                                    block,
+                                    out_row + block_x * 4,
+                                    dst_rowstride, is_signed);
+         block += BLOCK_BYTES;
+      }
+
+      block += src_row_diff;
+   }
+}
+#endif // BPTC_BLOCK_DECODE
+
+/*
+ * Append n_bits bits of value to the writer, least-significant bit first.
+ *
+ * Bits are collected in writer->buf, with writer->pos counting how many
+ * are already there; every completed byte is stored through writer->dst.
+ * value is not masked here, so it is expected to fit in n_bits.
+ */
+static void
+write_bits(struct bit_writer *writer, int n_bits, int value)
+{
+   for (;;) {
+      if (n_bits + writer->pos < 8) {
+         /* Everything that is left fits into the pending byte */
+         writer->buf |= value << writer->pos;
+         writer->pos += n_bits;
+         return;
+      }
+
+      /* Fill up the pending byte, flush it and carry on with the rest */
+      *(writer->dst++) = writer->buf | (value << writer->pos);
+      writer->buf = 0;
+      value >>= (8 - writer->pos);
+      n_bits -= (8 - writer->pos);
+      writer->pos = 0;
+
+      if (n_bits <= 0)
+         return;
+   }
+}
+
+/*
+ * Compute the average luminance and the average alpha of a rectangle of
+ * 4-byte texels.
+ *
+ * Luminance is the plain sum of the first three bytes of a texel.
+ * src_rowstride is the distance between rows in bytes.  Both averages use
+ * integer division by width * height.
+ */
+static void
+get_average_luminance_alpha_unorm(int width, int height,
+                                  const uint8_t *src, int src_rowstride,
+                                  int *average_luminance, int *average_alpha)
+{
+   const int row_padding = src_rowstride - width * 4;
+   const uint8_t *texel = src;
+   int total_luminance = 0;
+   int total_alpha = 0;
+   int row, col;
+
+   for (row = 0; row < height; row++) {
+      for (col = 0; col < width; col++) {
+         total_luminance += texel[0] + texel[1] + texel[2];
+         total_alpha += texel[3];
+         texel += 4;
+      }
+
+      texel += row_padding;
+   }
+
+   *average_luminance = total_luminance / (width * height);
+   *average_alpha = total_alpha / (width * height);
+}
+
+/*
+ * Choose two RGBA endpoints for a rectangle of 4-byte texels.
+ *
+ * Colour and alpha are handled independently.  The texels are split into
+ * those below the given average (luminance for colour, alpha for alpha)
+ * and the rest, and each endpoint is the mean of one group.  If one group
+ * is empty both endpoints are set to the mean of all texels.
+ *
+ * Afterwards the endpoints are swapped where necessary so that the first
+ * texel lies on the same side of the midpoint as endpoint 0.
+ *
+ * src_rowstride is the distance between rows in bytes and endpoints
+ * receives two entries.
+ */
+static void
+get_rgba_endpoints_unorm(int width, int height,
+                         const uint8_t *src, int src_rowstride,
+                         int average_luminance, int average_alpha,
+                         uint8_t endpoints[][4])
+{
+   int endpoint_luminances[2];
+   int midpoint;
+   int sums[2][4];
+   int endpoint;
+   int luminance;
+   uint8_t temp[3];
+   const uint8_t *p = src;
+   int rgb_left_endpoint_count = 0;
+   int alpha_left_endpoint_count = 0;
+   int y, x, i;
+
+   memset(sums, 0, sizeof sums);
+
+   for (y = 0; y < height; y++) {
+      for (x = 0; x < width; x++) {
+         luminance = p[0] + p[1] + p[2];
+         if (luminance < average_luminance) {
+            endpoint = 0;
+            rgb_left_endpoint_count++;
+         } else {
+            endpoint = 1;
+         }
+         for (i = 0; i < 3; i++)
+            sums[endpoint][i] += p[i];
+
+         /* Alpha lives in the fourth byte; the split must test p[3] and
+          * not the blue channel. */
+         if (p[3] < average_alpha) {
+            endpoint = 0;
+            alpha_left_endpoint_count++;
+         } else {
+            endpoint = 1;
+         }
+         sums[endpoint][3] += p[3];
+
+         p += 4;
+      }
+
+      p += src_rowstride - width * 4;
+   }
+
+   if (rgb_left_endpoint_count == 0 ||
+       rgb_left_endpoint_count == width * height) {
+      for (i = 0; i < 3; i++)
+         endpoints[0][i] = endpoints[1][i] =
+            (sums[0][i] + sums[1][i]) / (width * height);
+   } else {
+      for (i = 0; i < 3; i++) {
+         endpoints[0][i] = sums[0][i] / rgb_left_endpoint_count;
+         endpoints[1][i] = (sums[1][i] /
+                            (width * height - rgb_left_endpoint_count));
+      }
+   }
+
+   if (alpha_left_endpoint_count == 0 ||
+       alpha_left_endpoint_count == width * height) {
+      endpoints[0][3] = endpoints[1][3] =
+         (sums[0][3] + sums[1][3]) / (width * height);
+   } else {
+      endpoints[0][3] = sums[0][3] / alpha_left_endpoint_count;
+      endpoints[1][3] = (sums[1][3] /
+                         (width * height - alpha_left_endpoint_count));
+   }
+
+   /* We may need to swap the endpoints to ensure the most-significant bit of
+    * the first index is zero */
+
+   for (endpoint = 0; endpoint < 2; endpoint++) {
+      endpoint_luminances[endpoint] =
+         endpoints[endpoint][0] +
+         endpoints[endpoint][1] +
+         endpoints[endpoint][2];
+   }
+   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2;
+
+   if ((src[0] + src[1] + src[2] <= midpoint) !=
+       (endpoint_luminances[0] <= midpoint)) {
+      memcpy(temp, endpoints[0], 3);
+      memcpy(endpoints[0], endpoints[1], 3);
+      memcpy(endpoints[1], temp, 3);
+   }
+
+   /* Same for the alpha endpoints */
+
+   midpoint = (endpoints[0][3] + endpoints[1][3]) / 2;
+
+   if ((src[3] <= midpoint) != (endpoints[0][3] <= midpoint)) {
+      temp[0] = endpoints[0][3];
+      endpoints[0][3] = endpoints[1][3];
+      endpoints[1][3] = temp[0];
+   }
+}
+
+/*
+ * Write the 2-bit colour indices of a block.
+ *
+ * Each texel is placed between the two endpoints according to its
+ * luminance (the sum of its first three bytes).  The index of the first
+ * texel is written with one bit less.  Rows and columns missing from a
+ * partial block are padded with zero bits, and if both endpoints have the
+ * same luminance all indices are written as zero.
+ */
+static void
+write_rgb_indices_unorm(struct bit_writer *writer,
+                        int src_width, int src_height,
+                        const uint8_t *src, int src_rowstride,
+                        uint8_t endpoints[][4])
+{
+   const int lum0 = endpoints[0][0] + endpoints[0][1] + endpoints[0][2];
+   const int lum1 = endpoints[1][0] + endpoints[1][1] + endpoints[1][2];
+   const uint8_t *texel = src;
+   int row, col;
+
+   if (lum0 == lum1) {
+      write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 2 - 1, 0);
+      return;
+   }
+
+   for (row = 0; row < src_height; row++) {
+      for (col = 0; col < src_width; col++) {
+         const bool is_first = (row == 0 && col == 0);
+         const int luminance = texel[0] + texel[1] + texel[2];
+         int index = (luminance - lum0) * 3 / (lum1 - lum0);
+
+         if (index < 0)
+            index = 0;
+         else if (index > 3)
+            index = 3;
+
+         assert(!is_first || index < 2);
+
+         write_bits(writer, is_first ? 1 : 2, index);
+
+         texel += 4;
+      }
+
+      /* Zero indices for the columns missing from a narrow block */
+      if (src_width < BLOCK_SIZE)
+         write_bits(writer, 2 * (BLOCK_SIZE - src_width), 0);
+
+      texel += src_rowstride - src_width * 4;
+   }
+
+   /* Zero indices for the rows missing from a short block */
+   if (src_height < BLOCK_SIZE)
+      write_bits(writer, 2 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
+}
+
+/*
+ * Write the 3-bit alpha indices of a block.
+ *
+ * Each texel is placed between the two alpha endpoints according to its
+ * fourth byte.  The index of the first texel is written with one bit less.
+ * Rows and columns missing from a partial block are padded with zero bits,
+ * and if both endpoints have the same alpha all indices are written as
+ * zero.
+ */
+static void
+write_alpha_indices_unorm(struct bit_writer *writer,
+                          int src_width, int src_height,
+                          const uint8_t *src, int src_rowstride,
+                          uint8_t endpoints[][4])
+{
+   const int alpha0 = endpoints[0][3];
+   const int alpha1 = endpoints[1][3];
+   const uint8_t *texel = src;
+   int row, col;
+
+   if (alpha0 == alpha1) {
+      write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 3 - 1, 0);
+      return;
+   }
+
+   for (row = 0; row < src_height; row++) {
+      for (col = 0; col < src_width; col++) {
+         const bool is_first = (row == 0 && col == 0);
+         int index = ((int) texel[3] - alpha0) * 7 / (alpha1 - alpha0);
+
+         if (index < 0)
+            index = 0;
+         else if (index > 7)
+            index = 7;
+
+         assert(!is_first || index < 4);
+
+         /* The first index has one less bit */
+         write_bits(writer, is_first ? 2 : 3, index);
+
+         texel += 4;
+      }
+
+      /* Zero indices for the columns missing from a narrow block */
+      if (src_width < BLOCK_SIZE)
+         write_bits(writer, 3 * (BLOCK_SIZE - src_width), 0);
+
+      texel += src_rowstride - src_width * 4;
+   }
+
+   /* Zero indices for the rows missing from a short block */
+   if (src_height < BLOCK_SIZE)
+      write_bits(writer, 3 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
+}
+
+/*
+ * Compress a rectangle of up to BLOCK_SIZE x BLOCK_SIZE RGBA texels into
+ * one block written at dst.
+ *
+ * The block always uses mode 4 with rotation 0 and index selection 0.
+ * Colour endpoints are stored with 5 bits per component, alpha endpoints
+ * with 6 bits, followed by the colour indices and then the alpha indices.
+ */
+static void
+compress_rgba_unorm_block(int src_width, int src_height,
+                          const uint8_t *src, int src_rowstride,
+                          uint8_t *dst)
+{
+   struct bit_writer writer;
+   uint8_t endpoints[2][4];
+   int average_luminance, average_alpha;
+   int component;
+
+   get_average_luminance_alpha_unorm(src_width, src_height, src, src_rowstride,
+                                     &average_luminance, &average_alpha);
+   get_rgba_endpoints_unorm(src_width, src_height, src, src_rowstride,
+                            average_luminance, average_alpha,
+                            endpoints);
+
+   writer.buf = 0;
+   writer.pos = 0;
+   writer.dst = dst;
+
+   write_bits(&writer, 5, 0x10); /* mode 4 */
+   write_bits(&writer, 2, 0); /* rotation 0 */
+   write_bits(&writer, 1, 0); /* index selection bit */
+
+   /* Colour endpoints: both endpoints of a component are adjacent */
+   for (component = 0; component < 3; component++) {
+      write_bits(&writer, 5, endpoints[0][component] >> 3);
+      write_bits(&writer, 5, endpoints[1][component] >> 3);
+   }
+
+   /* Alpha endpoints */
+   write_bits(&writer, 6, endpoints[0][3] >> 2);
+   write_bits(&writer, 6, endpoints[1][3] >> 2);
+
+   write_rgb_indices_unorm(&writer,
+                           src_width, src_height,
+                           src, src_rowstride,
+                           endpoints);
+   write_alpha_indices_unorm(&writer,
+                             src_width, src_height,
+                             src, src_rowstride,
+                             endpoints);
+}
+
+/*
+ * Compress an image of 4-byte RGBA texels into BPTC UNORM blocks.
+ *
+ * The image is walked in BLOCK_SIZE x BLOCK_SIZE tiles, clipped at the
+ * right and bottom edges; every tile produces BLOCK_BYTES of output via
+ * compress_rgba_unorm_block().  src_rowstride is measured in bytes.
+ */
+static void
+compress_rgba_unorm(int width, int height,
+                    const uint8_t *src, int src_rowstride,
+                    uint8_t *dst, int dst_rowstride)
+{
+   /* Bytes left over after the last block of each row of blocks.  A stride
+    * smaller than width * 4 is treated as tightly packed. */
+   const int dst_row_diff = (dst_rowstride >= width * 4) ?
+      dst_rowstride - ((width + 3) & ~3) * 4 : 0;
+   uint8_t *block = dst;
+   int block_y, block_x;
+
+   for (block_y = 0; block_y < height; block_y += BLOCK_SIZE) {
+      const int rows = MIN2(height - block_y, BLOCK_SIZE);
+      const uint8_t *in_row = src + block_y * src_rowstride;
+
+      for (block_x = 0; block_x < width; block_x += BLOCK_SIZE) {
+         const int cols = MIN2(width - block_x, BLOCK_SIZE);
+
+         compress_rgba_unorm_block(cols, rows,
+                                   in_row + block_x * 4,
+                                   src_rowstride,
+                                   block);
+         block += BLOCK_BYTES;
+      }
+
+      block += dst_row_diff;
+   }
+}
+
+/*
+ * Return the average luminance of a rectangle of RGB float texels.
+ *
+ * Luminance is the plain sum of the three components of a texel.  Texels
+ * are three floats wide and src_rowstride is the distance between rows in
+ * bytes.
+ */
+static float
+get_average_luminance_float(int width, int height,
+                            const float *src, int src_rowstride)
+{
+   const float *texel = src;
+   float total = 0;
+   int row, col;
+
+   for (row = 0; row < height; row++) {
+      for (col = 0; col < width; col++) {
+         total += texel[0] + texel[1] + texel[2];
+         texel += 3;
+      }
+
+      /* Skip the padding at the end of the row, converted to floats */
+      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
+   }
+
+   return total / (width * height);
+}
+
+/*
+ * Clamp value to [-65504, 65504] when is_signed is set and to [0, 65504]
+ * otherwise.  Values inside the range are returned unchanged.
+ */
+static float
+clamp_value(float value, bool is_signed)
+{
+   const float upper = 65504.0f;
+   const float lower = is_signed ? -65504.0f : 0.0f;
+
+   if (value > upper)
+      return upper;
+
+   if (value < lower)
+      return lower;
+
+   return value;
+}
+
+/*
+ * Choose two RGB endpoints for a rectangle of RGB float texels.
+ *
+ * The texels are split into those whose luminance is below
+ * average_luminance and the rest; each endpoint is the mean of one group.
+ * If one group is empty both endpoints are the mean of all texels.  The
+ * endpoints are clamped with clamp_value() and finally swapped where
+ * necessary so that the first texel lies on the same side of the midpoint
+ * as endpoint 0.
+ *
+ * Texels are three floats wide and src_rowstride is the distance between
+ * rows in bytes.
+ */
+static void
+get_endpoints_float(int width, int height,
+                    const float *src, int src_rowstride,
+                    float average_luminance, float endpoints[][3],
+                    bool is_signed)
+{
+   const float *texel = src;
+   float endpoint_luminances[2];
+   float sums[2][3];
+   float scratch[3];
+   float luminance;
+   float midpoint;
+   int below_average_count = 0;
+   int group, component;
+   int row, col;
+
+   memset(sums, 0, sizeof sums);
+
+   /* Accumulate each texel into the group it belongs to */
+   for (row = 0; row < height; row++) {
+      for (col = 0; col < width; col++) {
+         luminance = texel[0] + texel[1] + texel[2];
+
+         group = 1;
+         if (luminance < average_luminance) {
+            group = 0;
+            below_average_count++;
+         }
+
+         for (component = 0; component < 3; component++)
+            sums[group][component] += texel[component];
+
+         texel += 3;
+      }
+
+      texel += (src_rowstride - width * 3 * sizeof (float)) / sizeof (float);
+   }
+
+   for (component = 0; component < 3; component++) {
+      if (below_average_count == 0 ||
+          below_average_count == width * height) {
+         endpoints[0][component] = endpoints[1][component] =
+            (sums[0][component] + sums[1][component]) / (width * height);
+      } else {
+         endpoints[0][component] = sums[0][component] / below_average_count;
+         endpoints[1][component] =
+            sums[1][component] / (width * height - below_average_count);
+      }
+   }
+
+   /* Keep the endpoints inside the range accepted by clamp_value() */
+   for (group = 0; group < 2; group++) {
+      for (component = 0; component < 3; component++) {
+         endpoints[group][component] =
+            clamp_value(endpoints[group][component], is_signed);
+      }
+   }
+
+   /* The first index is stored without its most-significant bit, so the
+    * first texel has to end up closer to endpoint 0 */
+   for (group = 0; group < 2; group++) {
+      endpoint_luminances[group] =
+         endpoints[group][0] +
+         endpoints[group][1] +
+         endpoints[group][2];
+   }
+   midpoint = (endpoint_luminances[0] + endpoint_luminances[1]) / 2.0f;
+
+   if ((src[0] + src[1] + src[2] <= midpoint) !=
+       (endpoint_luminances[0] <= midpoint)) {
+      memcpy(scratch, endpoints[0], sizeof scratch);
+      memcpy(endpoints[0], endpoints[1], sizeof scratch);
+      memcpy(endpoints[1], scratch, sizeof scratch);
+   }
+}
+
+/*
+ * Write the 4-bit indices of a float block.
+ *
+ * Each texel is placed between the two endpoints according to its
+ * luminance (the sum of its three components).  The index of the first
+ * texel is written with one bit less.  Rows and columns missing from a
+ * partial block are padded with zero bits, and if both endpoints have the
+ * same luminance all indices are written as zero.
+ */
+static void
+write_rgb_indices_float(struct bit_writer *writer,
+                        int src_width, int src_height,
+                        const float *src, int src_rowstride,
+                        float endpoints[][3])
+{
+   const float *texel = src;
+   float endpoint_luminances[2];
+   float luminance;
+   int which;
+   int index;
+   int row, col;
+
+   for (which = 0; which < 2; which++) {
+      endpoint_luminances[which] =
+         endpoints[which][0] +
+         endpoints[which][1] +
+         endpoints[which][2];
+   }
+
+   if (endpoint_luminances[0] == endpoint_luminances[1]) {
+      write_bits(writer, BLOCK_SIZE * BLOCK_SIZE * 4 - 1, 0);
+      return;
+   }
+
+   for (row = 0; row < src_height; row++) {
+      for (col = 0; col < src_width; col++) {
+         const bool is_first = (row == 0 && col == 0);
+
+         luminance = texel[0] + texel[1] + texel[2];
+
+         index = ((luminance - endpoint_luminances[0]) * 15 /
+                  (endpoint_luminances[1] - endpoint_luminances[0]));
+         if (index < 0)
+            index = 0;
+         else if (index > 15)
+            index = 15;
+
+         assert(!is_first || index < 8);
+
+         write_bits(writer, is_first ? 3 : 4, index);
+
+         texel += 3;
+      }
+
+      /* Zero indices for the columns missing from a narrow block */
+      if (src_width < BLOCK_SIZE)
+         write_bits(writer, 4 * (BLOCK_SIZE - src_width), 0);
+
+      texel += (src_rowstride - src_width * 3 * sizeof (float)) / sizeof (float);
+   }
+
+   /* Zero indices for the rows missing from a short block */
+   if (src_height < BLOCK_SIZE)
+      write_bits(writer, 4 * BLOCK_SIZE * (BLOCK_SIZE - src_height), 0);
+}
+
+/*
+ * Convert a float endpoint component to the 10-bit value stored in the
+ * block.
+ *
+ * The value is converted with _mesa_float_to_half(), rescaled and reduced
+ * by six bits.  Unsigned endpoints map everything at or below zero to 0;
+ * negative signed endpoints are returned as the negated magnitude masked
+ * to 10 bits.
+ */
+static int
+get_endpoint_value(float value, bool is_signed)
+{
+   bool negative;
+   int half;
+
+   if (!is_signed) {
+      if (value <= 0.0f)
+         return 0;
+
+      half = _mesa_float_to_half(value);
+
+      return (64 * half / 31) >> 6;
+   }
+
+   half = _mesa_float_to_half(value);
+
+   /* Split off the sign bit and scale the magnitude only */
+   negative = (half & 0x8000) != 0;
+   if (negative)
+      half &= 0x7fff;
+
+   half = (32 * half / 31) >> 6;
+
+   if (negative)
+      half = -half & ((1 << 10) - 1);
+
+   return half;
+}
+
+/*
+ * Compress a rectangle of up to BLOCK_SIZE x BLOCK_SIZE RGB float texels
+ * into one block written at dst.
+ *
+ * The block always uses mode 3: two endpoints with 10 bits per component,
+ * followed by the indices.
+ */
+static void
+compress_rgb_float_block(int src_width, int src_height,
+                         const float *src, int src_rowstride,
+                         uint8_t *dst,
+                         bool is_signed)
+{
+   struct bit_writer writer;
+   float endpoints[2][3];
+   float average_luminance;
+   int which, component;
+
+   average_luminance =
+      get_average_luminance_float(src_width, src_height, src, src_rowstride);
+   get_endpoints_float(src_width, src_height, src, src_rowstride,
+                       average_luminance, endpoints, is_signed);
+
+   writer.buf = 0;
+   writer.pos = 0;
+   writer.dst = dst;
+
+   write_bits(&writer, 5, 3); /* mode 3 */
+
+   /* All components of endpoint 0 come first, then those of endpoint 1 */
+   for (which = 0; which < 2; which++) {
+      for (component = 0; component < 3; component++) {
+         const int stored =
+            get_endpoint_value(endpoints[which][component], is_signed);
+
+         write_bits(&writer, 10, stored);
+      }
+   }
+
+   write_rgb_indices_float(&writer,
+                           src_width, src_height,
+                           src, src_rowstride,
+                           endpoints);
+}
+
+/*
+ * Compress an image of RGB float texels into BPTC float blocks.
+ *
+ * The image is walked in BLOCK_SIZE x BLOCK_SIZE tiles, clipped at the
+ * right and bottom edges; every tile produces BLOCK_BYTES of output via
+ * compress_rgb_float_block().  Texels are three floats wide and
+ * src_rowstride is measured in bytes.
+ */
+static void
+compress_rgb_float(int width, int height,
+                   const float *src, int src_rowstride,
+                   uint8_t *dst, int dst_rowstride,
+                   bool is_signed)
+{
+   /* Bytes left over after the last block of each row of blocks.  A stride
+    * smaller than width * 4 is treated as tightly packed. */
+   const int dst_row_diff = (dst_rowstride >= width * 4) ?
+      dst_rowstride - ((width + 3) & ~3) * 4 : 0;
+   uint8_t *block = dst;
+   int block_y, block_x;
+
+   for (block_y = 0; block_y < height; block_y += BLOCK_SIZE) {
+      const int rows = MIN2(height - block_y, BLOCK_SIZE);
+      const float *in_row = src + block_y * src_rowstride / sizeof (float);
+
+      for (block_x = 0; block_x < width; block_x += BLOCK_SIZE) {
+         const int cols = MIN2(width - block_x, BLOCK_SIZE);
+
+         compress_rgb_float_block(cols, rows,
+                                  in_row + block_x * 3,
+                                  src_rowstride,
+                                  block,
+                                  is_signed);
+         block += BLOCK_BYTES;
+      }
+
+      block += dst_row_diff;
+   }
+}
--
2.18.0
More information about the mesa-dev
mailing list