[Mesa-dev] [PATCH 01/18] radeonsi: make fix_fetch an array of uint8_t
Marek Olšák
maraeo at gmail.com
Thu Feb 16 12:52:53 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
Make fix_fetch an array of uint8_t so that we can add 3-component fallbacks.
---
src/gallium/drivers/radeonsi/si_shader.c | 8 +++++--
src/gallium/drivers/radeonsi/si_shader.h | 5 ++---
src/gallium/drivers/radeonsi/si_state.c | 28 ++++++++++++-------------
src/gallium/drivers/radeonsi/si_state.h | 2 +-
src/gallium/drivers/radeonsi/si_state_shaders.c | 5 ++---
5 files changed, 25 insertions(+), 23 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index cfff54a..8b9fed9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -359,21 +359,21 @@ static void declare_input_vs(
t_list_ptr = LLVMGetParam(ctx->main_fn, SI_PARAM_VERTEX_BUFFERS);
t_offset = lp_build_const_int32(gallivm, input_index);
t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
vertex_index = LLVMGetParam(ctx->main_fn,
ctx->param_vertex_index0 +
input_index);
- fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (4 * input_index)) & 0xf;
+ fix_fetch = ctx->shader->key.mono.vs.fix_fetch[input_index];
/* Do multiple loads for double formats. */
if (fix_fetch == SI_FIX_FETCH_RGB_64_FLOAT) {
num_fetches = 3; /* 3 2-dword loads */
fetch_stride = 8;
} else if (fix_fetch == SI_FIX_FETCH_RGBA_64_FLOAT) {
num_fetches = 2; /* 2 4-dword loads */
fetch_stride = 16;
} else {
num_fetches = 1;
@@ -6263,21 +6263,25 @@ static void si_dump_shader_key(unsigned shader, struct si_shader_key *key,
switch (shader) {
case PIPE_SHADER_VERTEX:
fprintf(f, " part.vs.prolog.instance_divisors = {");
for (i = 0; i < ARRAY_SIZE(key->part.vs.prolog.instance_divisors); i++)
fprintf(f, !i ? "%u" : ", %u",
key->part.vs.prolog.instance_divisors[i]);
fprintf(f, "}\n");
fprintf(f, " part.vs.epilog.export_prim_id = %u\n", key->part.vs.epilog.export_prim_id);
fprintf(f, " as_es = %u\n", key->as_es);
fprintf(f, " as_ls = %u\n", key->as_ls);
- fprintf(f, " mono.vs.fix_fetch = 0x%"PRIx64"\n", key->mono.vs.fix_fetch);
+
+ fprintf(f, " mono.vs.fix_fetch = {");
+ for (i = 0; i < SI_MAX_ATTRIBS; i++)
+ fprintf(f, !i ? "%u" : ", %u", key->mono.vs.fix_fetch[i]);
+ fprintf(f, "}\n");
break;
case PIPE_SHADER_TESS_CTRL:
fprintf(f, " part.tcs.epilog.prim_mode = %u\n", key->part.tcs.epilog.prim_mode);
fprintf(f, " mono.tcs.inputs_to_copy = 0x%"PRIx64"\n", key->mono.tcs.inputs_to_copy);
break;
case PIPE_SHADER_TESS_EVAL:
fprintf(f, " part.tes.epilog.export_prim_id = %u\n", key->part.tes.epilog.export_prim_id);
fprintf(f, " as_es = %u\n", key->as_es);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 6398b39..4616190 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -243,21 +243,20 @@ enum {
SI_FIX_FETCH_RGBX_32_UNORM,
SI_FIX_FETCH_RGBA_32_SNORM,
SI_FIX_FETCH_RGBX_32_SNORM,
SI_FIX_FETCH_RGBA_32_USCALED,
SI_FIX_FETCH_RGBA_32_SSCALED,
SI_FIX_FETCH_RGBA_32_FIXED,
SI_FIX_FETCH_RGBX_32_FIXED,
SI_FIX_FETCH_RG_64_FLOAT,
SI_FIX_FETCH_RGB_64_FLOAT,
SI_FIX_FETCH_RGBA_64_FLOAT,
- SI_FIX_FETCH_RESERVED_15, /* maximum */
};
struct si_shader;
/* State of the context creating the shader object. */
struct si_compiler_ctx_state {
/* Should only be used by si_init_shader_selector_async and
* si_build_shader_variant if thread_index == -1 (non-threaded). */
LLVMTargetMachineRef tm;
@@ -438,22 +437,22 @@ struct si_shader_key {
/* These two are initially set according to the NEXT_SHADER property,
* or guessed if the property doesn't seem correct.
*/
unsigned as_es:1; /* export shader */
unsigned as_ls:1; /* local shader */
/* Flags for monolithic compilation only. */
union {
struct {
- /* One nibble for every input: SI_FIX_FETCH_* enums. */
- uint64_t fix_fetch;
+ /* One byte for every input: SI_FIX_FETCH_* enums. */
+ uint8_t fix_fetch[SI_MAX_ATTRIBS];
} vs;
struct {
uint64_t inputs_to_copy; /* for fixed-func TCS */
} tcs;
} mono;
/* Optimization flags for asynchronous compilation only. */
union {
struct {
uint64_t kill_outputs; /* "get_unique_index" bits */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 4ccca52..d9b9f83 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3392,72 +3392,72 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL;
memcpy(swizzle, desc->swizzle, sizeof(swizzle));
v->format_size[i] = desc->block.bits / 8;
/* The hardware always treats the 2-bit alpha channel as
* unsigned, so a shader workaround is needed.
*/
if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) {
if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) {
- v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SNORM << (4 * i);
+ v->fix_fetch[i] = SI_FIX_FETCH_A2_SNORM;
} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) {
- v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SSCALED << (4 * i);
+ v->fix_fetch[i] = SI_FIX_FETCH_A2_SSCALED;
} else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) {
/* This isn't actually used in OpenGL. */
- v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SINT << (4 * i);
+ v->fix_fetch[i] = SI_FIX_FETCH_A2_SINT;
}
} else if (channel && channel->type == UTIL_FORMAT_TYPE_FIXED) {
if (desc->swizzle[3] == PIPE_SWIZZLE_1)
- v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_FIXED << (4 * i);
+ v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_FIXED;
else
- v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_FIXED << (4 * i);
+ v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_FIXED;
} else if (channel && channel->size == 32 && !channel->pure_integer) {
if (channel->type == UTIL_FORMAT_TYPE_SIGNED) {
if (channel->normalized) {
if (desc->swizzle[3] == PIPE_SWIZZLE_1)
- v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_SNORM << (4 * i);
+ v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_SNORM;
else
- v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SNORM << (4 * i);
+ v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_SNORM;
} else {
- v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_SSCALED << (4 * i);
+ v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_SSCALED;
}
} else if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) {
if (channel->normalized) {
if (desc->swizzle[3] == PIPE_SWIZZLE_1)
- v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_UNORM << (4 * i);
+ v->fix_fetch[i] = SI_FIX_FETCH_RGBX_32_UNORM;
else
- v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_UNORM << (4 * i);
+ v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_UNORM;
} else {
- v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_USCALED << (4 * i);
+ v->fix_fetch[i] = SI_FIX_FETCH_RGBA_32_USCALED;
}
}
} else if (channel && channel->size == 64 &&
channel->type == UTIL_FORMAT_TYPE_FLOAT) {
switch (desc->nr_channels) {
case 1:
case 2:
- v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RG_64_FLOAT << (4 * i);
+ v->fix_fetch[i] = SI_FIX_FETCH_RG_64_FLOAT;
swizzle[0] = PIPE_SWIZZLE_X;
swizzle[1] = PIPE_SWIZZLE_Y;
swizzle[2] = desc->nr_channels == 2 ? PIPE_SWIZZLE_Z : PIPE_SWIZZLE_0;
swizzle[3] = desc->nr_channels == 2 ? PIPE_SWIZZLE_W : PIPE_SWIZZLE_0;
break;
case 3:
- v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGB_64_FLOAT << (4 * i);
+ v->fix_fetch[i] = SI_FIX_FETCH_RGB_64_FLOAT;
swizzle[0] = PIPE_SWIZZLE_X; /* 3 loads */
swizzle[1] = PIPE_SWIZZLE_Y;
swizzle[2] = PIPE_SWIZZLE_0;
swizzle[3] = PIPE_SWIZZLE_0;
break;
case 4:
- v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_64_FLOAT << (4 * i);
+ v->fix_fetch[i] = SI_FIX_FETCH_RGBA_64_FLOAT;
swizzle[0] = PIPE_SWIZZLE_X; /* 2 loads */
swizzle[1] = PIPE_SWIZZLE_Y;
swizzle[2] = PIPE_SWIZZLE_Z;
swizzle[3] = PIPE_SWIZZLE_W;
break;
default:
assert(0);
}
}
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 07b7d58..cd44ed1 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -100,21 +100,21 @@ struct si_vertex_element
{
unsigned count;
unsigned first_vb_use_mask;
/* Vertex buffer descriptor list size aligned for optimal prefetch. */
unsigned desc_list_byte_size;
/* Two bits per attribute indicating the size of each vector component
* in bytes if the size 3-workaround must be applied.
*/
uint32_t fix_size3;
- uint64_t fix_fetch;
+ uint8_t fix_fetch[SI_MAX_ATTRIBS];
uint32_t rsrc_word3[SI_MAX_ATTRIBS];
uint32_t format_size[SI_MAX_ATTRIBS];
struct pipe_vertex_element elements[SI_MAX_ATTRIBS];
};
union si_state {
struct {
struct si_state_blend *blend;
struct si_state_rasterizer *rasterizer;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index bde02f5..9570259 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -968,23 +968,22 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
switch (sel->type) {
case PIPE_SHADER_VERTEX:
if (sctx->vertex_elements) {
unsigned count = MIN2(sel->info.num_inputs,
sctx->vertex_elements->count);
for (i = 0; i < count; ++i)
key->part.vs.prolog.instance_divisors[i] =
sctx->vertex_elements->elements[i].instance_divisor;
- key->mono.vs.fix_fetch =
- sctx->vertex_elements->fix_fetch &
- u_bit_consecutive64(0, 4 * count);
+ memcpy(key->mono.vs.fix_fetch,
+ sctx->vertex_elements->fix_fetch, count);
}
if (sctx->tes_shader.cso)
key->as_ls = 1;
else if (sctx->gs_shader.cso)
key->as_es = 1;
else {
si_shader_selector_key_hw_vs(sctx, sel, key);
if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
key->part.vs.epilog.export_prim_id = 1;
--
2.7.4
More information about the mesa-dev mailing list