[Mesa-dev] [PATCH] radeonsi: make fix_fetch 64-bit

Nicolai Hähnle nhaehnle at gmail.com
Mon Jan 16 15:53:49 UTC 2017


On 16.01.2017 15:04, Marek Olšák wrote:
> On Mon, Jan 16, 2017 at 3:00 PM, Marek Olšák <maraeo at gmail.com> wrote:
>> From: Marek Olšák <marek.olsak at amd.com>
>>
>> v2: add u_bit_consecutive64
>> ---
>>  src/gallium/drivers/radeonsi/si_shader.c        | 4 ++--
>>  src/gallium/drivers/radeonsi/si_shader.h        | 4 ++--
>>  src/gallium/drivers/radeonsi/si_state.c         | 6 +++---
>>  src/gallium/drivers/radeonsi/si_state.h         | 2 +-
>>  src/gallium/drivers/radeonsi/si_state_shaders.c | 2 +-
>>  src/util/bitscan.h                              | 9 +++++++++
>>  6 files changed, 18 insertions(+), 9 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
>> index 6f0f414..dfba9d4 100644
>> --- a/src/gallium/drivers/radeonsi/si_shader.c
>> +++ b/src/gallium/drivers/radeonsi/si_shader.c
>> @@ -426,21 +426,21 @@ static void declare_input_vs(
>>                 "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
>>                 LP_FUNC_ATTR_READNONE);
>>
>>         /* Break up the vec4 into individual components */
>>         for (chan = 0; chan < 4; chan++) {
>>                 LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
>>                 out[chan] = LLVMBuildExtractElement(gallivm->builder,
>>                                                     input, llvm_chan, "");
>>         }
>>
>> -       fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (2 * input_index)) & 3;
>> +       fix_fetch = (ctx->shader->key.mono.vs.fix_fetch >> (4 * input_index)) & 0xf;
>>         if (fix_fetch) {
>>                 /* The hardware returns an unsigned value; convert it to a
>>                  * signed one.
>>                  */
>>                 LLVMValueRef tmp = out[3];
>>                 LLVMValueRef c30 = LLVMConstInt(ctx->i32, 30, 0);
>>
>>                 /* First, recover the sign-extended signed integer value. */
>>                 if (fix_fetch == SI_FIX_FETCH_A2_SSCALED)
>>                         tmp = LLVMBuildFPToUI(gallivm->builder, tmp, ctx->i32, "");
>> @@ -6578,21 +6578,21 @@ static void si_dump_shader_key(unsigned shader, struct si_shader_key *key,
>>         switch (shader) {
>>         case PIPE_SHADER_VERTEX:
>>                 fprintf(f, "  part.vs.prolog.instance_divisors = {");
>>                 for (i = 0; i < ARRAY_SIZE(key->part.vs.prolog.instance_divisors); i++)
>>                         fprintf(f, !i ? "%u" : ", %u",
>>                                 key->part.vs.prolog.instance_divisors[i]);
>>                 fprintf(f, "}\n");
>>                 fprintf(f, "  part.vs.epilog.export_prim_id = %u\n", key->part.vs.epilog.export_prim_id);
>>                 fprintf(f, "  as_es = %u\n", key->as_es);
>>                 fprintf(f, "  as_ls = %u\n", key->as_ls);
>> -               fprintf(f, "  mono.vs.fix_fetch = 0x%x\n", key->mono.vs.fix_fetch);
>> +               fprintf(f, "  mono.vs.fix_fetch = 0x%"PRIx64"\n", key->mono.vs.fix_fetch);
>>                 break;
>>
>>         case PIPE_SHADER_TESS_CTRL:
>>                 fprintf(f, "  part.tcs.epilog.prim_mode = %u\n", key->part.tcs.epilog.prim_mode);
>>                 fprintf(f, "  mono.tcs.inputs_to_copy = 0x%"PRIx64"\n", key->mono.tcs.inputs_to_copy);
>>                 break;
>>
>>         case PIPE_SHADER_TESS_EVAL:
>>                 fprintf(f, "  part.tes.epilog.export_prim_id = %u\n", key->part.tes.epilog.export_prim_id);
>>                 fprintf(f, "  as_es = %u\n", key->as_es);
>> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
>> index 1b5dec2..89f9628 100644
>> --- a/src/gallium/drivers/radeonsi/si_shader.h
>> +++ b/src/gallium/drivers/radeonsi/si_shader.h
>> @@ -418,22 +418,22 @@ struct si_shader_key {
>>
>>         /* These two are initially set according to the NEXT_SHADER property,
>>          * or guessed if the property doesn't seem correct.
>>          */
>>         unsigned as_es:1; /* export shader */
>>         unsigned as_ls:1; /* local shader */
>>
>>         /* Flags for monolithic compilation only. */
>>         union {
>>                 struct {
>> -                       /* One pair of bits for every input: SI_FIX_FETCH_* enums. */
>> -                       uint32_t        fix_fetch;
>> +                       /* One nibble for every input: SI_FIX_FETCH_* enums. */
>> +                       uint64_t        fix_fetch;
>>                 } vs;
>>                 struct {
>>                         uint64_t        inputs_to_copy; /* for fixed-func TCS */
>>                 } tcs;
>>         } mono;
>>
>>         /* Optimization flags for asynchronous compilation only. */
>>         union {
>>                 struct {
>>                         uint64_t        kill_outputs; /* "get_unique_index" bits */
>> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
>> index 6e7d8da..fa78a56 100644
>> --- a/src/gallium/drivers/radeonsi/si_state.c
>> +++ b/src/gallium/drivers/radeonsi/si_state.c
>> @@ -3356,26 +3356,26 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
>>                                    S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])) |
>>                                    S_008F0C_NUM_FORMAT(num_format) |
>>                                    S_008F0C_DATA_FORMAT(data_format);
>>                 v->format_size[i] = desc->block.bits / 8;
>>
>>                 /* The hardware always treats the 2-bit alpha channel as
>>                  * unsigned, so a shader workaround is needed.
>>                  */
>>                 if (data_format == V_008F0C_BUF_DATA_FORMAT_2_10_10_10) {
>>                         if (num_format == V_008F0C_BUF_NUM_FORMAT_SNORM) {
>> -                               v->fix_fetch |= SI_FIX_FETCH_A2_SNORM << (2 * i);
>> +                               v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SNORM << (4 * i);
>>                         } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) {
>> -                               v->fix_fetch |= SI_FIX_FETCH_A2_SSCALED << (2 * i);
>> +                               v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SSCALED << (4 * i);
>>                         } else if (num_format == V_008F0C_BUF_NUM_FORMAT_SINT) {
>>                                 /* This isn't actually used in OpenGL. */
>> -                               v->fix_fetch |= SI_FIX_FETCH_A2_SINT << (2 * i);
>> +                               v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SINT << (4 * i);
>>                         }
>>                 }
>>
>>                 /* We work around the fact that 8_8_8 and 16_16_16 data formats
>>                  * do not exist by using the corresponding 4-component formats.
>>                  * This requires a fixup of the descriptor for bounds checks.
>>                  */
>>                 if (desc->block.bits == 3 * 8 ||
>>                     desc->block.bits == 3 * 16) {
>>                         v->fix_size3 |= (desc->block.bits / 24) << (2 * i);
>> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
>> index a17dbc7..edc5b93 100644
>> --- a/src/gallium/drivers/radeonsi/si_state.h
>> +++ b/src/gallium/drivers/radeonsi/si_state.h
>> @@ -92,26 +92,26 @@ struct si_state_dsa {
>>
>>  struct si_stencil_ref {
>>         struct r600_atom                atom;
>>         struct pipe_stencil_ref         state;
>>         struct si_dsa_stencil_ref_part  dsa_part;
>>  };
>>
>>  struct si_vertex_element
>>  {
>>         unsigned                        count;
>> -       uint32_t                        fix_fetch;
>>
>>         /* Two bits per attribute indicating the size of each vector component
>>          * in bytes if the size 3-workaround must be applied.
>>          */
>>         uint32_t                        fix_size3;
>> +       uint64_t                        fix_fetch;
>>
>>         uint32_t                        rsrc_word3[SI_MAX_ATTRIBS];
>>         uint32_t                        format_size[SI_MAX_ATTRIBS];
>>         struct pipe_vertex_element      elements[SI_MAX_ATTRIBS];
>>  };
>>
>>  union si_state {
>>         struct {
>>                 struct si_state_blend           *blend;
>>                 struct si_state_rasterizer      *rasterizer;
>> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
>> index 9967837..d2f04bc 100644
>> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
>> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
>> @@ -927,21 +927,21 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
>>         case PIPE_SHADER_VERTEX:
>>                 if (sctx->vertex_elements) {
>>                         unsigned count = MIN2(sel->info.num_inputs,
>>                                               sctx->vertex_elements->count);
>>                         for (i = 0; i < count; ++i)
>>                                 key->part.vs.prolog.instance_divisors[i] =
>>                                         sctx->vertex_elements->elements[i].instance_divisor;
>>
>>                         key->mono.vs.fix_fetch =
>>                                 sctx->vertex_elements->fix_fetch &
>> -                               u_bit_consecutive(0, 2 * count);
>> +                               u_bit_consecutive64(0, 4 * count);
>>                 }
>>                 if (sctx->tes_shader.cso)
>>                         key->as_ls = 1;
>>                 else if (sctx->gs_shader.cso)
>>                         key->as_es = 1;
>>                 else {
>>                         si_shader_selector_key_hw_vs(sctx, sel, key);
>>
>>                         if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
>>                                 key->part.vs.epilog.export_prim_id = 1;
>> diff --git a/src/util/bitscan.h b/src/util/bitscan.h
>> index a5dfa1f..4f8b608 100644
>> --- a/src/util/bitscan.h
>> +++ b/src/util/bitscan.h
>> @@ -219,16 +219,25 @@ util_last_bit_signed(int i)
>>   */
>>  static inline unsigned
>>  u_bit_consecutive(unsigned start, unsigned count)
>>  {
>>     assert(start + count <= 32);
>>     if (count == 32)
>>        return ~0;
>>     return ((1u << count) - 1) << start;
>>  }
>>
>> +static inline unsigned
>> +u_bit_consecutive64(unsigned start, unsigned count)
>
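> And this (u_bit_consecutive64) should return uint64_t, not unsigned; otherwise the 64-bit mask is truncated to the width of unsigned.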

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

> Marek
>
>> +{
>> +   assert(start + count <= 64);
>> +   if (count == 64)
>> +      return ~(uint64_t)0;
>> +   return (((uint64_t)1 << count) - 1) << start;
>> +}
>> +
>>
>>  #ifdef __cplusplus
>>  }
>>  #endif
>>
>>  #endif /* BITSCAN_H */
>> --
>> 2.7.4
>>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>


More information about the mesa-dev mailing list