[Mesa-dev] [PATCH 18/22] intel: Use TXQ for image_size when we have a typed surface

Wed Aug 29 12:38:59 UTC 2018

On Wed, Aug 29, 2018 at 1:36 AM Kenneth Graunke <kenneth at whitecape.org>
wrote:

> On Friday, August 17, 2018 1:06:24 PM PDT Jason Ekstrand wrote:
> > ---
> >  src/intel/compiler/brw_eu_defines.h           |  3 +++
> >  src/intel/compiler/brw_fs.cpp                 |  8 ++++++
> >  src/intel/compiler/brw_fs_generator.cpp       | 26 ++++++++++++++++---
> >  src/intel/compiler/brw_fs_nir.cpp             | 26 +++++++++++++++++++
> >  .../compiler/brw_nir_lower_image_load_store.c | 15 +++++++++++
> >  src/intel/compiler/brw_shader.cpp             |  5 ++++
> >  6 files changed, 79 insertions(+), 4 deletions(-)
> >
> > diff --git a/src/intel/compiler/brw_eu_defines.h
> b/src/intel/compiler/brw_eu_defines.h
> > index 2289fc9be70..c36699d7676 100644
> > --- a/src/intel/compiler/brw_eu_defines.h
> > +++ b/src/intel/compiler/brw_eu_defines.h
> > @@ -354,6 +354,9 @@ enum opcode {
> >     SHADER_OPCODE_SAMPLEINFO,
> >     SHADER_OPCODE_SAMPLEINFO_LOGICAL,
> >
> > +   SHADER_OPCODE_IMAGE_SIZE,
> > +   SHADER_OPCODE_IMAGE_SIZE_LOGICAL,
> > +
> >     /**
> >      * Combines multiple sources of size 1 into a larger virtual GRF.
> >      * For example, parameters for a send-from-GRF message.  Or, updating
> > diff --git a/src/intel/compiler/brw_fs.cpp
> b/src/intel/compiler/brw_fs.cpp
> > index 5b87991652d..1e1954270e8 100644
> > --- a/src/intel/compiler/brw_fs.cpp
> > +++ b/src/intel/compiler/brw_fs.cpp
> > @@ -4946,6 +4946,14 @@ fs_visitor::lower_logical_sends()
> >           lower_sampler_logical_send(ibld, inst,
> SHADER_OPCODE_SAMPLEINFO);
> >           break;
> >
> > +      case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
> > +         /* Nothing needs to be done except changing the opcode and
> setting a
> > +          * message length.
> > +          */
> > +         inst->opcode = SHADER_OPCODE_IMAGE_SIZE;
> > +         inst->mlen = inst->exec_size / 8;
>
> I'm not sure it's even worth having a LOGICAL opcode for this...
> maybe just set the message length when creating it and call it a day?
>

The problem is that it has to go through SIMD width lowering which doesn't
understand mlen.  We could make SIMD width lowering just shrink the mlen
but that's wrong for almost all opcodes.  Thoughts?

> > +         break;
> > +
> >        case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
> >           lower_surface_logical_send(ibld, inst,
> >                                      SHADER_OPCODE_UNTYPED_SURFACE_READ,
> > diff --git a/src/intel/compiler/brw_fs_generator.cpp
> b/src/intel/compiler/brw_fs_generator.cpp
> > index e265d59ccbe..13293d7efbd 100644
> > --- a/src/intel/compiler/brw_fs_generator.cpp
> > +++ b/src/intel/compiler/brw_fs_generator.cpp
> > @@ -1017,6 +1017,9 @@ fs_generator::generate_tex(fs_inst *inst, struct
> brw_reg dst, struct brw_reg src
> >        case SHADER_OPCODE_SAMPLEINFO:
> >           msg_type = GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO;
> >           break;
> > +      case SHADER_OPCODE_IMAGE_SIZE:
> > +      msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
> > +      break;
>
> Indentation here seems off.
>

Wow.  Somehow I got tabs in there.  My .vimrc should make that almost
impossible...

> Maybe just add this case label above the
> existing case SHADER_OPCODE_TXS, which does the exact same thing?
>

Sure, why not.

> >        default:
> >        unreachable("not reached");
> >        }
> > @@ -1126,10 +1129,20 @@ fs_generator::generate_tex(fs_inst *inst, struct
> brw_reg dst, struct brw_reg src
> >        }
> >     }
> >
> > -   uint32_t base_binding_table_index = (inst->opcode ==
> SHADER_OPCODE_TG4 ||
> > -         inst->opcode == SHADER_OPCODE_TG4_OFFSET)
> > -         ? prog_data->binding_table.gather_texture_start
> > -         : prog_data->binding_table.texture_start;
> > +   uint32_t base_binding_table_index;
> > +   switch (inst->opcode) {
> > +   case SHADER_OPCODE_TG4:
> > +   case SHADER_OPCODE_TG4_OFFSET:
> > +      base_binding_table_index =
> prog_data->binding_table.gather_texture_start;
> > +      break;
> > +   case SHADER_OPCODE_IMAGE_SIZE:
> > +      /* Image binding table indices are absolute */
>
> Uh, what?  That doesn't seem right.  Shouldn't this be
>
>       base_binding_table_index = prog_data->binding_table.image_start;
>

No... It's dumb.  The binding table index that gets placed in the uniform
image param is an absolute binding table index.  If I move this patch after
the second-to-last patch, we should be able to avoid this mess.  Why don't
I try to do that.

> > +      base_binding_table_index = 0;
> > +      break;
> > +   default:
> > +      base_binding_table_index = prog_data->binding_table.texture_start;
> > +      break;
> > +   }
> >
> >     if (surface_index.file == BRW_IMMEDIATE_VALUE &&
> >         sampler_index.file == BRW_IMMEDIATE_VALUE) {
> > @@ -2114,6 +2127,11 @@ fs_generator::generate_code(const cfg_t *cfg, int
> dispatch_width)
> >        case SHADER_OPCODE_SAMPLEINFO:
> >        generate_tex(inst, dst, src[0], src[1], src[2]);
> >        break;
> > +
> > +      case SHADER_OPCODE_IMAGE_SIZE:
> > +      generate_tex(inst, dst, src[0], src[1], brw_imm_ud(0));
> > +      break;
> > +
>
> Indentation seems off here too.
>

Yup.  Tabs again.

> >        case FS_OPCODE_DDX_COARSE:
> >        case FS_OPCODE_DDX_FINE:
> >           generate_ddx(inst, dst, src[0]);
> > diff --git a/src/intel/compiler/brw_fs_nir.cpp
> b/src/intel/compiler/brw_fs_nir.cpp
> > index 021a31d069c..ac45b559830 100644
> > --- a/src/intel/compiler/brw_fs_nir.cpp
> > +++ b/src/intel/compiler/brw_fs_nir.cpp
> > @@ -3914,6 +3914,32 @@ fs_visitor::nir_emit_intrinsic(const fs_builder
> &bld, nir_intrinsic_instr *instr
> >        break;
> >     }
> >
> > +   case nir_intrinsic_image_deref_size: {
> > +      nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
> > +      const fs_reg image = get_nir_image_deref(deref);
> > +
> > +      /* The LOD also serves as the message payload */
> > +      fs_reg lod = bld.vgrf(BRW_REGISTER_TYPE_UD);
> > +      bld.MOV(lod, brw_imm_ud(0));
> > +
> > +      fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
> > +      fs_inst *inst = bld.emit(SHADER_OPCODE_IMAGE_SIZE_LOGICAL,
> > +                               tmp, lod, image);
> > +      inst->size_written = type_sz(tmp.type) * bld.dispatch_width() * 4;
> > +
> > +      for (unsigned c = 0; c < instr->dest.ssa.num_components; ++c) {
> > +         if (c == 2 && glsl_get_sampler_dim(deref->type) ==
> GLSL_SAMPLER_DIM_CUBE) {
> > +            bld.emit(SHADER_OPCODE_INT_QUOTIENT,
> > +                     offset(retype(dest, tmp.type), bld, c),
> > +                     offset(tmp, bld, c), brw_imm_ud(6));
> > +         } else {
> > +            bld.MOV(offset(retype(dest, tmp.type), bld, c),
> > +                    offset(tmp, bld, c));
> > +         }
> > +      }
> > +      break;
> > +   }
> > +
> >     case nir_intrinsic_image_deref_load_param_intel: {
> >        nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
> >        const fs_reg image = get_nir_image_deref(deref);
> > diff --git a/src/intel/compiler/brw_nir_lower_image_load_store.c
> b/src/intel/compiler/brw_nir_lower_image_load_store.c
> > index a4bd37fc1e5..99206d81ee0 100644
> > --- a/src/intel/compiler/brw_nir_lower_image_load_store.c
> > +++ b/src/intel/compiler/brw_nir_lower_image_load_store.c
> > @@ -727,6 +727,21 @@ lower_image_size_instr(nir_builder *b,
> >                         nir_intrinsic_instr *intrin)
> >  {
> >     nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
> > +   nir_variable *var = nir_deref_instr_get_variable(deref);
> > +
> > +   /* For write-only images, we have an actual image surface so we fall
> back
> > +    * and let the back-end emit a TXQ for this.
>
> We use "TXS" for textureSize().  TXQ is a TGSI name.  Please s/TXQ/TXS/g
>

Fixed.

> > +    */
> > +   if (var->data.image.write_only)
> > +      return false;
> > +
> > +   /* If we have a matching typed format, then we have an actual image
> surface
> > +    * so we fall back and let the back-end emit a TXQ for this.
>

Here too.

> > +    */
> > +   const enum isl_format image_fmt =
> > +      isl_format_for_gl_format(var->data.image.format);
> > +   if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt))
> > +      return false;
> >
> >     b->cursor = nir_instr_remove(&intrin->instr);
> >
> > diff --git a/src/intel/compiler/brw_shader.cpp
> b/src/intel/compiler/brw_shader.cpp
> > index 804573fc631..c517451cd2d 100644
> > --- a/src/intel/compiler/brw_shader.cpp
> > +++ b/src/intel/compiler/brw_shader.cpp
> > @@ -267,6 +267,11 @@ brw_instruction_name(const struct gen_device_info
> *devinfo, enum opcode op)
> >     case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
> >        return "sampleinfo_logical";
> >
> > +   case SHADER_OPCODE_IMAGE_SIZE:
> > +      return "image_size";
> > +   case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
> > +      return "image_size_logical";
> > +
> >     case SHADER_OPCODE_SHADER_TIME_ADD:
> >        return "shader_time_add";
> >
> >
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180829/c84e4243/attachment.html>