[Mesa-dev] [PATCH 04/12] i965/fs: Move brw_wm_compile::dispatch_width into fs_visitor.

Paul Berry stereotype441 at gmail.com
Mon Nov 26 14:58:16 PST 2012


On 20 November 2012 21:40, Kenneth Graunke <kenneth at whitecape.org> wrote:

> +   unsigned dispatch_width; /** 8 or 16 */
> +
>

While we're at it, why don't we make it const?  That will prevent us from
accidentally modifying it at some inopportune time, and it *may* even cause
the compiler to generate more efficient code.

Note: if we do that we'll have to use the C++ "initialization list" syntax
to initialize it, e.g.:

fs_visitor::fs_visitor(...)
   : dispatch_width(dispatch_width)
{
   ...
}


>     int force_uncompressed_stack;
>     int force_sechalf_stack;
>  };
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> index 29c73cf..7fdf526 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> @@ -93,7 +93,7 @@ fs_visitor::generate_fb_write(fs_inst *inst)
>
>     if (this->dual_src_output.file != BAD_FILE)
>        msg_control =
> BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
> -   else if (c->dispatch_width == 16)
> +   else if (dispatch_width == 16)
>        msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
>     else
>        msg_control =
> BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
> @@ -101,7 +101,7 @@ fs_visitor::generate_fb_write(fs_inst *inst)
>     brw_pop_insn_state(p);
>
>     brw_fb_WRITE(p,
> -               c->dispatch_width,
> +               dispatch_width,
>                 inst->base_mrf,
>                 implied_header,
>                 msg_control,
> @@ -133,7 +133,7 @@ fs_visitor::generate_pixel_xy(struct brw_reg dst, bool
> is_x)
>        deltas = brw_imm_v(0x11001100);
>     }
>
> -   if (c->dispatch_width == 16) {
> +   if (dispatch_width == 16) {
>        dst = vec16(dst);
>     }
>
> @@ -203,7 +203,7 @@ fs_visitor::generate_math1_gen6(fs_inst *inst,
>             BRW_MATH_DATA_VECTOR,
>             BRW_MATH_PRECISION_FULL);
>
> -   if (c->dispatch_width == 16) {
> +   if (dispatch_width == 16) {
>        brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
>        brw_math(p, sechalf(dst),
>                op,
> @@ -227,7 +227,7 @@ fs_visitor::generate_math2_gen6(fs_inst *inst,
>     brw_set_compression_control(p, BRW_COMPRESSION_NONE);
>     brw_math2(p, dst, op, src0, src1);
>
> -   if (c->dispatch_width == 16) {
> +   if (dispatch_width == 16) {
>        brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
>        brw_math2(p, sechalf(dst), op, sechalf(src0), sechalf(src1));
>        brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
> @@ -250,7 +250,7 @@ fs_visitor::generate_math_gen4(fs_inst *inst,
>             BRW_MATH_DATA_VECTOR,
>             BRW_MATH_PRECISION_FULL);
>
> -   if (c->dispatch_width == 16) {
> +   if (dispatch_width == 16) {
>        brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
>        brw_math(p, sechalf(dst),
>                op,
> @@ -282,7 +282,7 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg
> dst, struct brw_reg src)
>        break;
>     }
>
> -   if (c->dispatch_width == 16)
> +   if (dispatch_width == 16)
>        simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
>
>     if (intel->gen >= 5) {
> @@ -328,7 +328,7 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg
> dst, struct brw_reg src)
>          /* Note that G45 and older determines shadow compare and dispatch
> width
>           * from message length for most messages.
>           */
> -        assert(c->dispatch_width == 8);
> +        assert(dispatch_width == 8);
>          msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
>          if (inst->shadow_compare) {
>             assert(inst->mlen == 6);
> @@ -731,10 +731,10 @@ fs_visitor::generate_code()
>     if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
>        if (shader) {
>           printf("Native code for fragment shader %d (%d-wide
> dispatch):\n",
> -                prog->Name, c->dispatch_width);
> +                prog->Name, dispatch_width);
>        } else {
>           printf("Native code for fragment program %d (%d-wide
> dispatch):\n",
> -                c->fp->program.Base.Id, c->dispatch_width);
> +                c->fp->program.Base.Id, dispatch_width);
>        }
>     }
>
> @@ -807,7 +807,7 @@ fs_visitor::generate_code()
>        brw_set_predicate_inverse(p, inst->predicate_inverse);
>        brw_set_saturate(p, inst->saturate);
>
> -      if (inst->force_uncompressed || c->dispatch_width == 8) {
> +      if (inst->force_uncompressed || dispatch_width == 8) {
>          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
>        } else if (inst->force_sechalf) {
>          brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
> @@ -833,7 +833,7 @@ fs_visitor::generate_code()
>
>        case BRW_OPCODE_MAD:
>          brw_set_access_mode(p, BRW_ALIGN_16);
> -        if (c->dispatch_width == 16) {
> +        if (dispatch_width == 16) {
>             brw_set_compression_control(p, BRW_COMPRESSION_NONE);
>             brw_MAD(p, dst, src[0], src[1], src[2]);
>             brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
> @@ -893,7 +893,7 @@ fs_visitor::generate_code()
>             assert(intel->gen == 6);
>             gen6_IF(p, inst->conditional_mod, src[0], src[1]);
>          } else {
> -           brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 :
> BRW_EXECUTE_8);
> +           brw_IF(p, dispatch_width == 16 ? BRW_EXECUTE_16 :
> BRW_EXECUTE_8);
>          }
>          break;
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
> index 88b0976..dc5a386 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
> @@ -587,7 +587,7 @@ fs_visitor::setup_fp_regs()
>        fp_temp_regs[i] = fs_reg(this, glsl_type::vec4_type);
>
>     /* PROGRAM_STATE_VAR etc. */
> -   if (c->dispatch_width == 8) {
> +   if (dispatch_width == 8) {
>        for (unsigned p = 0;
>             p < c->fp->program.Base.Parameters->NumParameters; p++) {
>           for (unsigned int i = 0; i < 4; i++) {
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> index d7bb721..db8f397 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> @@ -280,10 +280,10 @@ fs_visitor::virtual_grf_interferes(int a, int b)
>      * so our second half values in g6 got overwritten in the first
>      * half.
>      */
> -   if (c->dispatch_width == 16 && (this->pixel_x.reg == a ||
> -                                  this->pixel_x.reg == b ||
> -                                  this->pixel_y.reg == a ||
> -                                  this->pixel_y.reg == b)) {
> +   if (dispatch_width == 16 && (this->pixel_x.reg == a ||
> +                               this->pixel_x.reg == b ||
> +                               this->pixel_y.reg == a ||
> +                               this->pixel_y.reg == b)) {
>        return start <= end;
>     }
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> index f87cbbc..c5fd6dc 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> @@ -45,7 +45,7 @@ fs_visitor::assign_regs_trivial()
>  {
>     int hw_reg_mapping[this->virtual_grf_count + 1];
>     int i;
> -   int reg_width = c->dispatch_width / 8;
> +   int reg_width = dispatch_width / 8;
>
>     /* Note that compressed instructions require alignment to 2 registers.
> */
>     hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width);
> @@ -215,7 +215,7 @@ fs_visitor::setup_payload_interference(struct ra_graph
> *g,
>                                         int payload_node_count,
>                                         int first_payload_node)
>  {
> -   int reg_width = c->dispatch_width / 8;
> +   int reg_width = dispatch_width / 8;
>     int loop_depth = 0;
>     int loop_end_ip = 0;
>
> @@ -337,7 +337,7 @@ void
>  fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int
> first_mrf_node)
>  {
>     int mrf_count = BRW_MAX_GRF - GEN7_MRF_HACK_START;
> -   int reg_width = c->dispatch_width / 8;
> +   int reg_width = dispatch_width / 8;
>
>     /* Identify all the MRFs used in the program. */
>     bool mrf_used[mrf_count];
> @@ -393,7 +393,7 @@ fs_visitor::assign_regs()
>      * registers it's allocating be contiguous physical pairs of regs
>      * for reg_width == 2.
>      */
> -   int reg_width = c->dispatch_width / 8;
> +   int reg_width = dispatch_width / 8;
>     int hw_reg_mapping[this->virtual_grf_count];
>     int payload_node_count = (ALIGN(this->first_non_payload_grf,
> reg_width) /
>                              reg_width);
> @@ -450,7 +450,7 @@ fs_visitor::assign_regs()
>
>        if (reg == -1) {
>          fail("no register to spill\n");
> -      } else if (c->dispatch_width == 16) {
> +      } else if (dispatch_width == 16) {
>          fail("Failure to register allocate.  Reduce number of live scalar
> "
>                "values to avoid this.");
>        } else {
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
> index 11e9858..6b7c412 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
> @@ -239,7 +239,7 @@ instruction_scheduler::add_barrier_deps(schedule_node
> *n)
>  bool
>  instruction_scheduler::is_compressed(fs_inst *inst)
>  {
> -   return (v->c->dispatch_width == 16 &&
> +   return (v->dispatch_width == 16 &&
>            !inst->force_uncompressed &&
>            !inst->force_sechalf);
>  }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 5d94181..28c7c5a 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -110,7 +110,7 @@ fs_visitor::visit(ir_variable *ir)
>        if (ir->uniform_block != -1)
>           return;
>
> -      if (c->dispatch_width == 16) {
> +      if (dispatch_width == 16) {
>          if (!variable_storage(ir)) {
>             fail("Failed to find uniform '%s' in 16-wide\n", ir->name);
>          }
> @@ -381,7 +381,7 @@ fs_visitor::visit(ir_expression *ir)
>           * FINISHME: Emit just the MUL if we know an operand is small
>           * enough.
>           */
> -        if (intel->gen >= 7 && c->dispatch_width == 16)
> +        if (intel->gen >= 7 && dispatch_width == 16)
>             fail("16-wide explicit accumulator operands unsupported\n");
>
>          struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
> @@ -394,7 +394,7 @@ fs_visitor::visit(ir_expression *ir)
>        }
>        break;
>     case ir_binop_div:
> -      if (intel->gen >= 7 && c->dispatch_width == 16)
> +      if (intel->gen >= 7 && dispatch_width == 16)
>          fail("16-wide INTDIV unsupported\n");
>
>        /* Floating point should be lowered by DIV_TO_MUL_RCP in the
> compiler. */
> @@ -402,7 +402,7 @@ fs_visitor::visit(ir_expression *ir)
>        emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);
>        break;
>     case ir_binop_mod:
> -      if (intel->gen >= 7 && c->dispatch_width == 16)
> +      if (intel->gen >= 7 && dispatch_width == 16)
>          fail("16-wide INTDIV unsupported\n");
>
>        /* Floating point should be lowered by MOD_TO_FRACT in the
> compiler. */
> @@ -888,7 +888,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg
> dst, fs_reg coordinate,
>  {
>     int mlen = 0;
>     int base_mrf = 2;
> -   int reg_width = c->dispatch_width / 8;
> +   int reg_width = dispatch_width / 8;
>     bool header_present = false;
>     const int vector_elements =
>        ir->coordinate ? ir->coordinate->type->vector_elements : 0;
> @@ -1005,7 +1005,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg
> dst, fs_reg coordinate,
>  {
>     int mlen = 0;
>     int base_mrf = 2;
> -   int reg_width = c->dispatch_width / 8;
> +   int reg_width = dispatch_width / 8;
>     bool header_present = false;
>     int offsets[3];
>
> @@ -1036,7 +1036,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg
> dst, fs_reg coordinate,
>        mlen += reg_width;
>        break;
>     case ir_txd: {
> -      if (c->dispatch_width == 16)
> +      if (dispatch_width == 16)
>          fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
>
>        /* Load dPdx and the coordinate together:
> @@ -1149,7 +1149,7 @@ fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg
> coordinate,
>          0
>        };
>
> -      if (c->dispatch_width == 16) {
> +      if (dispatch_width == 16) {
>          fail("rectangle scale uniform setup not supported on 16-wide\n");
>          return coordinate;
>        }
> @@ -1615,7 +1615,7 @@ fs_visitor::emit_if_gen6(ir_if *ir)
>  void
>  fs_visitor::visit(ir_if *ir)
>  {
> -   if (intel->gen < 6 && c->dispatch_width == 16) {
> +   if (intel->gen < 6 && dispatch_width == 16) {
>        fail("Can't support (non-uniform) control flow on 16-wide\n");
>     }
>
> @@ -1658,7 +1658,7 @@ fs_visitor::visit(ir_loop *ir)
>  {
>     fs_reg counter = reg_undef;
>
> -   if (intel->gen < 6 && c->dispatch_width == 16) {
> +   if (intel->gen < 6 && dispatch_width == 16) {
>        fail("Can't support (non-uniform) control flow on 16-wide\n");
>     }
>
> @@ -1790,7 +1790,7 @@ fs_visitor::emit(fs_inst *inst)
>  void
>  fs_visitor::emit_dummy_fs()
>  {
> -   int reg_width = c->dispatch_width / 8;
> +   int reg_width = dispatch_width / 8;
>
>     /* Everyone's favorite color. */
>     emit(MOV(fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f)));
> @@ -1911,7 +1911,7 @@ fs_visitor::emit_interpolation_setup_gen6()
>  void
>  fs_visitor::emit_color_write(int target, int index, int first_color_mrf)
>  {
> -   int reg_width = c->dispatch_width / 8;
> +   int reg_width = dispatch_width / 8;
>     fs_inst *inst;
>     fs_reg color = outputs[target];
>     fs_reg mrf;
> @@ -1922,7 +1922,7 @@ fs_visitor::emit_color_write(int target, int index,
> int first_color_mrf)
>
>     color.reg_offset += index;
>
> -   if (c->dispatch_width == 8 || intel->gen >= 6) {
> +   if (dispatch_width == 8 || intel->gen >= 6) {
>        /* SIMD8 write looks like:
>         * m + 0: r0
>         * m + 1: r1
> @@ -1992,11 +1992,11 @@ fs_visitor::emit_fb_writes()
>      */
>     int base_mrf = 1;
>     int nr = base_mrf;
> -   int reg_width = c->dispatch_width / 8;
> +   int reg_width = dispatch_width / 8;
>     bool do_dual_src = this->dual_src_output.file != BAD_FILE;
>     bool src0_alpha_to_render_target = false;
>
> -   if (c->dispatch_width == 16 && do_dual_src) {
> +   if (dispatch_width == 16 && do_dual_src) {
>        fail("GL_ARB_blend_func_extended not yet supported in 16-wide.");
>        do_dual_src = false;
>     }
> @@ -2040,7 +2040,7 @@ fs_visitor::emit_fb_writes()
>        nr += reg_width;
>
>     if (c->source_depth_to_render_target) {
> -      if (intel->gen == 6 && c->dispatch_width == 16) {
> +      if (intel->gen == 6 && dispatch_width == 16) {
>          /* For outputting oDepth on gen6, SIMD8 writes have to be
>           * used.  This would require 8-wide moves of each half to
>           * message regs, kind of like pre-gen5 SIMD16 FB writes.
> @@ -2175,7 +2175,7 @@ fs_visitor::resolve_bool_comparison(ir_rvalue
> *rvalue, fs_reg *reg)
>  }
>
>  fs_visitor::fs_visitor(struct brw_wm_compile *c, struct gl_shader_program
> *prog,
> -                       struct brw_shader *shader)
> +                       struct brw_shader *shader, unsigned dispatch_width)
>  {
>     this->c = c;
>     this->p = &c->func;
> @@ -2186,6 +2186,7 @@ fs_visitor::fs_visitor(struct brw_wm_compile *c,
> struct gl_shader_program *prog,
>     this->ctx = &intel->ctx;
>     this->mem_ctx = ralloc_context(NULL);
>     this->shader = shader;
> +   this->dispatch_width = dispatch_width;
>     this->failed = false;
>     this->variable_ht = hash_table_ctor(0,
>                                         hash_table_pointer_hash,
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.h
> b/src/mesa/drivers/dri/i965/brw_wm.h
> index b8d8df3..a5ebddd 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.h
> +++ b/src/mesa/drivers/dri/i965/brw_wm.h
> @@ -93,8 +93,6 @@ struct brw_wm_compile {
>     GLuint runtime_check_aads_emit:1;
>
>     GLuint last_scratch;
> -
> -   GLuint dispatch_width;
>  };
>
>  bool brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
> --
> 1.8.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20121126/2be2da88/attachment-0001.html>


More information about the mesa-dev mailing list