[Mesa-dev] [PATCH 04/12] i965/fs: Move brw_wm_compile::dispatch_width into fs_visitor.
Paul Berry
stereotype441 at gmail.com
Mon Nov 26 14:58:16 PST 2012
On 20 November 2012 21:40, Kenneth Graunke <kenneth at whitecape.org> wrote:
> + unsigned dispatch_width; /** 8 or 16 */
> +
>
While we're at it, why don't we make it const? That will prevent us from
accidentally modifying it at some inopportune time, and it *may* even cause
the compiler to generate more efficient code.
Note: if we do that, we'll have to use the C++ "member initializer list"
syntax to initialize it (a const member can't be assigned in the constructor
body), e.g.:
fs_visitor::fs_visitor(...)
: dispatch_width(dispatch_width)
{
...
}
> int force_uncompressed_stack;
> int force_sechalf_stack;
> };
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> index 29c73cf..7fdf526 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
> @@ -93,7 +93,7 @@ fs_visitor::generate_fb_write(fs_inst *inst)
>
> if (this->dual_src_output.file != BAD_FILE)
> msg_control =
> BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
> - else if (c->dispatch_width == 16)
> + else if (dispatch_width == 16)
> msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
> else
> msg_control =
> BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
> @@ -101,7 +101,7 @@ fs_visitor::generate_fb_write(fs_inst *inst)
> brw_pop_insn_state(p);
>
> brw_fb_WRITE(p,
> - c->dispatch_width,
> + dispatch_width,
> inst->base_mrf,
> implied_header,
> msg_control,
> @@ -133,7 +133,7 @@ fs_visitor::generate_pixel_xy(struct brw_reg dst, bool
> is_x)
> deltas = brw_imm_v(0x11001100);
> }
>
> - if (c->dispatch_width == 16) {
> + if (dispatch_width == 16) {
> dst = vec16(dst);
> }
>
> @@ -203,7 +203,7 @@ fs_visitor::generate_math1_gen6(fs_inst *inst,
> BRW_MATH_DATA_VECTOR,
> BRW_MATH_PRECISION_FULL);
>
> - if (c->dispatch_width == 16) {
> + if (dispatch_width == 16) {
> brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
> brw_math(p, sechalf(dst),
> op,
> @@ -227,7 +227,7 @@ fs_visitor::generate_math2_gen6(fs_inst *inst,
> brw_set_compression_control(p, BRW_COMPRESSION_NONE);
> brw_math2(p, dst, op, src0, src1);
>
> - if (c->dispatch_width == 16) {
> + if (dispatch_width == 16) {
> brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
> brw_math2(p, sechalf(dst), op, sechalf(src0), sechalf(src1));
> brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
> @@ -250,7 +250,7 @@ fs_visitor::generate_math_gen4(fs_inst *inst,
> BRW_MATH_DATA_VECTOR,
> BRW_MATH_PRECISION_FULL);
>
> - if (c->dispatch_width == 16) {
> + if (dispatch_width == 16) {
> brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
> brw_math(p, sechalf(dst),
> op,
> @@ -282,7 +282,7 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg
> dst, struct brw_reg src)
> break;
> }
>
> - if (c->dispatch_width == 16)
> + if (dispatch_width == 16)
> simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
>
> if (intel->gen >= 5) {
> @@ -328,7 +328,7 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg
> dst, struct brw_reg src)
> /* Note that G45 and older determines shadow compare and dispatch
> width
> * from message length for most messages.
> */
> - assert(c->dispatch_width == 8);
> + assert(dispatch_width == 8);
> msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
> if (inst->shadow_compare) {
> assert(inst->mlen == 6);
> @@ -731,10 +731,10 @@ fs_visitor::generate_code()
> if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
> if (shader) {
> printf("Native code for fragment shader %d (%d-wide
> dispatch):\n",
> - prog->Name, c->dispatch_width);
> + prog->Name, dispatch_width);
> } else {
> printf("Native code for fragment program %d (%d-wide
> dispatch):\n",
> - c->fp->program.Base.Id, c->dispatch_width);
> + c->fp->program.Base.Id, dispatch_width);
> }
> }
>
> @@ -807,7 +807,7 @@ fs_visitor::generate_code()
> brw_set_predicate_inverse(p, inst->predicate_inverse);
> brw_set_saturate(p, inst->saturate);
>
> - if (inst->force_uncompressed || c->dispatch_width == 8) {
> + if (inst->force_uncompressed || dispatch_width == 8) {
> brw_set_compression_control(p, BRW_COMPRESSION_NONE);
> } else if (inst->force_sechalf) {
> brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
> @@ -833,7 +833,7 @@ fs_visitor::generate_code()
>
> case BRW_OPCODE_MAD:
> brw_set_access_mode(p, BRW_ALIGN_16);
> - if (c->dispatch_width == 16) {
> + if (dispatch_width == 16) {
> brw_set_compression_control(p, BRW_COMPRESSION_NONE);
> brw_MAD(p, dst, src[0], src[1], src[2]);
> brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
> @@ -893,7 +893,7 @@ fs_visitor::generate_code()
> assert(intel->gen == 6);
> gen6_IF(p, inst->conditional_mod, src[0], src[1]);
> } else {
> - brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 :
> BRW_EXECUTE_8);
> + brw_IF(p, dispatch_width == 16 ? BRW_EXECUTE_16 :
> BRW_EXECUTE_8);
> }
> break;
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
> index 88b0976..dc5a386 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
> @@ -587,7 +587,7 @@ fs_visitor::setup_fp_regs()
> fp_temp_regs[i] = fs_reg(this, glsl_type::vec4_type);
>
> /* PROGRAM_STATE_VAR etc. */
> - if (c->dispatch_width == 8) {
> + if (dispatch_width == 8) {
> for (unsigned p = 0;
> p < c->fp->program.Base.Parameters->NumParameters; p++) {
> for (unsigned int i = 0; i < 4; i++) {
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> index d7bb721..db8f397 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
> @@ -280,10 +280,10 @@ fs_visitor::virtual_grf_interferes(int a, int b)
> * so our second half values in g6 got overwritten in the first
> * half.
> */
> - if (c->dispatch_width == 16 && (this->pixel_x.reg == a ||
> - this->pixel_x.reg == b ||
> - this->pixel_y.reg == a ||
> - this->pixel_y.reg == b)) {
> + if (dispatch_width == 16 && (this->pixel_x.reg == a ||
> + this->pixel_x.reg == b ||
> + this->pixel_y.reg == a ||
> + this->pixel_y.reg == b)) {
> return start <= end;
> }
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> index f87cbbc..c5fd6dc 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> @@ -45,7 +45,7 @@ fs_visitor::assign_regs_trivial()
> {
> int hw_reg_mapping[this->virtual_grf_count + 1];
> int i;
> - int reg_width = c->dispatch_width / 8;
> + int reg_width = dispatch_width / 8;
>
> /* Note that compressed instructions require alignment to 2 registers.
> */
> hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width);
> @@ -215,7 +215,7 @@ fs_visitor::setup_payload_interference(struct ra_graph
> *g,
> int payload_node_count,
> int first_payload_node)
> {
> - int reg_width = c->dispatch_width / 8;
> + int reg_width = dispatch_width / 8;
> int loop_depth = 0;
> int loop_end_ip = 0;
>
> @@ -337,7 +337,7 @@ void
> fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int
> first_mrf_node)
> {
> int mrf_count = BRW_MAX_GRF - GEN7_MRF_HACK_START;
> - int reg_width = c->dispatch_width / 8;
> + int reg_width = dispatch_width / 8;
>
> /* Identify all the MRFs used in the program. */
> bool mrf_used[mrf_count];
> @@ -393,7 +393,7 @@ fs_visitor::assign_regs()
> * registers it's allocating be contiguous physical pairs of regs
> * for reg_width == 2.
> */
> - int reg_width = c->dispatch_width / 8;
> + int reg_width = dispatch_width / 8;
> int hw_reg_mapping[this->virtual_grf_count];
> int payload_node_count = (ALIGN(this->first_non_payload_grf,
> reg_width) /
> reg_width);
> @@ -450,7 +450,7 @@ fs_visitor::assign_regs()
>
> if (reg == -1) {
> fail("no register to spill\n");
> - } else if (c->dispatch_width == 16) {
> + } else if (dispatch_width == 16) {
> fail("Failure to register allocate. Reduce number of live scalar
> "
> "values to avoid this.");
> } else {
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
> index 11e9858..6b7c412 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
> @@ -239,7 +239,7 @@ instruction_scheduler::add_barrier_deps(schedule_node
> *n)
> bool
> instruction_scheduler::is_compressed(fs_inst *inst)
> {
> - return (v->c->dispatch_width == 16 &&
> + return (v->dispatch_width == 16 &&
> !inst->force_uncompressed &&
> !inst->force_sechalf);
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 5d94181..28c7c5a 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -110,7 +110,7 @@ fs_visitor::visit(ir_variable *ir)
> if (ir->uniform_block != -1)
> return;
>
> - if (c->dispatch_width == 16) {
> + if (dispatch_width == 16) {
> if (!variable_storage(ir)) {
> fail("Failed to find uniform '%s' in 16-wide\n", ir->name);
> }
> @@ -381,7 +381,7 @@ fs_visitor::visit(ir_expression *ir)
> * FINISHME: Emit just the MUL if we know an operand is small
> * enough.
> */
> - if (intel->gen >= 7 && c->dispatch_width == 16)
> + if (intel->gen >= 7 && dispatch_width == 16)
> fail("16-wide explicit accumulator operands unsupported\n");
>
> struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
> @@ -394,7 +394,7 @@ fs_visitor::visit(ir_expression *ir)
> }
> break;
> case ir_binop_div:
> - if (intel->gen >= 7 && c->dispatch_width == 16)
> + if (intel->gen >= 7 && dispatch_width == 16)
> fail("16-wide INTDIV unsupported\n");
>
> /* Floating point should be lowered by DIV_TO_MUL_RCP in the
> compiler. */
> @@ -402,7 +402,7 @@ fs_visitor::visit(ir_expression *ir)
> emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);
> break;
> case ir_binop_mod:
> - if (intel->gen >= 7 && c->dispatch_width == 16)
> + if (intel->gen >= 7 && dispatch_width == 16)
> fail("16-wide INTDIV unsupported\n");
>
> /* Floating point should be lowered by MOD_TO_FRACT in the
> compiler. */
> @@ -888,7 +888,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg
> dst, fs_reg coordinate,
> {
> int mlen = 0;
> int base_mrf = 2;
> - int reg_width = c->dispatch_width / 8;
> + int reg_width = dispatch_width / 8;
> bool header_present = false;
> const int vector_elements =
> ir->coordinate ? ir->coordinate->type->vector_elements : 0;
> @@ -1005,7 +1005,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg
> dst, fs_reg coordinate,
> {
> int mlen = 0;
> int base_mrf = 2;
> - int reg_width = c->dispatch_width / 8;
> + int reg_width = dispatch_width / 8;
> bool header_present = false;
> int offsets[3];
>
> @@ -1036,7 +1036,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg
> dst, fs_reg coordinate,
> mlen += reg_width;
> break;
> case ir_txd: {
> - if (c->dispatch_width == 16)
> + if (dispatch_width == 16)
> fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
>
> /* Load dPdx and the coordinate together:
> @@ -1149,7 +1149,7 @@ fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg
> coordinate,
> 0
> };
>
> - if (c->dispatch_width == 16) {
> + if (dispatch_width == 16) {
> fail("rectangle scale uniform setup not supported on 16-wide\n");
> return coordinate;
> }
> @@ -1615,7 +1615,7 @@ fs_visitor::emit_if_gen6(ir_if *ir)
> void
> fs_visitor::visit(ir_if *ir)
> {
> - if (intel->gen < 6 && c->dispatch_width == 16) {
> + if (intel->gen < 6 && dispatch_width == 16) {
> fail("Can't support (non-uniform) control flow on 16-wide\n");
> }
>
> @@ -1658,7 +1658,7 @@ fs_visitor::visit(ir_loop *ir)
> {
> fs_reg counter = reg_undef;
>
> - if (intel->gen < 6 && c->dispatch_width == 16) {
> + if (intel->gen < 6 && dispatch_width == 16) {
> fail("Can't support (non-uniform) control flow on 16-wide\n");
> }
>
> @@ -1790,7 +1790,7 @@ fs_visitor::emit(fs_inst *inst)
> void
> fs_visitor::emit_dummy_fs()
> {
> - int reg_width = c->dispatch_width / 8;
> + int reg_width = dispatch_width / 8;
>
> /* Everyone's favorite color. */
> emit(MOV(fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f)));
> @@ -1911,7 +1911,7 @@ fs_visitor::emit_interpolation_setup_gen6()
> void
> fs_visitor::emit_color_write(int target, int index, int first_color_mrf)
> {
> - int reg_width = c->dispatch_width / 8;
> + int reg_width = dispatch_width / 8;
> fs_inst *inst;
> fs_reg color = outputs[target];
> fs_reg mrf;
> @@ -1922,7 +1922,7 @@ fs_visitor::emit_color_write(int target, int index,
> int first_color_mrf)
>
> color.reg_offset += index;
>
> - if (c->dispatch_width == 8 || intel->gen >= 6) {
> + if (dispatch_width == 8 || intel->gen >= 6) {
> /* SIMD8 write looks like:
> * m + 0: r0
> * m + 1: r1
> @@ -1992,11 +1992,11 @@ fs_visitor::emit_fb_writes()
> */
> int base_mrf = 1;
> int nr = base_mrf;
> - int reg_width = c->dispatch_width / 8;
> + int reg_width = dispatch_width / 8;
> bool do_dual_src = this->dual_src_output.file != BAD_FILE;
> bool src0_alpha_to_render_target = false;
>
> - if (c->dispatch_width == 16 && do_dual_src) {
> + if (dispatch_width == 16 && do_dual_src) {
> fail("GL_ARB_blend_func_extended not yet supported in 16-wide.");
> do_dual_src = false;
> }
> @@ -2040,7 +2040,7 @@ fs_visitor::emit_fb_writes()
> nr += reg_width;
>
> if (c->source_depth_to_render_target) {
> - if (intel->gen == 6 && c->dispatch_width == 16) {
> + if (intel->gen == 6 && dispatch_width == 16) {
> /* For outputting oDepth on gen6, SIMD8 writes have to be
> * used. This would require 8-wide moves of each half to
> * message regs, kind of like pre-gen5 SIMD16 FB writes.
> @@ -2175,7 +2175,7 @@ fs_visitor::resolve_bool_comparison(ir_rvalue
> *rvalue, fs_reg *reg)
> }
>
> fs_visitor::fs_visitor(struct brw_wm_compile *c, struct gl_shader_program
> *prog,
> - struct brw_shader *shader)
> + struct brw_shader *shader, unsigned dispatch_width)
> {
> this->c = c;
> this->p = &c->func;
> @@ -2186,6 +2186,7 @@ fs_visitor::fs_visitor(struct brw_wm_compile *c,
> struct gl_shader_program *prog,
> this->ctx = &intel->ctx;
> this->mem_ctx = ralloc_context(NULL);
> this->shader = shader;
> + this->dispatch_width = dispatch_width;
> this->failed = false;
> this->variable_ht = hash_table_ctor(0,
> hash_table_pointer_hash,
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.h
> b/src/mesa/drivers/dri/i965/brw_wm.h
> index b8d8df3..a5ebddd 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.h
> +++ b/src/mesa/drivers/dri/i965/brw_wm.h
> @@ -93,8 +93,6 @@ struct brw_wm_compile {
> GLuint runtime_check_aads_emit:1;
>
> GLuint last_scratch;
> -
> - GLuint dispatch_width;
> };
>
> bool brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
> --
> 1.8.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20121126/2be2da88/attachment-0001.html>
More information about the mesa-dev
mailing list