[Mesa-dev] [PATCH 4/4] i965/nir: Implement SIMD16 support in the NIR FS backend.
Jason Ekstrand
jason at jlekstrand.net
Fri Jan 16 06:58:50 PST 2015
On Jan 16, 2015 5:30 AM, "Kenneth Graunke" <kenneth at whitecape.org> wrote:
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
> src/mesa/drivers/dri/i965/brw_fs.cpp | 5 +--
> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 73
+++++++++++++++++++++-----------
> 2 files changed, 50 insertions(+), 28 deletions(-)
>
> That was easy
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index b7378de..9572f3a 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -1035,8 +1035,8 @@ fs_visitor::virtual_grf_alloc(int size)
> fs_reg
> fs_visitor::vgrf(int num_components)
> {
> - return fs_reg(GRF, virtual_grf_alloc(num_components),
BRW_REGISTER_TYPE_F,
> - dispatch_width);
> + return fs_reg(GRF, virtual_grf_alloc(num_components * (dispatch_width
/ 8)),
> + BRW_REGISTER_TYPE_F, dispatch_width);
> }
For fs_visitor, we use a constructor that takes a fs_visitor and a
glsl_type. I kind of prefer this in fs_visitor over fs_reg. Maybe we
should move the fs_reg(fs_visitor, glsl_type) constructor to be a
vgrf(glsl_type) function? That doesn't have to be part of this though.
>
> /** Fixed HW reg constructor. */
> @@ -3759,7 +3759,6 @@ fs_visitor::run_fs()
> */
> if (shader) {
> if (getenv("INTEL_USE_NIR") != NULL && !brw->use_rep_send) {
You can kill the !brw->use_rep_send now. It was only there to keep us from
GPU-hanging on rep and fast clears due to having no shaders at all. Since
we have SIMD16 in NIR, we don't need it anymore.
> - no16("Cannot do 16-wide in NIR yet");
> emit_nir_code();
> } else {
> foreach_in_list(ir_instruction, ir, shader->base.ir) {
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index fca03e5..5208985 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -132,12 +132,14 @@ fs_visitor::emit_nir_code()
> void
> fs_visitor::nir_setup_inputs(nir_shader *shader)
> {
> + int reg_width = dispatch_width / 8;
> +
> fs_reg varying = nir_inputs;
>
> struct hash_entry *entry;
> hash_table_foreach(shader->inputs, entry) {
> nir_variable *var = (nir_variable *) entry->data;
> - varying.reg_offset = var->data.driver_location;
> + varying.reg_offset = var->data.driver_location * reg_width;
>
> fs_reg reg;
> if (!strcmp(var->name, "gl_FragCoord")) {
> @@ -159,13 +161,15 @@ fs_visitor::nir_setup_inputs(nir_shader *shader)
> void
> fs_visitor::nir_setup_outputs(nir_shader *shader)
> {
> + int reg_width = dispatch_width / 8;
> +
> brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
> fs_reg reg = nir_outputs;
>
> struct hash_entry *entry;
> hash_table_foreach(shader->outputs, entry) {
> nir_variable *var = (nir_variable *) entry->data;
> - reg.reg_offset = var->data.driver_location;
> + reg.reg_offset = var->data.driver_location * reg_width;
>
> if (var->data.index > 0) {
> assert(var->data.location == FRAG_RESULT_DATA0);
> @@ -195,7 +199,8 @@ fs_visitor::nir_setup_outputs(nir_shader *shader)
> for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
> int output = var->data.location - FRAG_RESULT_DATA0 + i;
> this->outputs[output] = reg;
> - this->outputs[output].reg_offset += vector_elements * i;
> + this->outputs[output].reg_offset +=
> + (vector_elements * i) * reg_width;
Assuming the register width is correct, you can just use reg = offset(reg,
vector_elements * i) here and offset() will deal with the width. Connor
originally wrote the NIR->FS pass while I was in the middle of my SIMD16
stuff so it didn't get incorporated.
I could repeat the above comment throughout the patch. Also, replacing
everything with offset() can be done as its own patch before flipping the
switch.
> this->output_components[output] = vector_elements;
> }
> }
> @@ -1035,6 +1040,8 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
> fs_reg
> fs_visitor::get_nir_src(nir_src src)
> {
> + int reg_width = dispatch_width / 8;
> +
> if (src.is_ssa) {
> assert(src.ssa->parent_instr->type == nir_instr_type_load_const);
> nir_load_const_instr *load =
nir_instr_as_load_const(src.ssa->parent_instr);
> @@ -1057,7 +1064,7 @@ fs_visitor::get_nir_src(nir_src src)
> * this to F if they need to
> */
> reg.type = BRW_REGISTER_TYPE_D;
> - reg.reg_offset = src.reg.base_offset;
> + reg.reg_offset = src.reg.base_offset * reg_width;
> if (src.reg.indirect) {
> reg.reladdr = new(mem_ctx) fs_reg();
> *reg.reladdr = retype(get_nir_src(*src.reg.indirect),
> @@ -1111,13 +1118,15 @@ fs_visitor::get_nir_alu_src(nir_alu_instr *instr,
unsigned src)
> fs_reg
> fs_visitor::get_nir_dest(nir_dest dest)
> {
> + int reg_width = dispatch_width / 8;
> +
> fs_reg reg;
> if (dest.reg.reg->is_global)
> reg = nir_globals[dest.reg.reg->index];
> else
> reg = nir_locals[dest.reg.reg->index];
>
> - reg.reg_offset = dest.reg.base_offset;
> + reg.reg_offset = dest.reg.base_offset * reg_width;
> if (dest.reg.indirect) {
> reg.reladdr = new(mem_ctx) fs_reg();
> *reg.reladdr = retype(get_nir_src(*dest.reg.indirect),
> @@ -1130,15 +1139,17 @@ fs_visitor::get_nir_dest(nir_dest dest)
> void
> fs_visitor::emit_percomp(fs_inst *inst, unsigned wr_mask)
> {
> + int reg_width = dispatch_width / 8;
> +
> for (unsigned i = 0; i < 4; i++) {
> if (!((wr_mask >> i) & 1))
> continue;
>
> fs_inst *new_inst = new(mem_ctx) fs_inst(*inst);
> - new_inst->dst.reg_offset += i;
> + new_inst->dst.reg_offset += i * reg_width;
> for (unsigned j = 0; j < new_inst->sources; j++)
> if (inst->src[j].file == GRF)
> - new_inst->src[j].reg_offset += i;
> + new_inst->src[j].reg_offset += i * reg_width;
>
> emit(new_inst);
> }
> @@ -1150,15 +1161,17 @@ fs_visitor::emit_percomp(enum opcode op, fs_reg
dest, fs_reg src0,
> enum brw_predicate predicate,
> enum brw_conditional_mod mod)
> {
> + int reg_width = dispatch_width / 8;
> +
> for (unsigned i = 0; i < 4; i++) {
> if (!((wr_mask >> i) & 1))
> continue;
>
> fs_inst *new_inst = new(mem_ctx) fs_inst(op, dest, src0);
> - new_inst->dst.reg_offset += i;
> + new_inst->dst.reg_offset += i * reg_width;
> for (unsigned j = 0; j < new_inst->sources; j++)
> if (new_inst->src[j].file == GRF)
> - new_inst->src[j].reg_offset += i;
> + new_inst->src[j].reg_offset += i * reg_width;
>
> new_inst->predicate = predicate;
> new_inst->conditional_mod = mod;
> @@ -1173,15 +1186,17 @@ fs_visitor::emit_percomp(enum opcode op, fs_reg
dest, fs_reg src0, fs_reg src1,
> enum brw_predicate predicate,
> enum brw_conditional_mod mod)
> {
> + int reg_width = dispatch_width / 8;
> +
> for (unsigned i = 0; i < 4; i++) {
> if (!((wr_mask >> i) & 1))
> continue;
>
> fs_inst *new_inst = new(mem_ctx) fs_inst(op, dest, src0, src1);
> - new_inst->dst.reg_offset += i;
> + new_inst->dst.reg_offset += i * reg_width;
> for (unsigned j = 0; j < new_inst->sources; j++)
> if (new_inst->src[j].file == GRF)
> - new_inst->src[j].reg_offset += i;
> + new_inst->src[j].reg_offset += i * reg_width;
>
> new_inst->predicate = predicate;
> new_inst->conditional_mod = mod;
> @@ -1194,15 +1209,17 @@ void
> fs_visitor::emit_math_percomp(enum opcode op, fs_reg dest, fs_reg src0,
> unsigned wr_mask, bool saturate)
> {
> + int reg_width = dispatch_width / 8;
> +
> for (unsigned i = 0; i < 4; i++) {
> if (!((wr_mask >> i) & 1))
> continue;
>
> fs_reg new_dest = dest;
> - new_dest.reg_offset += i;
> + new_dest.reg_offset += i * reg_width;
> fs_reg new_src0 = src0;
> if (src0.file == GRF)
> - new_src0.reg_offset += i;
> + new_src0.reg_offset += i * reg_width;
>
> fs_inst *new_inst = emit_math(op, new_dest, new_src0);
> new_inst->saturate = saturate;
> @@ -1214,18 +1231,20 @@ fs_visitor::emit_math_percomp(enum opcode op,
fs_reg dest, fs_reg src0,
> fs_reg src1, unsigned wr_mask,
> bool saturate)
> {
> + int reg_width = dispatch_width / 8;
> +
> for (unsigned i = 0; i < 4; i++) {
> if (!((wr_mask >> i) & 1))
> continue;
>
> fs_reg new_dest = dest;
> - new_dest.reg_offset += i;
> + new_dest.reg_offset += i * reg_width;
> fs_reg new_src0 = src0;
> if (src0.file == GRF)
> - new_src0.reg_offset += i;
> + new_src0.reg_offset += i * reg_width;
> fs_reg new_src1 = src1;
> if (src1.file == GRF)
> - new_src1.reg_offset += i;
> + new_src1.reg_offset += i * reg_width;
>
> fs_inst *new_inst = emit_math(op, new_dest, new_src0, new_src1);
> new_inst->saturate = saturate;
> @@ -1236,9 +1255,11 @@ void
> fs_visitor::emit_reduction(enum opcode op, fs_reg dest, fs_reg src,
> unsigned num_components)
> {
> + int reg_width = dispatch_width / 8;
> +
> fs_reg src0 = src;
> fs_reg src1 = src;
> - src1.reg_offset++;
> + src1.reg_offset += 1 * reg_width;
>
> if (num_components == 2) {
> emit(op, dest, src0, src1);
> @@ -1250,7 +1271,7 @@ fs_visitor::emit_reduction(enum opcode op, fs_reg
dest, fs_reg src,
> emit(op, temp1, src0, src1);
>
> fs_reg src2 = src;
> - src2.reg_offset += 2;
> + src2.reg_offset += 2 * reg_width;
>
> if (num_components == 3) {
> emit(op, dest, temp1, src2);
> @@ -1260,7 +1281,7 @@ fs_visitor::emit_reduction(enum opcode op, fs_reg
dest, fs_reg src,
> assert(num_components == 4);
>
> fs_reg src3 = src;
> - src3.reg_offset += 3;
> + src3.reg_offset += 3 * reg_width;
> fs_reg temp2 = vgrf(1);
> temp2.type = src.type;
>
> @@ -1271,6 +1292,8 @@ fs_visitor::emit_reduction(enum opcode op, fs_reg
dest, fs_reg src,
> void
> fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
> {
> + int reg_width = dispatch_width / 8;
> +
> fs_reg dest;
> if (nir_intrinsic_infos[instr->intrinsic].has_dest)
> dest = get_nir_dest(instr->dest);
> @@ -1374,7 +1397,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr
*instr)
> index++;
>
> emit(MOV(dest, src));
> - dest.reg_offset++;
> + dest.reg_offset += 1 * reg_width;
> }
> }
> break;
> @@ -1435,7 +1458,7 @@ fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr
*instr)
> assert(packed_consts.subreg_offset < 32);
>
> emit(MOV(dest, packed_consts));
> - dest.reg_offset++;
> + dest.reg_offset += 1 * reg_width;
> }
> }
> break;
> @@ -1448,14 +1471,14 @@
fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
> for (int i = 0; i < instr->const_index[1]; i++) {
> for (unsigned j = 0; j < instr->num_components; j++) {
> fs_reg src = nir_inputs;
> - src.reg_offset = instr->const_index[0] + index;
> + src.reg_offset = (instr->const_index[0] + index) * reg_width;
> if (has_indirect)
> src.reladdr = new(mem_ctx)
fs_reg(get_nir_src(instr->src[0]));
> src.type = dest.type;
> index++;
>
> emit(MOV(dest, src));
> - dest.reg_offset++;
> + dest.reg_offset += 1 * reg_width;
> }
> }
> break;
> @@ -1587,13 +1610,13 @@
fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
> for (int i = 0; i < instr->const_index[1]; i++) {
> for (unsigned j = 0; j < instr->num_components; j++) {
> fs_reg new_dest = nir_outputs;
> - new_dest.reg_offset = instr->const_index[0] + index;
> + new_dest.reg_offset = (instr->const_index[0] + index) *
reg_width;
> if (has_indirect)
> src.reladdr = new(mem_ctx)
fs_reg(get_nir_src(instr->src[1]));
> new_dest.type = src.type;
> index++;
> emit(MOV(new_dest, src));
> - src.reg_offset++;
> + src.reg_offset += 1 * reg_width;
> }
> }
> break;
> --
> 2.2.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150116/9a421333/attachment-0001.html>
More information about the mesa-dev
mailing list