[Mesa-dev] [PATCH v4 (part2) 15/59] i965/vec4: Implement unsized array's length calculation

Iago Toral Quiroga itoral at igalia.com
Wed Aug 5 01:30:12 PDT 2015


From: Samuel Iglesias Gonsalvez <siglesias at igalia.com>

Notice that Skylake needs to include a header in the sampler message
so it will need some tweaks to work there.

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias at igalia.com>
---
 src/mesa/drivers/dri/i965/brw_defines.h          |  3 ++
 src/mesa/drivers/dri/i965/brw_shader.cpp         |  3 ++
 src/mesa/drivers/dri/i965/brw_vec4.cpp           |  1 +
 src/mesa/drivers/dri/i965/brw_vec4.h             |  6 ++++
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 31 ++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   | 46 ++++++++++++++++++++++--
 6 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index f595366..3148849 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1050,6 +1050,9 @@ enum opcode {
    VS_OPCODE_PULL_CONSTANT_LOAD,
    VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
    VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
+
+   VS_OPCODE_UNSIZED_ARRAY_LENGTH,
+
    VS_OPCODE_UNPACK_FLAGS_SIMD4X2,
 
    /**
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 819e4f2..7890835 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -743,6 +743,9 @@ brw_instruction_name(enum opcode op)
    case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
       return "set_simd4x2_header_gen9";
 
+   case VS_OPCODE_UNSIZED_ARRAY_LENGTH:
+      return "vs_unsized_array_length";
+
    case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
       return "unpack_flags_simd4x2";
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index f18915a..7c9f362 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -328,6 +328,7 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
    case SHADER_OPCODE_TXS:
    case SHADER_OPCODE_TG4:
    case SHADER_OPCODE_TG4_OFFSET:
+   case VS_OPCODE_UNSIZED_ARRAY_LENGTH:
       return inst->header_size;
    default:
       unreachable("not reached");
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 985886d..3f73beb 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -566,6 +566,12 @@ private:
                                          struct brw_reg offset);
    void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
                                          struct brw_reg dst);
+
+   void generate_unsized_array_length(vec4_instruction *inst,
+                                      struct brw_reg dst,
+                                      struct brw_reg src,
+                                      struct brw_reg index);
+
    void generate_unpack_flags(struct brw_reg dst);
 
    const struct brw_compiler *compiler;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 92050b9..1472f75 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1029,6 +1029,32 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
 }
 
 void
+vec4_generator::generate_unsized_array_length(vec4_instruction *inst,
+                                                 struct brw_reg dst,
+                                                 struct brw_reg src,
+                                                 struct brw_reg surf_index)
+{
+   assert(devinfo->gen >= 7);
+   assert(surf_index.type == BRW_REGISTER_TYPE_UD &&
+          surf_index.file == BRW_IMMEDIATE_VALUE);
+
+   brw_SAMPLE(p,
+              dst,
+              inst->base_mrf,
+              src,
+              surf_index.dw1.ud,
+              0,
+              GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
+              1, /* response length */
+              inst->mlen,
+              inst->header_size > 0,
+              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
+              BRW_SAMPLER_RETURN_FORMAT_SINT32);
+
+   brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
+}
+
+void
 vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
                                                  struct brw_reg dst,
                                                  struct brw_reg surf_index,
@@ -1401,6 +1427,11 @@ vec4_generator::generate_code(const cfg_t *cfg)
          generate_set_simd4x2_header_gen9(inst, dst);
          break;
 
+
+      case VS_OPCODE_UNSIZED_ARRAY_LENGTH:
+         generate_unsized_array_length(inst, dst, src[0], src[1]);
+         break;
+
       case GS_OPCODE_URB_WRITE:
          generate_gs_urb_write(inst);
          break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 1bf2e1b..c7a0d4a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1940,9 +1940,51 @@ vec4_visitor::visit(ir_expression *ir)
       emit(BFE(result_dst, op[2], op[1], op[0]));
       break;
 
-   case ir_triop_ssbo_unsized_array_length:
-      unreachable("not reached: not implemented");
+   case ir_triop_ssbo_unsized_array_length: {
+      ir_constant *const_uniform_block = ir->operands[0]->as_constant();
+      unsigned ubo_index = const_uniform_block->value.u[0];
+      ir_constant *const_offset_ir = ir->operands[1]->as_constant();
+      int const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
+      ir_constant *const_stride_ir = ir->operands[2]->as_constant();
+      int unsized_array_stride = const_stride_ir ? const_stride_ir->value.u[0] : 1;
+
+      assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
+
+      src_reg surf_index = src_reg(prog_data->base.binding_table.ubo_start +
+                                   ubo_index);
+
+      dst_reg buffer_size = dst_reg(this, ir->type);
+
+      vec4_instruction *inst = new(mem_ctx) vec4_instruction(
+         VS_OPCODE_UNSIZED_ARRAY_LENGTH, buffer_size);
+
+      inst->base_mrf = 2;
+      inst->mlen = 1; /* always at least one */
+      inst->src[1] = src_reg(surf_index);
+
+      /* MRF for the first parameter */
+      src_reg lod = src_reg(0);
+      int param_base = inst->base_mrf;
+      int writemask = WRITEMASK_X;
+      emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod));
+
+      emit(inst);
+
+      /* array.length() =
+          max((buffer_object_size - offset_of_array) / stride_of_array, 0) */
+      emit(ADD(buffer_size, src_reg(buffer_size), brw_imm_d(-const_offset)));
+
+      assert(unsized_array_stride > 0);
+
+      src_reg stride = src_reg(unsized_array_stride);
+      dst_reg temp = dst_reg(this, glsl_type::int_type);
+      emit_math(SHADER_OPCODE_INT_QUOTIENT,
+                temp,
+                src_reg(buffer_size),
+                stride);
+      emit_minmax(BRW_CONDITIONAL_GE, result_dst, src_reg(temp), brw_imm_d(0));
       break;
+   }
 
    case ir_triop_vector_insert:
       unreachable("should have been lowered by lower_vector_insert");
-- 
1.9.1



More information about the mesa-dev mailing list