[Mesa-dev] [PATCH v2 082/103] i965/vec4: make emit_pull_constant_load support 64-bit loads

Iago Toral Quiroga itoral at igalia.com
Tue Oct 11 09:02:26 UTC 2016


This way callers don't need to know about 64-bit particularities and
we reuse some code.
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp         | 22 ++-----
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 81 ++++++++++++++------------
 2 files changed, 50 insertions(+), 53 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index b0bc2d5..e732bf4 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -884,24 +884,12 @@ vec4_visitor::move_push_constants_to_pull_constants()
 
          int uniform = inst->src[i].nr;
 
-         dst_reg temp;
-         if (type_sz(inst->src[i].type) != 8) {
-            temp = dst_reg(this, glsl_type::vec4_type);
-            emit_pull_constant_load(block, inst, temp, inst->src[i],
-                                    pull_constant_loc[uniform], src_reg());
-         } else {
-            dst_reg shuffled = dst_reg(this, glsl_type::dvec4_type);
-            dst_reg shuffled_float = retype(shuffled, BRW_REGISTER_TYPE_F);
-
-            emit_pull_constant_load(block, inst, shuffled_float, inst->src[i],
-                                    pull_constant_loc[uniform], src_reg());
-            emit_pull_constant_load(block, inst, offset(shuffled_float, 1),
-                                    offset(inst->src[i], 1),
-                                    pull_constant_loc[uniform], src_reg());
+         const glsl_type *temp_type = type_sz(inst->src[i].type) == 8 ?
+            glsl_type::dvec4_type : glsl_type::vec4_type;
+         dst_reg temp = dst_reg(this, temp_type);
 
-            temp = dst_reg(this, glsl_type::dvec4_type);
-            shuffle_64bit_data(temp, src_reg(shuffled), false, block, inst);
-         }
+         emit_pull_constant_load(block, inst, temp, inst->src[i],
+                                 pull_constant_loc[uniform], src_reg());
 
          inst->src[i].file = temp.file;
          inst->src[i].nr = temp.nr;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index f12a114..0177f68 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1718,33 +1718,57 @@ vec4_visitor::move_grf_array_access_to_scratch()
  */
 void
 vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
-				      dst_reg temp, src_reg orig_src,
+                                      dst_reg temp, src_reg orig_src,
                                       int base_offset, src_reg indirect)
 {
    assert(orig_src.offset % 16 == 0);
-   int reg_offset = base_offset + orig_src.offset / 16;
    const unsigned index = prog_data->base.binding_table.pull_constants_start;
 
-   src_reg offset;
-   if (indirect.file != BAD_FILE) {
-      offset = src_reg(this, glsl_type::uint_type);
-
-      emit_before(block, inst, ADD(dst_reg(offset), indirect,
-                                   brw_imm_ud(reg_offset * 16)));
-   } else if (devinfo->gen >= 8) {
-      /* Store the offset in a GRF so we can send-from-GRF. */
-      offset = src_reg(this, glsl_type::uint_type);
-      emit_before(block, inst, MOV(dst_reg(offset), brw_imm_ud(reg_offset * 16)));
-   } else {
-      offset = brw_imm_d(reg_offset * 16);
+   /* For 64-bit loads we need to emit two 32-bit load messages and we also
+    * need to shuffle the 32-bit data result into proper 64-bit data. To do
+    * that we emit the 32-bit loads into a temporary and we shuffle the result
+    * into the original destination.
+    */
+   dst_reg orig_temp = temp;
+   bool is_64bit = type_sz(orig_src.type) == 8;
+   if (is_64bit) {
+      assert(type_sz(temp.type) == 8);
+      dst_reg temp_df = dst_reg(this, glsl_type::dvec4_type);
+      temp = retype(temp_df, BRW_REGISTER_TYPE_F);
    }
 
-   emit_pull_constant_load_reg(temp,
-                               brw_imm_ud(index),
-                               offset,
-                               block, inst);
+   src_reg src = orig_src;
+   for (int i = 0; i < (is_64bit ? 2 : 1); i++) {
+      int reg_offset = base_offset + src.offset / 16;
+
+      src_reg byte_offset;
+      if (indirect.file != BAD_FILE) {
+         byte_offset = src_reg(this, glsl_type::uint_type);
+         emit_before(block, inst, ADD(dst_reg(byte_offset), indirect,
+                                      brw_imm_ud(reg_offset * 16)));
+      } else if (devinfo->gen >= 8) {
+         /* Store the offset in a GRF so we can send-from-GRF. */
+         byte_offset = src_reg(this, glsl_type::uint_type);
+         emit_before(block, inst, MOV(dst_reg(byte_offset),
+                                      brw_imm_ud(reg_offset * 16)));
+      } else {
+         byte_offset = brw_imm_d(reg_offset * 16);
+      }
+
+      emit_pull_constant_load_reg(offset(temp, i),
+                                  brw_imm_ud(index),
+                                  byte_offset,
+                                  block, inst);
 
-   brw_mark_surface_used(&prog_data->base, index);
+      brw_mark_surface_used(&prog_data->base, index);
+
+      src = offset(src, 1);
+   }
+
+   if (is_64bit) {
+      temp = retype(temp, BRW_REGISTER_TYPE_DF);
+      shuffle_64bit_data(orig_temp, src_reg(temp), false, block, inst);
+   }
 }
 
 /**
@@ -1817,23 +1841,8 @@ vec4_visitor::move_uniform_array_access_to_pull_constants()
 
       assert(inst->src[0].swizzle == BRW_SWIZZLE_NOOP);
 
-      if (type_sz(inst->src[0].type) != 8) {
-         emit_pull_constant_load(block, inst, inst->dst, inst->src[0],
-                                 pull_constant_loc[uniform_nr], inst->src[1]);
-      } else {
-         dst_reg shuffled = dst_reg(this, glsl_type::dvec4_type);
-         dst_reg shuffled_float = retype(shuffled, BRW_REGISTER_TYPE_F);
-
-         emit_pull_constant_load(block, inst, shuffled_float, inst->src[0],
-                                 pull_constant_loc[uniform_nr], inst->src[1]);
-         emit_pull_constant_load(block, inst, offset(shuffled_float, 1),
-                                 offset(inst->src[0], 1),
-                                 pull_constant_loc[uniform_nr], inst->src[1]);
-
-         shuffle_64bit_data(retype(inst->dst, BRW_REGISTER_TYPE_DF),
-                            src_reg(shuffled), false, block, inst);
-      }
-
+      emit_pull_constant_load(block, inst, inst->dst, inst->src[0],
+                              pull_constant_loc[uniform_nr], inst->src[1]);
       inst->remove(block);
    }
 
-- 
2.7.4



More information about the mesa-dev mailing list