[Mesa-dev] [WIP 21/25] i965/fs/gen7: Add generator support for loading double precision uniforms

Topi Pohjolainen topi.pohjolainen at intel.com
Thu Oct 16 05:24:33 PDT 2014


Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
---
 src/mesa/drivers/dri/i965/brw_defines.h        |  1 +
 src/mesa/drivers/dri/i965/brw_fs.h             |  3 +++
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 32 ++++++++++++++++++++++++++
 3 files changed, 36 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 88097b7..186d09a 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -913,6 +913,7 @@ enum opcode {
    FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7,
    FS_OPCODE_VARYING_PULL_CONSTANT_LOAD,
    FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
+   FS_OPCODE_UNIFORM_DOUBLE_LOAD,
    FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
    FS_OPCODE_DISCARD_JUMP,
    FS_OPCODE_SET_OMASK,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 0f47464..c51aae4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -735,6 +735,9 @@ private:
                                                  struct brw_reg dst,
                                                  struct brw_reg index,
                                                  struct brw_reg offset);
+   void generate_uniform_double_float_load(const fs_inst *inst,
+                                           struct brw_reg dst,
+                                           struct brw_reg src);
    void generate_mov_dispatch_to_flags(fs_inst *inst);
 
    void generate_pixel_interpolator_query(fs_inst *inst,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 4bcb074..39dc563 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1508,6 +1508,34 @@ fs_generator::generate_pack_double_2x32(fs_inst *inst,
 }
 
 void
+fs_generator::generate_uniform_double_float_load(const fs_inst *inst,
+                                                 struct brw_reg dst,
+                                                 struct brw_reg src)
+{
+   assert(p->brw->gen == 7);
+
+   dst.type = BRW_REGISTER_TYPE_UD;
+   dst.width = BRW_WIDTH_8;
+   dst.hstride = BRW_HORIZONTAL_STRIDE_1;
+   dst.vstride = BRW_VERTICAL_STRIDE_8;
+
+   /* Treat the source as a packed pair of 32-bit elements. */
+   src.type = BRW_REGISTER_TYPE_UD;
+   src.width = BRW_WIDTH_2;
+   src.hstride = BRW_HORIZONTAL_STRIDE_1;
+   src.vstride = BRW_VERTICAL_STRIDE_0;
+
+   /* Issue two copies. A single move writes only execution-width many
+    * 32-bit elements, i.e. (execution width / 2) double precision channels,
+    * so two moves are needed to write all the channels. Between the two
+    * copies dst.nr is advanced by (exec_size / 8).
+    */
+   brw_copy_double_float_scalar(p, dst, src);
+   dst.nr += (inst->exec_size / 8);
+   brw_copy_double_float_scalar(p, dst, src);
+}
+
+void
 fs_generator::generate_unpack_half_2x16_split(fs_inst *inst,
                                               struct brw_reg dst,
                                               struct brw_reg src)
@@ -1982,6 +2010,10 @@ fs_generator::generate_code(const cfg_t *cfg)
 	 generate_varying_pull_constant_load_gen7(inst, dst, src[0], src[1]);
 	 break;
 
+      case FS_OPCODE_UNIFORM_DOUBLE_LOAD:
+	 generate_uniform_double_float_load(inst, dst, src[0]);
+	 break;
+
       case FS_OPCODE_REP_FB_WRITE:
       case FS_OPCODE_FB_WRITE:
 	 generate_fb_write(inst, src[0]);
-- 
1.8.3.1



More information about the mesa-dev mailing list