[Mesa-dev] [WIP 09/25] i965/fs: Generator support for converting double to float

Thu Oct 16 05:24:21 PDT 2014

Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
---
 src/mesa/drivers/dri/i965/brw_defines.h        |  2 ++
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 45 ++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index ab45d3d..4a173db 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -926,6 +926,8 @@ enum opcode {
    FS_OPCODE_INTERPOLATE_AT_SAMPLE,
    FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
    FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET,
+   FS_OPCODE_D2F_CONVERT,
+   FS_OPCODE_D2F_MOV_LOW_32BITS,
 
    VS_OPCODE_URB_WRITE,
    VS_OPCODE_PULL_CONSTANT_LOAD,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index c2010c0..21c9660 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1968,6 +1968,51 @@ fs_generator::generate_code(const cfg_t *cfg)
                                            GEN7_PIXEL_INTERPOLATOR_LOC_PER_SLOT_OFFSET);
          break;
 
+      case FS_OPCODE_D2F_CONVERT:
+         /*
+          * Conversion from double to single precision writes 64 bits per
+          * element, leaving the upper 32 bits undefined. Hence a separate
+          * conversion is needed first, followed by a copy of the lower 32 bits
+          * to the final destination.
+          * In addition, the hardware can write only half the number of
+          * channels whenever double precision operands are involved. Therefore
+          * we need a wide enough intermediate, filled by two separate
+          * converting moves, and a single instruction that copies every second
+          * 32-bit doubleword.
+          */
+         assert(brw->gen >= 7);
+         assert(inst->sources == 1);
+         assert(dst.type == BRW_REGISTER_TYPE_DF);
+         assert(src[0].type == BRW_REGISTER_TYPE_DF);
+
+         /* Now tell the hardware that a conversion is needed - treat the
+          * destination as two single precision floats. Convert exec_width / 2
+          * values at a time using two moves, each writing exec_width / 2
+          * 64-bit channels.
+          */
+         dst.type = BRW_REGISTER_TYPE_F;
+         brw_MOV(p, dst, src[0]);
+         dst.nr += (dispatch_width / 8);
+         if (src[0].file == GRF && src[0].width > 1)
+            src[0].nr += (dispatch_width / 8);
+         brw_MOV(p, dst, src[0]);
+         break;
+
+      case FS_OPCODE_D2F_MOV_LOW_32BITS:
+         assert(brw->gen >= 7);
+         assert(inst->sources == 1);
+         assert(dst.type == BRW_REGISTER_TYPE_F);
+         assert(src[0].type == BRW_REGISTER_TYPE_DF);
+         /* Tell the hardware that the source in fact holds single precision
+          * floats, each of which occupies 64 bits.
+          */
+         src[0].type = BRW_REGISTER_TYPE_F;
+         src[0].width = BRW_WIDTH_4;
+         src[0].hstride = BRW_HORIZONTAL_STRIDE_2;
+         src[0].vstride = BRW_VERTICAL_STRIDE_8;
+         brw_MOV(p, dst, src[0]);
+         break;
+
       default:
 	 if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
 	    _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
-- 
1.8.3.1