[Mesa-dev] [PATCH 22/31] i965/fs: Limit SIMD width of various virtual opcodes to the maximum supported value.

Francisco Jerez currojerez at riseup.net
Sat May 21 05:47:57 UTC 2016


Which is 16 or 8 in most cases.  This will make sure that 32-wide
virtual instructions get chopped up into chunks of their maximum
execution size.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 45 ++++++++++++++++++++++++++++++++----
 1 file changed, 40 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 7fe507e..9ab9d01 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -4824,6 +4824,22 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
       /* Integer division is limited to SIMD8 on all generations. */
       return MIN2(8, inst->exec_size);
 
+   case FS_OPCODE_LINTERP:
+   case FS_OPCODE_GET_BUFFER_SIZE:
+   case FS_OPCODE_DDX_COARSE:
+   case FS_OPCODE_DDX_FINE:
+   case FS_OPCODE_DDY_COARSE:
+   case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
+   case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
+   case FS_OPCODE_PACK_HALF_2x16_SPLIT:
+   case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
+   case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
+   case FS_OPCODE_INTERPOLATE_AT_CENTROID:
+   case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
+   case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
+   case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
+      return MIN2(16, inst->exec_size);
+
    case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
       /* Pre-ILK hardware doesn't have a SIMD8 variant of the texel fetch
        * message used to implement varying pull constant loads, so expand it
@@ -4878,8 +4894,14 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
              inst->exec_size == 8);
       /* Dual-source FB writes are unsupported in SIMD16 mode. */
       return (inst->src[FB_WRITE_LOGICAL_SRC_COLOR1].file != BAD_FILE ?
-              8 : inst->exec_size);
+              8 : MIN2(16, inst->exec_size));
 
+   case SHADER_OPCODE_TEX_LOGICAL:
+   case SHADER_OPCODE_TXF_CMS_LOGICAL:
+   case SHADER_OPCODE_TXF_UMS_LOGICAL:
+   case SHADER_OPCODE_TXF_MCS_LOGICAL:
+   case SHADER_OPCODE_LOD_LOGICAL:
+   case SHADER_OPCODE_TG4_LOGICAL:
    case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
       return MIN2(16, inst->exec_size);
 
@@ -4890,7 +4912,7 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
    case SHADER_OPCODE_TG4_OFFSET_LOGICAL: {
       /* gather4_po_c is unsupported in SIMD16 mode. */
       const fs_reg &shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C];
-      return (shadow_c.file != BAD_FILE ? 8 : inst->exec_size);
+      return (shadow_c.file != BAD_FILE ? 8 : MIN2(16, inst->exec_size));
    }
    case SHADER_OPCODE_TXL_LOGICAL:
    case FS_OPCODE_TXB_LOGICAL: {
@@ -4904,7 +4926,7 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
       else if (devinfo->gen < 7 && shadow_c.file != BAD_FILE)
          return 8;
       else
-         return inst->exec_size;
+         return MIN2(16, inst->exec_size);
    }
    case SHADER_OPCODE_TXF_LOGICAL:
    case SHADER_OPCODE_TXS_LOGICAL:
@@ -4914,7 +4936,7 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
       if (devinfo->gen == 4)
          return 16;
       else
-         return inst->exec_size;
+         return MIN2(16, inst->exec_size);
 
    case SHADER_OPCODE_TXF_CMS_W_LOGICAL: {
       /* This opcode can take up to 6 arguments which means that in some
@@ -4929,7 +4951,7 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
       if ((coord_components + 3) * 2 > MAX_SAMPLER_MESSAGE_SIZE)
          return 8;
       else
-         return inst->exec_size;
+         return MIN2(16, inst->exec_size);
    }
 
    case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
@@ -4937,6 +4959,19 @@ get_lowered_simd_width(const struct brw_device_info *devinfo,
    case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
       return 8;
 
+   case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+   case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
+   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
+      return MIN2(16, inst->exec_size);
+
+   case SHADER_OPCODE_URB_READ_SIMD8:
+   case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
+   case SHADER_OPCODE_URB_WRITE_SIMD8:
+   case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
+   case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
+   case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
+      return MIN2(8, inst->exec_size);
+
    case SHADER_OPCODE_MOV_INDIRECT:
       /* Prior to Broadwell, we only have 8 address subregisters */
       return MIN3(devinfo->gen >= 8 ? 16 : 8,
-- 
2.7.3



More information about the mesa-dev mailing list