[Mesa-dev] [PATCH 44/47] (0042) i965: State setup for SIMD32 fragment shaders.

Shaofeng Tang shaofeng.tang at intel.com
Mon May 21 03:30:18 UTC 2018


From: Kevin Rogovin <kevin.rogovin at intel.com>

Change-Id: I9d8b1758ec4f02c86a7982c518c01a0d17fa3c62
---
 Notes.txt                                     | 26 +++++++++++++++++++
 src/mesa/drivers/dri/i965/genX_state_upload.c | 37 +++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)

diff --git a/Notes.txt b/Notes.txt
index e1ad8dc..dcb394f 100644
--- a/Notes.txt
+++ b/Notes.txt
@@ -11,3 +11,29 @@ Patch 0018 i965/fs: Rework FB write header setup for SIMD32 and better schedulin
 
 Patch 0032 965/fs: Implement 32-wide FS payload setup on Gen6+.
    Needed to change nir->info->XXX to nir->info.XXX in fs_vistor::setup_fs_payload_gen6().
+
+
+Patch 0042 State Setup for SIMD32 fragment shaders
+    This patch had to be completely rewritten since the state uploads
+    were all moved to a common file genX_state_upload.c.
+
+    Additional pain is required because the meaning of the kernel
+    fields is NOT what one would expect when 32-wide dispatch is
+    enabled.
+
+   /* GEN is amusing at times: which kernel slot is used for each dispatch
+    * mode changes depending on which dispatches are enabled.
+    *
+    * | 8-enabled | 16-enabled | 32-enabled | 8-shader | 16-shader | 32-shader |
+    * |  TRUE     |  FALSE     |  FALSE     | Kernel0  |           |           |
+    * |  TRUE     |  TRUE      |  FALSE     | Kernel0  | Kernel2   |           |
+    * |  TRUE     |  TRUE      |  TRUE      | Kernel0  | Kernel2   | Kernel1   |
+    * |  FALSE    |  TRUE      |  FALSE     |          | Kernel0   |           |
+    * |  FALSE    |  FALSE     |  TRUE      |          |           | Kernel0   |
+    * |  FALSE    |  TRUE      |  TRUE      |          | Kernel2   | Kernel1   |
+    *
+    * At least from the table, we can get a simple set of rules:
+    *  - 8-wide, if it is enabled, is always at Kernel0
+    *  - if N-wide is the only one enabled, then it is at Kernel0
+    *  - if there are at least 2 enables, then 16-wide is at 2 and 32-wide is at 1.
+    */
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 2ad00af..188b49c 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -1898,6 +1898,7 @@ genX(upload_wm)(struct brw_context *brw)
       wm.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
       wm._8PixelDispatchEnable = wm_prog_data->dispatch_8;
       wm._16PixelDispatchEnable = wm_prog_data->dispatch_16;
+      wm._32PixelDispatchEnable = wm_prog_data->dispatch_32;
       wm.DispatchGRFStartRegisterForConstantSetupData0 =
          wm_prog_data->base.dispatch_grf_start_reg;
       if (GEN_GEN == 6 ||
@@ -1910,6 +1911,10 @@ genX(upload_wm)(struct brw_context *brw)
          wm.KernelStartPointer2 =
             KSP(brw, stage_state->prog_offset + wm_prog_data->prog_offset_2);
       }
+      if (GEN_GEN == 6 || wm_prog_data->prog_offset_1) {
+         wm.KernelStartPointer1 =
+            KSP(brw, stage_state->prog_offset + wm_prog_data->prog_offset_1);
+      }
 #endif
 
 #if GEN_GEN == 6
@@ -1936,6 +1941,9 @@ genX(upload_wm)(struct brw_context *brw)
       else
          wm.PositionXYOffsetSelect = POSOFFSET_NONE;
 
+      wm.DispatchGRFStartRegisterForConstantSetupData1 =
+         wm_prog_data->dispatch_grf_start_reg_1;
+
       wm.DispatchGRFStartRegisterForConstantSetupData2 =
          wm_prog_data->dispatch_grf_start_reg_2;
 #endif
@@ -3937,12 +3945,41 @@ genX(upload_ps)(struct brw_context *brw)
 
       ps._8PixelDispatchEnable = prog_data->dispatch_8;
       ps._16PixelDispatchEnable = prog_data->dispatch_16;
+      ps._32PixelDispatchEnable = prog_data->dispatch_32;
+
+#if GEN_GEN >= 9
+      if (ps._32PixelDispatchEnable &&
+          brw->num_samples == 16 && !prog_data->persample_dispatch) {
+         /* From the SKL+ 3DSTATE_PS hardware docs:
+          * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32
+          *  Dispatch must not be enabled for PER_PIXEL dispatch mode."
+          *
+          * But disabling 32-wide dispatch at this point would cause the
+          * ordering of KSP offsets to change unless the two other dispatch
+          * modes are enabled.
+          *
+          * Currently, the Intel GEN shader compiler will not emit SIMD32
+          * if either of SIMD8 or SIMD16 is not emitted; however if we
+          * ever get to a situation where that assumption no longer holds
+          * here is the big red WARNING of doom.
+          *
+          * XXX - Use a saner representation of brw_wm_prog_data so we can do
+          *       the right thing if the assertion below doesn't hold.
+          */
+         assert(prog_data->dispatch_8 && prog_data->dispatch_16);
+         ps._32PixelDispatchEnable = false;
+      }
+#endif
       ps.DispatchGRFStartRegisterForConstantSetupData0 =
          prog_data->base.dispatch_grf_start_reg;
+      ps.DispatchGRFStartRegisterForConstantSetupData1 =
+         prog_data->dispatch_grf_start_reg_1;
       ps.DispatchGRFStartRegisterForConstantSetupData2 =
          prog_data->dispatch_grf_start_reg_2;
 
       ps.KernelStartPointer0 = stage_state->prog_offset;
+      ps.KernelStartPointer1 = stage_state->prog_offset +
+         prog_data->prog_offset_1;
       ps.KernelStartPointer2 = stage_state->prog_offset +
          prog_data->prog_offset_2;
 
-- 
2.7.4



More information about the mesa-dev mailing list