[Mesa-dev] [PATCH 3/3] i965: Port Gen4-5 VS_STATE to genxml.

Kenneth Graunke kenneth at whitecape.org
Wed May 10 19:41:39 UTC 2017


It's actually not that much code.
---
 src/intel/genxml/gen4.xml                     |   2 +-
 src/intel/genxml/gen45.xml                    |   2 +-
 src/intel/genxml/gen5.xml                     |   2 +-
 src/mesa/drivers/dri/i965/Makefile.sources    |   1 -
 src/mesa/drivers/dri/i965/brw_vs_state.c      | 192 --------------------------
 src/mesa/drivers/dri/i965/genX_state_upload.c |  74 +++++++++-
 6 files changed, 71 insertions(+), 202 deletions(-)
 delete mode 100644 src/mesa/drivers/dri/i965/brw_vs_state.c

diff --git a/src/intel/genxml/gen4.xml b/src/intel/genxml/gen4.xml
index f4d58879bcc..da34d6ca19b 100644
--- a/src/intel/genxml/gen4.xml
+++ b/src/intel/genxml/gen4.xml
@@ -750,13 +750,13 @@
     <field name="URB Entry Allocation Size" start="147" end="151" type="uint"/>
     <field name="Number of URB Entries" start="139" end="146" type="uint"/>
     <field name="Statistics Enable" start="138" end="138" type="bool"/>
     <field name="Sampler State Offset" start="165" end="191" type="address"/>
     <field name="Sampler Count" start="160" end="162" type="uint"/>
     <field name="Vertex Cache Disable" start="193" end="193" type="bool"/>
-    <field name="Function Enable" start="192" end="192" type="bool"/>
+    <field name="Enable" start="192" end="192" type="bool"/>
   </struct>
 
   <struct name="WM_STATE" length="8">
     <field name="Kernel Start Pointer" start="6" end="31" type="address"/>
     <field name="GRF Register Count" start="1" end="3" type="uint"/>
     <field name="Single Program Flow" start="63" end="63" type="bool"/>
diff --git a/src/intel/genxml/gen45.xml b/src/intel/genxml/gen45.xml
index 4556322fee5..0e95cf27e89 100644
--- a/src/intel/genxml/gen45.xml
+++ b/src/intel/genxml/gen45.xml
@@ -701,13 +701,13 @@
     <field name="URB Entry Allocation Size" start="147" end="151" type="uint"/>
     <field name="Number of URB Entries" start="139" end="146" type="uint"/>
     <field name="Statistics Enable" start="138" end="138" type="bool"/>
     <field name="Sampler State Offset" start="165" end="191" type="address"/>
     <field name="Sampler Count" start="160" end="162" type="uint"/>
     <field name="Vertex Cache Disable" start="193" end="193" type="bool"/>
-    <field name="Function Enable" start="192" end="192" type="bool"/>
+    <field name="Enable" start="192" end="192" type="bool"/>
   </struct>
 
   <struct name="WM_STATE" length="8">
     <field name="Kernel Start Pointer" start="6" end="31" type="address"/>
     <field name="GRF Register Count" start="1" end="3" type="uint"/>
     <field name="Single Program Flow" start="63" end="63" type="bool"/>
diff --git a/src/intel/genxml/gen5.xml b/src/intel/genxml/gen5.xml
index 3d80de9cf1e..c9a18dd18e3 100644
--- a/src/intel/genxml/gen5.xml
+++ b/src/intel/genxml/gen5.xml
@@ -857,13 +857,13 @@
     <field name="URB Entry Allocation Size" start="147" end="151" type="uint"/>
     <field name="Number of URB Entries" start="139" end="146" type="uint"/>
     <field name="Statistics Enable" start="138" end="138" type="bool"/>
     <field name="Sampler State Offset" start="165" end="191" type="address"/>
     <field name="Sampler Count" start="160" end="162" type="uint"/>
     <field name="Vertex Cache Disable" start="193" end="193" type="bool"/>
-    <field name="Function Enable" start="192" end="192" type="bool"/>
+    <field name="Enable" start="192" end="192" type="bool"/>
   </struct>
 
   <struct name="WM_STATE" length="11">
     <field name="Kernel Start Pointer" start="6" end="31" type="offset"/>
     <field name="GRF Register Count" start="1" end="3" type="uint"/>
     <field name="Single Program Flow" start="63" end="63" type="bool"/>
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 9e567cbc908..349777cf24a 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -67,13 +67,12 @@ i965_FILES = \
 	brw_tex_layout.c \
 	brw_urb.c \
 	brw_util.c \
 	brw_util.h \
 	brw_vs.c \
 	brw_vs.h \
-	brw_vs_state.c \
 	brw_vs_surface_state.c \
 	brw_wm.c \
 	brw_wm.h \
 	brw_wm_state.c \
 	brw_wm_surface_state.c \
 	gen6_clip_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
deleted file mode 100644
index fafe305f4f7..00000000000
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- Copyright (C) Intel Corp.  2006.  All Rights Reserved.
- Intel funded Tungsten Graphics to
- develop this 3D driver.
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice (including the
- next paragraph) shall be included in all copies or substantial
- portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- **********************************************************************/
- /*
-  * Authors:
-  *   Keith Whitwell <keithw at vmware.com>
-  */
-
-
-
-#include "intel_batchbuffer.h"
-#include "brw_context.h"
-#include "brw_state.h"
-#include "brw_defines.h"
-#include "main/macros.h"
-
-static void
-brw_upload_vs_unit(struct brw_context *brw)
-{
-   const struct gen_device_info *devinfo = &brw->screen->devinfo;
-   struct brw_stage_state *stage_state = &brw->vs.base;
-   const struct brw_stage_prog_data *prog_data = stage_state->prog_data;
-   const struct brw_vue_prog_data *vue_prog_data =
-      brw_vue_prog_data(stage_state->prog_data);
-
-   struct brw_vs_unit_state *vs;
-
-   vs = brw_state_batch(brw, sizeof(*vs), 32, &stage_state->state_offset);
-   memset(vs, 0, sizeof(*vs));
-
-   /* BRW_NEW_PROGRAM_CACHE | BRW_NEW_VS_PROG_DATA */
-   vs->thread0.grf_reg_count = ALIGN(vue_prog_data->total_grf, 16) / 16 - 1;
-   vs->thread0.kernel_start_pointer =
-      brw_program_reloc(brw,
-			stage_state->state_offset +
-			offsetof(struct brw_vs_unit_state, thread0),
-			stage_state->prog_offset +
-			(vs->thread0.grf_reg_count << 1)) >> 6;
-
-   if (prog_data->use_alt_mode)
-      vs->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
-   else
-      vs->thread1.floating_point_mode = BRW_FLOATING_POINT_IEEE_754;
-
-   /* Choosing multiple program flow means that we may get 2-vertex threads,
-    * which will have the channel mask for dwords 4-7 enabled in the thread,
-    * and those dwords will be written to the second URB handle when we
-    * brw_urb_WRITE() results.
-    */
-   /* Force single program flow on Ironlake.  We cannot reliably get
-    * all applications working without it.  See:
-    * https://bugs.freedesktop.org/show_bug.cgi?id=29172
-    *
-    * The most notable and reliably failing application is the Humus
-    * demo "CelShading"
-   */
-   vs->thread1.single_program_flow = (brw->gen == 5);
-
-   vs->thread1.binding_table_entry_count =
-      prog_data->binding_table.size_bytes / 4;
-
-   if (prog_data->total_scratch != 0) {
-      vs->thread2.scratch_space_base_pointer =
-	 stage_state->scratch_bo->offset64 >> 10; /* reloc */
-      vs->thread2.per_thread_scratch_space =
-	 ffs(stage_state->per_thread_scratch) - 11;
-   } else {
-      vs->thread2.scratch_space_base_pointer = 0;
-      vs->thread2.per_thread_scratch_space = 0;
-   }
-
-   vs->thread3.urb_entry_read_length = vue_prog_data->urb_read_length;
-   vs->thread3.const_urb_entry_read_length = prog_data->curb_read_length;
-   vs->thread3.dispatch_grf_start_reg = prog_data->dispatch_grf_start_reg;
-   vs->thread3.urb_entry_read_offset = 0;
-
-   /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
-   vs->thread3.const_urb_entry_read_offset = brw->curbe.vs_start * 2;
-
-   /* BRW_NEW_URB_FENCE */
-   if (brw->gen == 5) {
-      switch (brw->urb.nr_vs_entries) {
-      case 8:
-      case 12:
-      case 16:
-      case 32:
-      case 64:
-      case 96:
-      case 128:
-      case 168:
-      case 192:
-      case 224:
-      case 256:
-	 vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries >> 2;
-	 break;
-      default:
-         unreachable("not reached");
-      }
-   } else {
-      switch (brw->urb.nr_vs_entries) {
-      case 8:
-      case 12:
-      case 16:
-      case 32:
-	 break;
-      case 64:
-	 assert(brw->is_g4x);
-	 break;
-      default:
-         unreachable("not reached");
-      }
-      vs->thread4.nr_urb_entries = brw->urb.nr_vs_entries;
-   }
-
-   vs->thread4.urb_entry_allocation_size = brw->urb.vsize - 1;
-
-   vs->thread4.max_threads = CLAMP(brw->urb.nr_vs_entries / 2,
-				   1, devinfo->max_vs_threads) - 1;
-
-   if (brw->gen == 5)
-      vs->vs5.sampler_count = 0; /* hardware requirement */
-   else {
-      vs->vs5.sampler_count = (stage_state->sampler_count + 3) / 4;
-   }
-
-   /* Vertex program always enabled:
-    */
-   vs->vs6.vs_enable = 1;
-
-   /* Set the sampler state pointer, and its reloc
-    */
-   if (stage_state->sampler_count) {
-      /* BRW_NEW_SAMPLER_STATE_TABLE - reloc */
-      vs->vs5.sampler_state_pointer =
-         (brw->batch.bo->offset64 + stage_state->sampler_offset) >> 5;
-      brw_emit_reloc(&brw->batch,
-                     stage_state->state_offset +
-                     offsetof(struct brw_vs_unit_state, vs5),
-                     brw->batch.bo,
-                     (stage_state->sampler_offset | vs->vs5.sampler_count),
-                     I915_GEM_DOMAIN_INSTRUCTION, 0);
-   }
-
-   /* Emit scratch space relocation */
-   if (prog_data->total_scratch != 0) {
-      brw_emit_reloc(&brw->batch,
-                     stage_state->state_offset +
-                     offsetof(struct brw_vs_unit_state, thread2),
-                     stage_state->scratch_bo,
-                     vs->thread2.per_thread_scratch_space,
-                     I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
-   }
-
-   brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
-}
-
-const struct brw_tracked_state brw_vs_unit = {
-   .dirty = {
-      .mesa  = 0,
-      .brw   = BRW_NEW_BATCH |
-               BRW_NEW_BLORP |
-               BRW_NEW_PROGRAM_CACHE |
-               BRW_NEW_PUSH_CONSTANT_ALLOCATION |
-               BRW_NEW_SAMPLER_STATE_TABLE |
-               BRW_NEW_URB_FENCE |
-               BRW_NEW_VS_PROG_DATA,
-   },
-   .emit = brw_upload_vs_unit,
-};
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 4c6cb1a9b71..bc7f068d5bb 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -130,12 +130,23 @@ instruction_bo(struct brw_bo *bo, uint32_t offset)
             .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
             .write_domain = I915_GEM_DOMAIN_INSTRUCTION,
    };
 }
 
 static inline struct brw_address
+instruction_ro_bo(struct brw_bo *bo, uint32_t offset)
+{
+   return (struct brw_address) {
+            .bo = bo,
+            .offset = offset,
+            .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
+            .write_domain = 0,
+   };
+}
+
+static inline struct brw_address
 vertex_bo(struct brw_bo *bo, uint32_t offset)
 {
    return (struct brw_address) {
             .bo = bo,
             .offset = offset,
             .read_domains = I915_GEM_DOMAIN_VERTEX,
@@ -1690,14 +1701,28 @@ static const struct brw_tracked_state genX(wm_state) = {
    .emit = genX(upload_wm),
 };
 #endif
 
 /* ---------------------------------------------------------------------- */
 
+#if GEN_GEN == 4
+static inline struct brw_address
+KSP(struct brw_context *brw, uint32_t offset)
+{
+   return instruction_bo(brw->cache.bo, offset);
+}
+#else
+static inline uint32_t
+KSP(struct brw_context *brw, uint32_t offset)
+{
+   return offset;
+}
+#endif
+
 #define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \
-   pkt.KernelStartPointer = stage_state->prog_offset;                     \
+   pkt.KernelStartPointer = KSP(brw, stage_state->prog_offset);           \
    pkt.SamplerCount       =                                               \
       DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4);          \
    pkt.BindingTableEntryCount =                                           \
       stage_prog_data->binding_table.size_bytes / 4;                      \
    pkt.FloatingPointMode  = stage_prog_data->use_alt_mode;                \
                                                                           \
@@ -1713,18 +1738,18 @@ static const struct brw_tracked_state genX(wm_state) = {
    pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length;       \
    pkt.prefix##URBEntryReadOffset = 0;                                    \
                                                                           \
    pkt.StatisticsEnable = true;                                           \
    pkt.Enable           = true;
 
-#if GEN_GEN >= 6
 static void
 genX(upload_vs_state)(struct brw_context *brw)
 {
+   UNUSED struct gl_context *ctx = &brw->ctx;
    const struct gen_device_info *devinfo = &brw->screen->devinfo;
-   const struct brw_stage_state *stage_state = &brw->vs.base;
+   struct brw_stage_state *stage_state = &brw->vs.base;
 
    /* BRW_NEW_VS_PROG_DATA */
    const struct brw_vue_prog_data *vue_prog_data =
       brw_vue_prog_data(brw->vs.base.prog_data);
    const struct brw_stage_prog_data *stage_prog_data = &vue_prog_data->base;
 
@@ -1752,17 +1777,50 @@ genX(upload_vs_state)(struct brw_context *brw)
    }
 #endif
 
    if (GEN_GEN == 7 && devinfo->is_ivybridge)
       gen7_emit_vs_workaround_flush(brw);
 
+#if GEN_GEN >= 6
    brw_batch_emit(brw, GENX(3DSTATE_VS), vs) {
+#else
+   ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
+   brw_state_emit(brw, GENX(VS_STATE), 32, &stage_state->state_offset, vs) {
+#endif
       INIT_THREAD_DISPATCH_FIELDS(vs, Vertex);
 
       vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;
 
+#if GEN_GEN < 6
+      vs.GRFRegisterCount = DIV_ROUND_UP(vue_prog_data->total_grf, 16) - 1;
+      vs.ConstantURBEntryReadLength = stage_prog_data->curb_read_length;
+      vs.ConstantURBEntryReadOffset = brw->curbe.vs_start * 2;
+
+      vs.NumberofURBEntries = brw->urb.nr_vs_entries >> (GEN_GEN == 5 ? 2 : 0);
+      vs.URBEntryAllocationSize = brw->urb.vsize - 1;
+
+      vs.MaximumNumberofThreads =
+         CLAMP(brw->urb.nr_vs_entries / 2, 1, devinfo->max_vs_threads) - 1;
+
+      vs.StatisticsEnable = false;
+      vs.SamplerStateOffset =
+         instruction_ro_bo(brw->batch.bo, stage_state->sampler_offset);
+#endif
+
+#if GEN_GEN == 5
+      /* Force single program flow on Ironlake.  We cannot reliably get
+       * all applications working without it.  See:
+       * https://bugs.freedesktop.org/show_bug.cgi?id=29172
+       *
+       * The most notable and reliably failing application is the Humus
+       * demo "CelShading"
+       */
+      vs.SingleProgramFlow = true;
+      vs.SamplerCount = 0; /* hardware requirement */
+#endif
+
 #if GEN_GEN >= 8
       vs.SIMD8DispatchEnable =
          vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8;
 
       vs.UserClipDistanceCullTestEnableBitmask =
          vue_prog_data->cull_distance_mask;
@@ -1798,17 +1856,21 @@ static const struct brw_tracked_state genX(vs_state) = {
    .dirty = {
       .mesa  = (GEN_GEN == 6 ? (_NEW_PROGRAM_CONSTANTS | _NEW_TRANSFORM) : 0),
       .brw   = BRW_NEW_BATCH |
                BRW_NEW_BLORP |
                BRW_NEW_CONTEXT |
                BRW_NEW_VS_PROG_DATA |
-               (GEN_GEN == 6 ? BRW_NEW_VERTEX_PROGRAM : 0),
+               (GEN_GEN == 6 ? BRW_NEW_VERTEX_PROGRAM : 0) |
+               (GEN_GEN <= 5 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION |
+                               BRW_NEW_PROGRAM_CACHE |
+                               BRW_NEW_SAMPLER_STATE_TABLE |
+                               BRW_NEW_URB_FENCE
+                             : 0),
    },
    .emit = genX(upload_vs_state),
 };
-#endif
 
 /* ---------------------------------------------------------------------- */
 
 #if GEN_GEN >= 6
 static void
 brw_calculate_guardband_size(const struct gen_device_info *devinfo,
@@ -3881,13 +3943,13 @@ genX(init_atoms)(struct brw_context *brw)
       &brw_vs_samplers,
 
       /* These set up state for brw_psp_urb_cbs */
       &brw_wm_unit,
       &brw_sf_vp,
       &brw_sf_unit,
-      &brw_vs_unit,		/* always required, enabled or not */
+      &genX(vs_state), /* always required, enabled or not */
       &brw_clip_unit,
       &brw_gs_unit,
 
       /* Command packets:
        */
       &brw_invariant_state,
-- 
2.12.2



More information about the mesa-dev mailing list