Mesa (master): i965: Allocate the whole URB to the VS and fix calculations for Gen6.

Kenneth Graunke kwg at kemper.freedesktop.org
Mon Apr 18 22:26:37 UTC 2011


Module: Mesa
Branch: master
Commit: 42a805700039e81a9245f46f153e2cd9705cd0d7
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=42a805700039e81a9245f46f153e2cd9705cd0d7

Author: Kenneth Graunke <kenneth at whitecape.org>
Date:   Tue Apr 12 15:42:06 2011 -0700

i965: Allocate the whole URB to the VS and fix calculations for Gen6.

Since we never enable the GS on Sandybridge, there's no need to allocate
it any URB space.

Furthermore, the previous calculation was incorrect: it neglected to
multiply by nr_vs_entries, instead comparing whether twice the size of
a single VS URB entry was bigger than the entire URB space.  It also
neglected to take into account that vs_size is in units of 128-byte
blocks, while urb_size is in bytes.

Despite the above problems, the calculations resulted in an acceptable
programming of the URB in most cases, at least on GT2.

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
Reviewed-by: Eric Anholt <eric at anholt.net>

---

 src/mesa/drivers/dri/i965/brw_context.c |    5 +++-
 src/mesa/drivers/dri/i965/brw_context.h |    5 +++-
 src/mesa/drivers/dri/i965/gen6_urb.c    |   34 +++++++++++++++---------------
 3 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index a74ba5c..230d326 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -182,7 +182,6 @@ GLboolean brwCreateContext( int api,
 
    /* WM maximum threads is number of EUs times number of threads per EU. */
    if (intel->gen >= 6) {
-      brw->urb.size = 1024;
       if (IS_GT2(intel->intelScreen->deviceID)) {
 	 /* This could possibly be 80, but is supposed to require
 	  * disabling of WIZ hashing (bit 6 of GT_MODE, 0x20d0) and a
@@ -190,9 +189,13 @@ GLboolean brwCreateContext( int api,
 	  */
 	 brw->wm_max_threads = 40;
 	 brw->vs_max_threads = 60;
+	 brw->urb.size = 64;            /* volume 5c.5 section 5.1 */
+	 brw->urb.max_vs_handles = 128; /* volume 2a (see 3DSTATE_URB) */
       } else {
 	 brw->wm_max_threads = 40;
 	 brw->vs_max_threads = 24;
+	 brw->urb.size = 32;            /* volume 5c.5 section 5.1 */
+	 brw->urb.max_vs_handles = 256; /* volume 2a (see 3DSTATE_URB) */
       }
    } else if (intel->gen == 5) {
       brw->urb.size = 1024;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 4b4dfba..1daa49a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -568,6 +568,9 @@ struct brw_context
 
       GLboolean constrained;
 
+      GLuint max_vs_handles;	/* Maximum number of VS handles */
+      GLuint max_gs_handles;	/* Maximum number of GS handles */
+
       GLuint nr_vs_entries;
       GLuint nr_gs_entries;
       GLuint nr_clip_entries;
@@ -579,7 +582,7 @@ struct brw_context
        * a number of 1024-bit (128-byte) rows.  Should be >= 1.
        */
       GLuint vs_size;
-/*       GLuint gs_size; */
+      GLuint gs_size;
 
       GLuint vs_start;
       GLuint gs_start;
diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c
index c3819f9..909e1bb 100644
--- a/src/mesa/drivers/dri/i965/gen6_urb.c
+++ b/src/mesa/drivers/dri/i965/gen6_urb.c
@@ -34,26 +34,25 @@
 static void
 prepare_urb( struct brw_context *brw )
 {
-   int urb_size, max_urb_entry;
-   struct intel_context *intel = &brw->intel;
-
-   if (IS_GT1(intel->intelScreen->deviceID)) {
-	urb_size = 32 * 1024;
-	max_urb_entry = 128;
-   } else {
-	urb_size = 64 * 1024;
-	max_urb_entry = 256;
-   }
-
-   brw->urb.nr_vs_entries = max_urb_entry;
-   brw->urb.nr_gs_entries = max_urb_entry;
+   int nr_vs_entries;
 
    /* CACHE_NEW_VS_PROG */
    brw->urb.vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1);
 
-   if (2 * brw->urb.vs_size > urb_size)
-	   brw->urb.nr_vs_entries = brw->urb.nr_gs_entries = 
-		(urb_size ) / (2 * brw->urb.vs_size);
+   /* Calculate how many VS URB entries fit in the total URB size */
+   nr_vs_entries = (brw->urb.size * 1024) / (brw->urb.vs_size * 128);
+
+   if (nr_vs_entries > brw->urb.max_vs_handles)
+      nr_vs_entries = brw->urb.max_vs_handles;
+
+   /* According to volume 2a, nr_vs_entries must be a multiple of 4. */
+   brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 4);
+
+   /* Since we currently don't support Geometry Shaders, we always put the
+    * GS unit in passthrough mode and don't allocate it any URB space.
+    */
+   brw->urb.nr_gs_entries = 0;
+   brw->urb.gs_size = 1; /* Incorrect, but with 0 GS entries it doesn't matter. */
 }
 
 static void
@@ -61,6 +60,7 @@ upload_urb(struct brw_context *brw)
 {
    struct intel_context *intel = &brw->intel;
 
+   assert(brw->urb.nr_vs_entries >= 24);
    assert(brw->urb.nr_vs_entries % 4 == 0);
    assert(brw->urb.nr_gs_entries % 4 == 0);
    /* GS requirement */
@@ -70,7 +70,7 @@ upload_urb(struct brw_context *brw)
    OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
    OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_VS_SIZE_SHIFT) |
 	     ((brw->urb.nr_vs_entries) << GEN6_URB_VS_ENTRIES_SHIFT));
-   OUT_BATCH(((brw->urb.vs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) |
+   OUT_BATCH(((brw->urb.gs_size - 1) << GEN6_URB_GS_SIZE_SHIFT) |
 	     ((brw->urb.nr_gs_entries) << GEN6_URB_GS_ENTRIES_SHIFT));
    ADVANCE_BATCH();
 }




More information about the mesa-commit mailing list