Mesa (master): i965/fs: When >64 input components, order them to match prev pipeline stage.

Paul Berry stereotype441 at kemper.freedesktop.org
Mon Sep 16 20:07:16 UTC 2013


Module: Mesa
Branch: master
Commit: 875972029eddfd53cb90a8e34e9f27b2afed119f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=875972029eddfd53cb90a8e34e9f27b2afed119f

Author: Paul Berry <stereotype441 at gmail.com>
Date:   Tue Sep  3 12:15:53 2013 -0700

i965/fs: When >64 input components, order them to match prev pipeline stage.

Since the SF/SBE stage is only capable of performing arbitrary
reorderings of 16 varying slots, we can't arrange the fragment shader
inputs in an arbitrary order if there are more than 16 input varying
slots in use.  We need to make sure that slots 16-31 match the
corresponding outputs of the previous pipeline stage.

The easiest way to accomplish this is to just make all varying slots
match up with the previous pipeline stage.

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

---

 src/mesa/drivers/dri/i965/brw_fs.cpp |   49 +++++++++++++++++++++++++++++----
 src/mesa/drivers/dri/i965/brw_wm.c   |    3 +-
 2 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 004b3b5..2ebadc8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1237,11 +1237,47 @@ fs_visitor::calculate_urb_setup()
    int urb_next = 0;
    /* Figure out where each of the incoming setup attributes lands. */
    if (brw->gen >= 6) {
-      for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) {
-	 if (fp->Base.InputsRead & BRW_FS_VARYING_INPUT_MASK &
-             BITFIELD64_BIT(i)) {
-	    c->prog_data.urb_setup[i] = urb_next++;
-	 }
+      if (_mesa_bitcount_64(fp->Base.InputsRead &
+                            BRW_FS_VARYING_INPUT_MASK) <= 16) {
+         /* The SF/SBE pipeline stage can do arbitrary rearrangement of the
+          * first 16 varying inputs, so we can put them wherever we want.
+          * Just put them in order.
+          *
+          * This is useful because it means that (a) inputs not used by the
+          * fragment shader won't take up valuable register space, and (b) we
+          * won't have to recompile the fragment shader if it gets paired with
+          * a different vertex (or geometry) shader.
+          */
+         for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) {
+            if (fp->Base.InputsRead & BRW_FS_VARYING_INPUT_MASK &
+                BITFIELD64_BIT(i)) {
+               c->prog_data.urb_setup[i] = urb_next++;
+            }
+         }
+      } else {
+         /* We have enough input varyings that the SF/SBE pipeline stage can't
+          * arbitrarily rearrange them to suit our whim; we have to put them
+          * in an order that matches the output of the previous pipeline stage
+          * (geometry or vertex shader).
+          */
+         struct brw_vue_map prev_stage_vue_map;
+         brw_compute_vue_map(brw, &prev_stage_vue_map,
+                             c->key.input_slots_valid);
+         int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
+         assert(prev_stage_vue_map.num_slots <= first_slot + 32);
+         for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
+              slot++) {
+            int varying = prev_stage_vue_map.slot_to_varying[slot];
+            /* Note that varying == BRW_VARYING_SLOT_COUNT when a slot is
+             * unused.
+             */
+            if (varying != BRW_VARYING_SLOT_COUNT &&
+                (fp->Base.InputsRead & BRW_FS_VARYING_INPUT_MASK &
+                 BITFIELD64_BIT(varying))) {
+               c->prog_data.urb_setup[varying] = slot - first_slot;
+            }
+         }
+         urb_next = prev_stage_vue_map.num_slots - first_slot;
       }
    } else {
       /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */
@@ -3149,7 +3185,8 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
       key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
    }
 
-   if (brw->gen < 6)
+   if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead &
+                                         BRW_FS_VARYING_INPUT_MASK) > 16)
       key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS;
 
    key.clamp_fragment_color = ctx->API == API_OPENGL_COMPAT;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 1b23a4f..3d7ca2a 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -466,7 +466,8 @@ static void brw_wm_populate_key( struct brw_context *brw,
       (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled);
 
    /* BRW_NEW_VUE_MAP_GEOM_OUT */
-   if (brw->gen < 6)
+   if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead &
+                                         BRW_FS_VARYING_INPUT_MASK) > 16)
       key->input_slots_valid = brw->vue_map_geom_out.slots_valid;
 
    /* The unique fragment program ID */




More information about the mesa-commit mailing list