[Mesa-dev] [PATCH 1/2] i965/fs: Add gen6 register spilling support.

Eric Anholt eric at anholt.net
Thu Apr 14 23:53:22 PDT 2011


Most of this is code movement to get the scratch space allocated in a
shared location.  Other than that, the only real changes are that the
old oword block messages now operate on oword-aligned areas (with new
messages for unaligned access, which we don't do), and that the
caching control is in the SFID part of the descriptor instead of
message control.

Fixes glsl-fs-convolution-1.
---
 src/mesa/drivers/dri/i965/brw_eu_emit.c           |   45 +++++++++++++++++----
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp |    2 -
 src/mesa/drivers/dri/i965/brw_wm.c                |   15 +++++++
 src/mesa/drivers/dri/i965/brw_wm_state.c          |   20 ---------
 src/mesa/drivers/dri/i965/gen6_wm_state.c         |    7 +++-
 5 files changed, 58 insertions(+), 31 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 57313a5..2d654e7 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -496,6 +496,8 @@ static void brw_set_dp_write_message( struct brw_context *brw,
        insn->bits3.dp_render_cache.response_length = response_length;
        insn->bits3.dp_render_cache.msg_length = msg_length;
        insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
+
+       /* We always use the render cache for write messages */
        insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
 	/* XXX really need below? */
        insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
@@ -539,6 +541,13 @@ brw_set_dp_read_message(struct brw_context *brw,
    brw_set_src1(insn, brw_imm_d(0));
 
    if (intel->gen >= 6) {
+       uint32_t target_function;
+
+       if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE)
+	  target_function = BRW_MESSAGE_TARGET_DATAPORT_READ; /* data cache */
+       else
+	  target_function = BRW_MESSAGE_TARGET_DATAPORT_WRITE; /* render cache */
+
        insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
        insn->bits3.dp_render_cache.msg_control = msg_control;
        insn->bits3.dp_render_cache.pixel_scoreboard_clear = 0;
@@ -548,9 +557,9 @@ brw_set_dp_read_message(struct brw_context *brw,
        insn->bits3.dp_render_cache.response_length = response_length;
        insn->bits3.dp_render_cache.msg_length = msg_length;
        insn->bits3.dp_render_cache.end_of_thread = 0;
-       insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_READ;
+       insn->header.destreg__conditionalmod = target_function;
 	/* XXX really need below? */
-       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
+       insn->bits2.send_gen5.sfid = target_function;
        insn->bits2.send_gen5.end_of_thread = 0;
    } else if (intel->gen == 5) {
        insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
@@ -1486,9 +1495,12 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
 				   GLuint offset)
 {
    struct intel_context *intel = &p->brw->intel;
-   uint32_t msg_control;
+   uint32_t msg_control, msg_type;
    int mlen;
 
+   if (intel->gen >= 6)
+      offset /= 16;
+
    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
 
    if (num_regs == 1) {
@@ -1554,13 +1566,22 @@ void brw_oword_block_write_scratch(struct brw_compile *p,
       }
 
       brw_set_dest(p, insn, dest);
-      brw_set_src0(insn, brw_null_reg());
+      if (intel->gen >= 6) {
+	 brw_set_src0(insn, mrf);
+      } else {
+	 brw_set_src0(insn, brw_null_reg());
+      }
+
+      if (intel->gen >= 6)
+	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
+      else
+	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
 
       brw_set_dp_write_message(p->brw,
 			       insn,
 			       255, /* binding table index (255=stateless) */
 			       msg_control,
-			       BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
+			       msg_type,
 			       mlen,
 			       GL_TRUE, /* header_present */
 			       0, /* pixel scoreboard */
@@ -1585,9 +1606,13 @@ brw_oword_block_read_scratch(struct brw_compile *p,
 			     int num_regs,
 			     GLuint offset)
 {
+   struct intel_context *intel = &p->brw->intel;
    uint32_t msg_control;
    int rlen;
 
+   if (intel->gen >= 6)
+      offset /= 16;
+
    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
    dest = retype(dest, BRW_REGISTER_TYPE_UW);
 
@@ -1624,14 +1649,18 @@ brw_oword_block_read_scratch(struct brw_compile *p,
       insn->header.destreg__conditionalmod = mrf.nr;
 
       brw_set_dest(p, insn, dest);	/* UW? */
-      brw_set_src0(insn, brw_null_reg());
+      if (intel->gen >= 6) {
+	 brw_set_src0(insn, mrf);
+      } else {
+	 brw_set_src0(insn, brw_null_reg());
+      }
 
       brw_set_dp_read_message(p->brw,
 			      insn,
 			      255, /* binding table index (255=stateless) */
 			      msg_control,
 			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
-			      1, /* target cache (render/scratch) */
+			      BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
 			      1, /* msg_length */
 			      rlen);
    }
@@ -1839,7 +1868,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
 			   bind_table_index,
 			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
 			   msg_type,
-			   0, /* source cache = data cache */
+			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
 			   2, /* msg_length */
 			   1); /* response_length */
 }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 479a914..67f29ce 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -228,8 +228,6 @@ fs_visitor::assign_regs()
 
       if (reg == -1) {
 	 fail("no register to spill\n");
-      } else if (intel->gen >= 6) {
-	 fail("no spilling support on gen6 yet\n");
       } else {
 	 spill_reg(reg);
       }
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 2dd28fd..ab731a8 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -185,6 +185,7 @@ static void do_wm_prog( struct brw_context *brw,
 			struct brw_fragment_program *fp, 
 			struct brw_wm_prog_key *key)
 {
+   struct intel_context *intel = &brw->intel;
    struct brw_wm_compile *c;
    const GLuint *program;
    GLuint program_size;
@@ -238,12 +239,26 @@ static void do_wm_prog( struct brw_context *brw,
 
    /* Scratch space is used for register spilling */
    if (c->last_scratch) {
+      uint32_t total_scratch;
+
       /* Per-thread scratch space is power-of-two sized. */
       for (c->prog_data.total_scratch = 1024;
 	   c->prog_data.total_scratch <= c->last_scratch;
 	   c->prog_data.total_scratch *= 2) {
 	 /* empty */
       }
+      total_scratch = c->prog_data.total_scratch * brw->wm_max_threads;
+
+      if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) {
+	 drm_intel_bo_unreference(brw->wm.scratch_bo);
+	 brw->wm.scratch_bo = NULL;
+      }
+      if (brw->wm.scratch_bo == NULL) {
+	 brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
+						 "wm scratch",
+						 total_scratch,
+						 4096);
+      }
    }
    else {
       c->prog_data.total_scratch = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 5b5afc4..be4b260 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -278,30 +278,10 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
 
 static void upload_wm_unit( struct brw_context *brw )
 {
-   struct intel_context *intel = &brw->intel;
    struct brw_wm_unit_key key;
    drm_intel_bo *reloc_bufs[3];
    wm_unit_populate_key(brw, &key);
 
-   /* Allocate the necessary scratch space if we haven't already.  Don't
-    * bother reducing the allocation later, since we use scratch so
-    * rarely.
-    */
-   if (key.total_scratch) {
-      GLuint total = key.total_scratch * brw->wm_max_threads;
-
-      if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
-	 drm_intel_bo_unreference(brw->wm.scratch_bo);
-	 brw->wm.scratch_bo = NULL;
-      }
-      if (brw->wm.scratch_bo == NULL) {
-	 brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
-						 "wm scratch",
-						 total,
-						 4096);
-      }
-   }
-
    reloc_bufs[0] = brw->wm.prog_bo;
    reloc_bufs[1] = brw->wm.scratch_bo;
    reloc_bufs[2] = brw->wm.sampler_bo;
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index f4f0475..8215cb1 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -184,7 +184,12 @@ upload_wm_state(struct brw_context *brw)
    OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
    OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(dw2);
-   OUT_BATCH(0); /* scratch space base offset */
+   if (brw->wm.prog_data->total_scratch) {
+      OUT_RELOC(brw->wm.scratch_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+		ffs(brw->wm.prog_data->total_scratch) - 11);
+   } else {
+      OUT_BATCH(0);
+   }
    OUT_BATCH(dw4);
    OUT_BATCH(dw5);
    OUT_BATCH(dw6);
-- 
1.7.4.1



More information about the mesa-dev mailing list