[Mesa-dev] [PATCH 1/2] i965/fs: Add gen6 register spilling support.

Zou, Nanhai nanhai.zou at intel.com
Fri Apr 15 00:00:25 PDT 2011


Hi Eric,
	The BSpec says the VS and WM should fall back to a single thread to avoid races if they use scratch space.

Thanks
Zou Nanhai

>>-----Original Message-----
>>From: mesa-dev-bounces+nanhai.zou=intel.com at lists.freedesktop.org
>>[mailto:mesa-dev-bounces+nanhai.zou=intel.com at lists.freedesktop.org] On
>>Behalf Of Eric Anholt
>>Sent: 2011年4月15日 14:53
>>To: mesa-dev at lists.freedesktop.org
>>Subject: [Mesa-dev] [PATCH 1/2] i965/fs: Add gen6 register spilling support.
>>
>>Most of this is code movement to get the scratch space allocated in a
>>shared location.  Other than that, the only real changes are that the
>>old oword block messages now operate on oword-aligned areas (with new
>>messages for unaligned access, which we don't do), and that the
>>caching control is in the SFID part of the descriptor instead of
>>message control.
>>
>>Fixes glsl-fs-convolution-1.
>>---
>> src/mesa/drivers/dri/i965/brw_eu_emit.c           |   45
>>+++++++++++++++++----
>> src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp |    2 -
>> src/mesa/drivers/dri/i965/brw_wm.c                |   15 +++++++
>> src/mesa/drivers/dri/i965/brw_wm_state.c          |   20 ---------
>> src/mesa/drivers/dri/i965/gen6_wm_state.c         |    7 +++-
>> 5 files changed, 58 insertions(+), 31 deletions(-)
>>
>>diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c
>>b/src/mesa/drivers/dri/i965/brw_eu_emit.c
>>index 57313a5..2d654e7 100644
>>--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
>>+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
>>@@ -496,6 +496,8 @@ static void brw_set_dp_write_message( struct brw_context
>>*brw,
>>        insn->bits3.dp_render_cache.response_length = response_length;
>>        insn->bits3.dp_render_cache.msg_length = msg_length;
>>        insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
>>+
>>+       /* We always use the render cache for write messages */
>>        insn->header.destreg__conditionalmod =
>>BRW_MESSAGE_TARGET_DATAPORT_WRITE;
>> 	/* XXX really need below? */
>>        insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
>>@@ -539,6 +541,13 @@ brw_set_dp_read_message(struct brw_context *brw,
>>    brw_set_src1(insn, brw_imm_d(0));
>>
>>    if (intel->gen >= 6) {
>>+       uint32_t target_function;
>>+
>>+       if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE)
>>+	  target_function = BRW_MESSAGE_TARGET_DATAPORT_READ; /* data cache */
>>+       else
>>+	  target_function = BRW_MESSAGE_TARGET_DATAPORT_WRITE; /* render cache
>>*/
>>+
>>        insn->bits3.dp_render_cache.binding_table_index =
>>binding_table_index;
>>        insn->bits3.dp_render_cache.msg_control = msg_control;
>>        insn->bits3.dp_render_cache.pixel_scoreboard_clear = 0;
>>@@ -548,9 +557,9 @@ brw_set_dp_read_message(struct brw_context *brw,
>>        insn->bits3.dp_render_cache.response_length = response_length;
>>        insn->bits3.dp_render_cache.msg_length = msg_length;
>>        insn->bits3.dp_render_cache.end_of_thread = 0;
>>-       insn->header.destreg__conditionalmod =
>>BRW_MESSAGE_TARGET_DATAPORT_READ;
>>+       insn->header.destreg__conditionalmod = target_function;
>> 	/* XXX really need below? */
>>-       insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
>>+       insn->bits2.send_gen5.sfid = target_function;
>>        insn->bits2.send_gen5.end_of_thread = 0;
>>    } else if (intel->gen == 5) {
>>        insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
>>@@ -1486,9 +1495,12 @@ void brw_oword_block_write_scratch(struct brw_compile
>>*p,
>> 				   GLuint offset)
>> {
>>    struct intel_context *intel = &p->brw->intel;
>>-   uint32_t msg_control;
>>+   uint32_t msg_control, msg_type;
>>    int mlen;
>>
>>+   if (intel->gen >= 6)
>>+      offset /= 16;
>>+
>>    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
>>
>>    if (num_regs == 1) {
>>@@ -1554,13 +1566,22 @@ void brw_oword_block_write_scratch(struct brw_compile
>>*p,
>>       }
>>
>>       brw_set_dest(p, insn, dest);
>>-      brw_set_src0(insn, brw_null_reg());
>>+      if (intel->gen >= 6) {
>>+	 brw_set_src0(insn, mrf);
>>+      } else {
>>+	 brw_set_src0(insn, brw_null_reg());
>>+      }
>>+
>>+      if (intel->gen >= 6)
>>+	 msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
>>+      else
>>+	 msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
>>
>>       brw_set_dp_write_message(p->brw,
>> 			       insn,
>> 			       255, /* binding table index (255=stateless) */
>> 			       msg_control,
>>-			       BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /*
>>msg_type */
>>+			       msg_type,
>> 			       mlen,
>> 			       GL_TRUE, /* header_present */
>> 			       0, /* pixel scoreboard */
>>@@ -1585,9 +1606,13 @@ brw_oword_block_read_scratch(struct brw_compile *p,
>> 			     int num_regs,
>> 			     GLuint offset)
>> {
>>+   struct intel_context *intel = &p->brw->intel;
>>    uint32_t msg_control;
>>    int rlen;
>>
>>+   if (intel->gen >= 6)
>>+      offset /= 16;
>>+
>>    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
>>    dest = retype(dest, BRW_REGISTER_TYPE_UW);
>>
>>@@ -1624,14 +1649,18 @@ brw_oword_block_read_scratch(struct brw_compile *p,
>>       insn->header.destreg__conditionalmod = mrf.nr;
>>
>>       brw_set_dest(p, insn, dest);	/* UW? */
>>-      brw_set_src0(insn, brw_null_reg());
>>+      if (intel->gen >= 6) {
>>+	 brw_set_src0(insn, mrf);
>>+      } else {
>>+	 brw_set_src0(insn, brw_null_reg());
>>+      }
>>
>>       brw_set_dp_read_message(p->brw,
>> 			      insn,
>> 			      255, /* binding table index (255=stateless) */
>> 			      msg_control,
>> 			      BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type
>>*/
>>-			      1, /* target cache (render/scratch) */
>>+			      BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
>> 			      1, /* msg_length */
>> 			      rlen);
>>    }
>>@@ -1839,7 +1868,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
>> 			   bind_table_index,
>> 			   BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
>> 			   msg_type,
>>-			   0, /* source cache = data cache */
>>+			   BRW_DATAPORT_READ_TARGET_DATA_CACHE,
>> 			   2, /* msg_length */
>> 			   1); /* response_length */
>> }
>>diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
>>b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
>>index 479a914..67f29ce 100644
>>--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
>>+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
>>@@ -228,8 +228,6 @@ fs_visitor::assign_regs()
>>
>>       if (reg == -1) {
>> 	 fail("no register to spill\n");
>>-      } else if (intel->gen >= 6) {
>>-	 fail("no spilling support on gen6 yet\n");
>>       } else {
>> 	 spill_reg(reg);
>>       }
>>diff --git a/src/mesa/drivers/dri/i965/brw_wm.c
>>b/src/mesa/drivers/dri/i965/brw_wm.c
>>index 2dd28fd..ab731a8 100644
>>--- a/src/mesa/drivers/dri/i965/brw_wm.c
>>+++ b/src/mesa/drivers/dri/i965/brw_wm.c
>>@@ -185,6 +185,7 @@ static void do_wm_prog( struct brw_context *brw,
>> 			struct brw_fragment_program *fp,
>> 			struct brw_wm_prog_key *key)
>> {
>>+   struct intel_context *intel = &brw->intel;
>>    struct brw_wm_compile *c;
>>    const GLuint *program;
>>    GLuint program_size;
>>@@ -238,12 +239,26 @@ static void do_wm_prog( struct brw_context *brw,
>>
>>    /* Scratch space is used for register spilling */
>>    if (c->last_scratch) {
>>+      uint32_t total_scratch;
>>+
>>       /* Per-thread scratch space is power-of-two sized. */
>>       for (c->prog_data.total_scratch = 1024;
>> 	   c->prog_data.total_scratch <= c->last_scratch;
>> 	   c->prog_data.total_scratch *= 2) {
>> 	 /* empty */
>>       }
>>+      total_scratch = c->prog_data.total_scratch * brw->wm_max_threads;
>>+
>>+      if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) {
>>+	 drm_intel_bo_unreference(brw->wm.scratch_bo);
>>+	 brw->wm.scratch_bo = NULL;
>>+      }
>>+      if (brw->wm.scratch_bo == NULL) {
>>+	 brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
>>+						 "wm scratch",
>>+						 total_scratch,
>>+						 4096);
>>+      }
>>    }
>>    else {
>>       c->prog_data.total_scratch = 0;
>>diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c
>>b/src/mesa/drivers/dri/i965/brw_wm_state.c
>>index 5b5afc4..be4b260 100644
>>--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
>>+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
>>@@ -278,30 +278,10 @@ wm_unit_create_from_key(struct brw_context *brw, struct
>>brw_wm_unit_key *key,
>>
>> static void upload_wm_unit( struct brw_context *brw )
>> {
>>-   struct intel_context *intel = &brw->intel;
>>    struct brw_wm_unit_key key;
>>    drm_intel_bo *reloc_bufs[3];
>>    wm_unit_populate_key(brw, &key);
>>
>>-   /* Allocate the necessary scratch space if we haven't already.  Don't
>>-    * bother reducing the allocation later, since we use scratch so
>>-    * rarely.
>>-    */
>>-   if (key.total_scratch) {
>>-      GLuint total = key.total_scratch * brw->wm_max_threads;
>>-
>>-      if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
>>-	 drm_intel_bo_unreference(brw->wm.scratch_bo);
>>-	 brw->wm.scratch_bo = NULL;
>>-      }
>>-      if (brw->wm.scratch_bo == NULL) {
>>-	 brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
>>-						 "wm scratch",
>>-						 total,
>>-						 4096);
>>-      }
>>-   }
>>-
>>    reloc_bufs[0] = brw->wm.prog_bo;
>>    reloc_bufs[1] = brw->wm.scratch_bo;
>>    reloc_bufs[2] = brw->wm.sampler_bo;
>>diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c
>>b/src/mesa/drivers/dri/i965/gen6_wm_state.c
>>index f4f0475..8215cb1 100644
>>--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
>>+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
>>@@ -184,7 +184,12 @@ upload_wm_state(struct brw_context *brw)
>>    OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
>>    OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
>>    OUT_BATCH(dw2);
>>-   OUT_BATCH(0); /* scratch space base offset */
>>+   if (brw->wm.prog_data->total_scratch) {
>>+      OUT_RELOC(brw->wm.scratch_bo, I915_GEM_DOMAIN_RENDER,
>>I915_GEM_DOMAIN_RENDER,
>>+		ffs(brw->wm.prog_data->total_scratch) - 11);
>>+   } else {
>>+      OUT_BATCH(0);
>>+   }
>>    OUT_BATCH(dw4);
>>    OUT_BATCH(dw5);
>>    OUT_BATCH(dw6);
>>--
>>1.7.4.1
>>
>>_______________________________________________
>>mesa-dev mailing list
>>mesa-dev at lists.freedesktop.org
>>http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list