[Mesa-dev] [PATCH 1/2] i965/fs: Add gen6 register spilling support.
Zou, Nanhai
nanhai.zou at intel.com
Fri Apr 15 00:00:25 PDT 2011
Hi Eric,
The BSpec says that the VS and WM should fall back to a single thread when scratch space is used, to avoid races.
Thanks
Zou Nanhai
>>-----Original Message-----
>>From: mesa-dev-bounces+nanhai.zou=intel.com at lists.freedesktop.org
>>[mailto:mesa-dev-bounces+nanhai.zou=intel.com at lists.freedesktop.org] On
>>Behalf Of Eric Anholt
>>Sent: 2011年4月15日 14:53
>>To: mesa-dev at lists.freedesktop.org
>>Subject: [Mesa-dev] [PATCH 1/2] i965/fs: Add gen6 register spilling support.
>>
>>Most of this is code movement to get the scratch space allocated in a
>>shared location. Other than that, the only real changes are that the
>>old oword block messages now operate on oword-aligned areas (with new
>>messages for unaligned access, which we don't do), and that the
>>caching control is in the SFID part of the descriptor instead of
>>message control.
>>
>>Fixes glsl-fs-convolution-1.
>>---
>> src/mesa/drivers/dri/i965/brw_eu_emit.c | 45
>>+++++++++++++++++----
>> src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 2 -
>> src/mesa/drivers/dri/i965/brw_wm.c | 15 +++++++
>> src/mesa/drivers/dri/i965/brw_wm_state.c | 20 ---------
>> src/mesa/drivers/dri/i965/gen6_wm_state.c | 7 +++-
>> 5 files changed, 58 insertions(+), 31 deletions(-)
>>
>>diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c
>>b/src/mesa/drivers/dri/i965/brw_eu_emit.c
>>index 57313a5..2d654e7 100644
>>--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
>>+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
>>@@ -496,6 +496,8 @@ static void brw_set_dp_write_message( struct brw_context
>>*brw,
>> insn->bits3.dp_render_cache.response_length = response_length;
>> insn->bits3.dp_render_cache.msg_length = msg_length;
>> insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
>>+
>>+ /* We always use the render cache for write messages */
>> insn->header.destreg__conditionalmod =
>>BRW_MESSAGE_TARGET_DATAPORT_WRITE;
>> /* XXX really need below? */
>> insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
>>@@ -539,6 +541,13 @@ brw_set_dp_read_message(struct brw_context *brw,
>> brw_set_src1(insn, brw_imm_d(0));
>>
>> if (intel->gen >= 6) {
>>+ uint32_t target_function;
>>+
>>+ if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE)
>>+ target_function = BRW_MESSAGE_TARGET_DATAPORT_READ; /* data cache */
>>+ else
>>+ target_function = BRW_MESSAGE_TARGET_DATAPORT_WRITE; /* render cache
>>*/
>>+
>> insn->bits3.dp_render_cache.binding_table_index =
>>binding_table_index;
>> insn->bits3.dp_render_cache.msg_control = msg_control;
>> insn->bits3.dp_render_cache.pixel_scoreboard_clear = 0;
>>@@ -548,9 +557,9 @@ brw_set_dp_read_message(struct brw_context *brw,
>> insn->bits3.dp_render_cache.response_length = response_length;
>> insn->bits3.dp_render_cache.msg_length = msg_length;
>> insn->bits3.dp_render_cache.end_of_thread = 0;
>>- insn->header.destreg__conditionalmod =
>>BRW_MESSAGE_TARGET_DATAPORT_READ;
>>+ insn->header.destreg__conditionalmod = target_function;
>> /* XXX really need below? */
>>- insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
>>+ insn->bits2.send_gen5.sfid = target_function;
>> insn->bits2.send_gen5.end_of_thread = 0;
>> } else if (intel->gen == 5) {
>> insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
>>@@ -1486,9 +1495,12 @@ void brw_oword_block_write_scratch(struct brw_compile
>>*p,
>> GLuint offset)
>> {
>> struct intel_context *intel = &p->brw->intel;
>>- uint32_t msg_control;
>>+ uint32_t msg_control, msg_type;
>> int mlen;
>>
>>+ if (intel->gen >= 6)
>>+ offset /= 16;
>>+
>> mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
>>
>> if (num_regs == 1) {
>>@@ -1554,13 +1566,22 @@ void brw_oword_block_write_scratch(struct brw_compile
>>*p,
>> }
>>
>> brw_set_dest(p, insn, dest);
>>- brw_set_src0(insn, brw_null_reg());
>>+ if (intel->gen >= 6) {
>>+ brw_set_src0(insn, mrf);
>>+ } else {
>>+ brw_set_src0(insn, brw_null_reg());
>>+ }
>>+
>>+ if (intel->gen >= 6)
>>+ msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
>>+ else
>>+ msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
>>
>> brw_set_dp_write_message(p->brw,
>> insn,
>> 255, /* binding table index (255=stateless) */
>> msg_control,
>>- BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /*
>>msg_type */
>>+ msg_type,
>> mlen,
>> GL_TRUE, /* header_present */
>> 0, /* pixel scoreboard */
>>@@ -1585,9 +1606,13 @@ brw_oword_block_read_scratch(struct brw_compile *p,
>> int num_regs,
>> GLuint offset)
>> {
>>+ struct intel_context *intel = &p->brw->intel;
>> uint32_t msg_control;
>> int rlen;
>>
>>+ if (intel->gen >= 6)
>>+ offset /= 16;
>>+
>> mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
>> dest = retype(dest, BRW_REGISTER_TYPE_UW);
>>
>>@@ -1624,14 +1649,18 @@ brw_oword_block_read_scratch(struct brw_compile *p,
>> insn->header.destreg__conditionalmod = mrf.nr;
>>
>> brw_set_dest(p, insn, dest); /* UW? */
>>- brw_set_src0(insn, brw_null_reg());
>>+ if (intel->gen >= 6) {
>>+ brw_set_src0(insn, mrf);
>>+ } else {
>>+ brw_set_src0(insn, brw_null_reg());
>>+ }
>>
>> brw_set_dp_read_message(p->brw,
>> insn,
>> 255, /* binding table index (255=stateless) */
>> msg_control,
>> BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type
>>*/
>>- 1, /* target cache (render/scratch) */
>>+ BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
>> 1, /* msg_length */
>> rlen);
>> }
>>@@ -1839,7 +1868,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
>> bind_table_index,
>> BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
>> msg_type,
>>- 0, /* source cache = data cache */
>>+ BRW_DATAPORT_READ_TARGET_DATA_CACHE,
>> 2, /* msg_length */
>> 1); /* response_length */
>> }
>>diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
>>b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
>>index 479a914..67f29ce 100644
>>--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
>>+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
>>@@ -228,8 +228,6 @@ fs_visitor::assign_regs()
>>
>> if (reg == -1) {
>> fail("no register to spill\n");
>>- } else if (intel->gen >= 6) {
>>- fail("no spilling support on gen6 yet\n");
>> } else {
>> spill_reg(reg);
>> }
>>diff --git a/src/mesa/drivers/dri/i965/brw_wm.c
>>b/src/mesa/drivers/dri/i965/brw_wm.c
>>index 2dd28fd..ab731a8 100644
>>--- a/src/mesa/drivers/dri/i965/brw_wm.c
>>+++ b/src/mesa/drivers/dri/i965/brw_wm.c
>>@@ -185,6 +185,7 @@ static void do_wm_prog( struct brw_context *brw,
>> struct brw_fragment_program *fp,
>> struct brw_wm_prog_key *key)
>> {
>>+ struct intel_context *intel = &brw->intel;
>> struct brw_wm_compile *c;
>> const GLuint *program;
>> GLuint program_size;
>>@@ -238,12 +239,26 @@ static void do_wm_prog( struct brw_context *brw,
>>
>> /* Scratch space is used for register spilling */
>> if (c->last_scratch) {
>>+ uint32_t total_scratch;
>>+
>> /* Per-thread scratch space is power-of-two sized. */
>> for (c->prog_data.total_scratch = 1024;
>> c->prog_data.total_scratch <= c->last_scratch;
>> c->prog_data.total_scratch *= 2) {
>> /* empty */
>> }
>>+ total_scratch = c->prog_data.total_scratch * brw->wm_max_threads;
>>+
>>+ if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) {
>>+ drm_intel_bo_unreference(brw->wm.scratch_bo);
>>+ brw->wm.scratch_bo = NULL;
>>+ }
>>+ if (brw->wm.scratch_bo == NULL) {
>>+ brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
>>+ "wm scratch",
>>+ total_scratch,
>>+ 4096);
>>+ }
>> }
>> else {
>> c->prog_data.total_scratch = 0;
>>diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c
>>b/src/mesa/drivers/dri/i965/brw_wm_state.c
>>index 5b5afc4..be4b260 100644
>>--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
>>+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
>>@@ -278,30 +278,10 @@ wm_unit_create_from_key(struct brw_context *brw, struct
>>brw_wm_unit_key *key,
>>
>> static void upload_wm_unit( struct brw_context *brw )
>> {
>>- struct intel_context *intel = &brw->intel;
>> struct brw_wm_unit_key key;
>> drm_intel_bo *reloc_bufs[3];
>> wm_unit_populate_key(brw, &key);
>>
>>- /* Allocate the necessary scratch space if we haven't already. Don't
>>- * bother reducing the allocation later, since we use scratch so
>>- * rarely.
>>- */
>>- if (key.total_scratch) {
>>- GLuint total = key.total_scratch * brw->wm_max_threads;
>>-
>>- if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
>>- drm_intel_bo_unreference(brw->wm.scratch_bo);
>>- brw->wm.scratch_bo = NULL;
>>- }
>>- if (brw->wm.scratch_bo == NULL) {
>>- brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
>>- "wm scratch",
>>- total,
>>- 4096);
>>- }
>>- }
>>-
>> reloc_bufs[0] = brw->wm.prog_bo;
>> reloc_bufs[1] = brw->wm.scratch_bo;
>> reloc_bufs[2] = brw->wm.sampler_bo;
>>diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c
>>b/src/mesa/drivers/dri/i965/gen6_wm_state.c
>>index f4f0475..8215cb1 100644
>>--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
>>+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
>>@@ -184,7 +184,12 @@ upload_wm_state(struct brw_context *brw)
>> OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
>> OUT_RELOC(brw->wm.prog_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
>> OUT_BATCH(dw2);
>>- OUT_BATCH(0); /* scratch space base offset */
>>+ if (brw->wm.prog_data->total_scratch) {
>>+ OUT_RELOC(brw->wm.scratch_bo, I915_GEM_DOMAIN_RENDER,
>>I915_GEM_DOMAIN_RENDER,
>>+ ffs(brw->wm.prog_data->total_scratch) - 11);
>>+ } else {
>>+ OUT_BATCH(0);
>>+ }
>> OUT_BATCH(dw4);
>> OUT_BATCH(dw5);
>> OUT_BATCH(dw6);
>>--
>>1.7.4.1
>>
>>_______________________________________________
>>mesa-dev mailing list
>>mesa-dev at lists.freedesktop.org
>>http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list