[Mesa-dev] [PATCH 1/2] i965/brw: Emit state for hiz and separate stencil buffers
Chad Versace
chad at chad-versace.us
Sat Jun 4 16:29:40 PDT 2011
On 06/03/2011 03:33 PM, Kenneth Graunke wrote:
> On 06/03/2011 12:47 PM, Chad Versace wrote:
>> When emitting 3DSTATE_DEPTH_BUFFER, also emit 3DSTATE_HIER_DEPTH_BUFFER if
>> there is a hiz buffer. Ditto for 3DSTATE_STENCIL_BUFFER and a separate
>> stencil buffer.
>>
>> Signed-off-by: Chad Versace<chad at chad-versace.us>
>
> Overall, looks good...a few comments inline.
>
>> ---
>> src/mesa/drivers/dri/i965/brw_misc_state.c | 114 ++++++++++++++++++++--
>> src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 1 +
>> 2 files changed, 106 insertions(+), 9 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
>> index 3ec9009..84120da 100644
>> --- a/src/mesa/drivers/dri/i965/brw_misc_state.c
>> +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
>> @@ -202,6 +202,8 @@ static void prepare_depthbuffer(struct brw_context *brw)
>>
>> if (drb)
>> brw_add_validated_bo(brw, drb->region->buffer);
>> + if (drb&& drb->hiz_region)
>> + brw_add_validated_bo(brw, drb->hiz_region->buffer);
>> if (srb)
>> brw_add_validated_bo(brw, srb->region->buffer);
>> }
>> @@ -212,14 +214,28 @@ static void emit_depthbuffer(struct brw_context *brw)
>> struct gl_context *ctx =&intel->ctx;
>> struct gl_framebuffer *fb = ctx->DrawBuffer;
>> /* _NEW_BUFFERS */
>> - struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
>> + struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
>> + struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
>> + struct intel_region *hiz_region = depth_irb ? depth_irb->hiz_region : NULL;
>> unsigned int len;
>>
>> - /* If we're combined depth stencil but no depth is attached, look
>> - * up stencil.
>> + /*
>> + * If depth and stencil buffers are identical, then don't use separate
>> + * stencil.
>> */
>> - if (!irb)
>> - irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
>> + if (depth_irb&& depth_irb == stencil_irb) {
>> + stencil_irb = NULL;
>> + }
>> +
>> + /*
>> + * If stencil buffer uses combined depth/stencil format, but no depth buffer
>> + * is attached, then use stencil buffer as depth buffer.
>> + */
>> + if (!depth_irb&& stencil_irb
>> +&& stencil_irb->Base.Format == MESA_FORMAT_S8_Z24) {
>> + depth_irb = stencil_irb;
>> + stencil_irb = NULL;
>> + }
>>
>> if (intel->gen>= 6)
>> len = 7;
>> @@ -228,7 +244,7 @@ static void emit_depthbuffer(struct brw_context *brw)
>> else
>> len = 5;
>>
>> - if (!irb) {
>> + if (!depth_irb&& !stencil_irb) {
>> BEGIN_BATCH(len);
>> OUT_BATCH(_3DSTATE_DEPTH_BUFFER<< 16 | (len - 2));
>> OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT<< 18) |
>> @@ -244,11 +260,57 @@ static void emit_depthbuffer(struct brw_context *brw)
>> OUT_BATCH(0);
>>
>> ADVANCE_BATCH();
>> +
>> + } else if (!depth_irb&& stencil_irb) {
>> + /*
>> + * There exists a separate stencil buffer but no depth buffer.
>> + *
>> + * The stencil buffer inherits most of its fields from
>> + * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
>> + * height.
>> + *
>> + * Since the stencil buffer has quirky pitch requirements, its region
>> + * was allocated with half height and double cpp. So we need
>> + * a multiplier of 2 to obtain the surface's real height.
>> + *
>> + * Enable the hiz bit because it and the separate stencil bit must have
>> + * the same value. From Section 2.11.5.6.1.1 3DSTATE_DEPTH_BUFFER, Bit
>> + * 1.21 "Separate Stencil Enable":
>> + * [DevIL]: If this field is enabled, Hierarchical Depth Buffer
>> + * Enable must also be enabled.
>> + *
>> + * [DevGT]: This field must be set to the same value (enabled or
>> + * disabled) as Hierarchical Depth Buffer Enable
>> + */
>> + assert(intel->has_separate_stencil);
>> + assert(stencil_irb->Base.Format == MESA_FORMAT_S8);
>> +
>> + BEGIN_BATCH(len);
>> + OUT_BATCH(_3DSTATE_DEPTH_BUFFER<< 16 | (len - 2));
>> + OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT<< 18) |
>> + (1<< 21) | /* separate stencil enable */
>> + (1<< 22) | /* hiz enable */
>> + (BRW_TILEWALK_YMAJOR<< 26) |
>> + (BRW_SURFACE_2D<< 29));
>> + OUT_BATCH(0);
>> + OUT_BATCH(((stencil_irb->region->width - 1)<< 6) |
>> + (2 * stencil_irb->region->height - 1)<< 19);
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> +
>> + if (intel->gen>= 6)
>> + OUT_BATCH(0);
>> +
>> + ADVANCE_BATCH();
>> +
>> } else {
>> - struct intel_region *region = irb->region;
>> + struct intel_region *region = depth_irb->region;
>> unsigned int format;
>> uint32_t tile_x, tile_y, offset;
>>
>> + /* If using separate stencil, hiz must be enabled. */
>> + assert(!stencil_irb || hiz_region);
>> +
>> switch (region->cpp) {
>> case 2:
>> format = BRW_DEPTHFORMAT_D16_UNORM;
>> @@ -256,6 +318,8 @@ static void emit_depthbuffer(struct brw_context *brw)
>> case 4:
>> if (intel->depth_buffer_is_float)
>> format = BRW_DEPTHFORMAT_D32_FLOAT;
>> + else if (hiz_region)
>> + format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
>> else
>> format = BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
>> break;
>> @@ -267,11 +331,14 @@ static void emit_depthbuffer(struct brw_context *brw)
>> offset = intel_region_tile_offsets(region,&tile_x,&tile_y);
>>
>> assert(intel->gen< 6 || region->tiling == I915_TILING_Y);
>> + assert(!hiz_region || region->tiling == I915_TILING_Y);
>>
>> BEGIN_BATCH(len);
>> OUT_BATCH(_3DSTATE_DEPTH_BUFFER<< 16 | (len - 2));
>> OUT_BATCH(((region->pitch * region->cpp) - 1) |
>> (format<< 18) |
>> + ((hiz_region ? 1 : 0)<< 21) | /* separate stencil enable */
>> + ((hiz_region ? 1 : 0)<< 22) | /* hiz enable */
>
> This surprised me for a moment. The reason these two need to be the same is the DevGT comment - HiZ if-and-only-if Separate
> Stencil. But stencil_irb may not exist here. I don't know if that's really a problem, though...is it?
It doesn't cause problems. I've tested this on FBO's with depth attachment but no stencil attachment. You can see the test
results here [1]. See hiz-depth-*-fbo-d24-s0 in column 6.
[1] http://people.freedesktop.org/~chadversary/piglit-results/2011-06-04-1600-hiz-compare/summary/index.html
>
>> (BRW_TILEWALK_YMAJOR<< 26) |
>> ((region->tiling != I915_TILING_NONE)<< 27) |
>> (BRW_SURFACE_2D<< 29));
>> @@ -294,8 +361,37 @@ static void emit_depthbuffer(struct brw_context *brw)
>> ADVANCE_BATCH();
>> }
>>
>> - /* Initialize it for safety. */
>> - if (intel->gen>= 6) {
>> + /* Emit hiz buffer. */
>> + if (hiz_region) {
>> + BEGIN_BATCH(3);
>> + OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER<< 16) | (3 - 2));
>> + OUT_BATCH(hiz_region->pitch * hiz_region->cpp - 1);
>> + OUT_RELOC(hiz_region->buffer,
>> + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
>> + 0);
>> + ADVANCE_BATCH();
>> + }
>> +
>> + /* Emit stencil buffer. */
>> + if (stencil_irb) {
>> + BEGIN_BATCH(3);
>> + OUT_BATCH((_3DSTATE_STENCIL_BUFFER<< 16) | (3 - 2));
>> + OUT_BATCH(stencil_irb->region->pitch * stencil_irb->region->cpp - 1);
>> + OUT_RELOC(stencil_irb->region->buffer,
>> + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
>> + 0);
>> + ADVANCE_BATCH();
>> + }
>
> Do we need to emit 3DSTATE_STENCIL_BUFFER with all 0's in the stencil_irb == NULL case? Ditto for HiZ I guess. Just being a
> bit paranoid.
The test results for these paranoiac cases pass, so paranoia is unneeded. Regarding "Do we need to emit 3DSTATE_STENCIL_BUFFER
with all 0's in the stencil_irb == NULL case", see tests:
* hiz-depth-test-fbo-d24-s0 : column 6
* hiz-depth-stencil-fbo-d24-s0 : columns 3, 6
Regarding "Ditto for HiZ", the following test runs emit a stencil buffer but no hiz buffer:
* hiz-stencil-test-fbo-d0-s8 : column 6
* hiz-stencil-read-fbo-d0-s8 : column 6
* hiz-depth-stencil-fbo-d0-s8 : column 6
>
>> + /*
>> + * On Gen>= 6, emit clear params for safety. If using hiz, then clear
>> + * params must be emitted.
>> + *
>> + * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
>> + * 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
>> + * when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
>> + */
>> + if (intel->gen>= 6 || hiz_region) {
>> BEGIN_BATCH(2);
>> OUT_BATCH(_3DSTATE_CLEAR_PARAMS<< 16 | (2 - 2));
>> OUT_BATCH(0);
>> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
>> index 6c1eba6..5dadb5b 100644
>> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
>> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
>> @@ -134,6 +134,7 @@ brw_render_target_supported(gl_format format)
>> */
>> if (format == MESA_FORMAT_S8_Z24 ||
>> format == MESA_FORMAT_X8_Z24 ||
>> + format == MESA_FORMAT_S8 ||
>> format == MESA_FORMAT_Z16) {
>> return true;
>> }
>
> This seems unrelated to the rest of the patch, but is necessary.
True. I agree its only tangentially related, but this hunk doesn't make sense in its own patch either. I can't think of a
descriptive one-line commit title for such an isolated change.
> While you're at it, please add "case MESA_FORMAT_X8_Z24:" to
> translate_tex_format a few lines later. I had to add that to avoid assertion failures on Ivybridge when running
> glsl-fs-shadow2d.shader_test.
On your suggestion, I added this hunk to the patch.
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 5dadb5b..f560bc3 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -169,6 +169,7 @@ translate_tex_format(gl_format mesa_format,
return BRW_SURFACEFORMAT_L16_UNORM;
case MESA_FORMAT_S8_Z24:
+ case MESA_FORMAT_X8_Z24:
/* XXX: these different surface formats don't seem to
* make any difference for shadow sampler/compares.
*/
>
> --Kenneth
--
Chad Versace
chad at chad-versace.us
More information about the mesa-dev
mailing list