[Mesa-dev] [PATCH v2 3/5] anv: Add support for the PMA fix on Broadwell
Jason Ekstrand
jason at jlekstrand.net
Thu Feb 9 05:38:52 UTC 2017
On Wed, Feb 8, 2017 at 8:11 PM, Jason Ekstrand <jason at jlekstrand.net> wrote:
> On Wed, Feb 8, 2017 at 6:27 PM, Jason Ekstrand <jason at jlekstrand.net>
> wrote:
>
>> On Wed, Feb 8, 2017 at 5:34 PM, Nanley Chery <nanleychery at gmail.com>
>> wrote:
>>
>>> On Thu, Feb 02, 2017 at 01:26:05PM -0800, Jason Ekstrand wrote:
>>> > In order to get good performance numbers for this, I had to hack up the
>>> > driver to whack wm_prog_data::uses_kill to true to emulate a discard
>>> and
>>> > used the Sascha "shadowmapping" demo. Setting uses_kill to true
>>> dropped
>>> > the framerate on the demo by 25-30%. Enabling the PMA fix brought it
>>> > back up to around 90% of the original framerate. This doesn't seem to
>>> > really impact Dota 2; probably because it doesn't use 16-bit depth.
>>> >
>>> > Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>>> > ---
>>> > src/intel/vulkan/TODO | 1 -
>>> > src/intel/vulkan/anv_cmd_buffer.c | 2 +
>>> > src/intel/vulkan/anv_genX.h | 3 +
>>> > src/intel/vulkan/anv_private.h | 17 +++++
>>> > src/intel/vulkan/gen7_cmd_buffer.c | 7 ++
>>> > src/intel/vulkan/gen8_cmd_buffer.c | 133
>>> +++++++++++++++++++++++++++++++++++++
>>> > src/intel/vulkan/genX_blorp_exec.c | 5 ++
>>> > src/intel/vulkan/genX_cmd_buffer.c | 15 ++++-
>>> > src/intel/vulkan/genX_pipeline.c | 38 +++++++++++
>>> > 9 files changed, 219 insertions(+), 2 deletions(-)
>>> >
>>> > diff --git a/src/intel/vulkan/TODO b/src/intel/vulkan/TODO
>>> > index 38acc0d..f8b73a1 100644
>>> > --- a/src/intel/vulkan/TODO
>>> > +++ b/src/intel/vulkan/TODO
>>> > @@ -12,5 +12,4 @@ Performance:
>>> > - Compressed multisample support
>>> > - Pushing pieces of UBOs?
>>> > - Enable guardband clipping
>>> > - - pma stall workaround
>>> > - Use soft-pin to avoid relocations
>>> > diff --git a/src/intel/vulkan/anv_cmd_buffer.c
>>> b/src/intel/vulkan/anv_cmd_buffer.c
>>> > index 5886fa6..8c08f8d 100644
>>> > --- a/src/intel/vulkan/anv_cmd_buffer.c
>>> > +++ b/src/intel/vulkan/anv_cmd_buffer.c
>>> > @@ -135,6 +135,8 @@ anv_cmd_state_reset(struct anv_cmd_buffer
>>> *cmd_buffer)
>>> > state->restart_index = UINT32_MAX;
>>> > state->dynamic = default_dynamic_state;
>>> > state->need_query_wa = true;
>>> > + state->pma_fix_enabled = false;
>>> > + state->hiz_enabled = false;
>>> >
>>> > if (state->attachments != NULL) {
>>> > vk_free(&cmd_buffer->pool->alloc, state->attachments);
>>> > diff --git a/src/intel/vulkan/anv_genX.h b/src/intel/vulkan/anv_genX.h
>>> > index d04fe38..67147b0 100644
>>> > --- a/src/intel/vulkan/anv_genX.h
>>> > +++ b/src/intel/vulkan/anv_genX.h
>>> > @@ -55,6 +55,9 @@ void genX(cmd_buffer_flush_dynamic_state)(struct
>>> anv_cmd_buffer *cmd_buffer);
>>> >
>>> > void genX(cmd_buffer_flush_compute_state)(struct anv_cmd_buffer
>>> *cmd_buffer);
>>> >
>>> > +void genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer
>>> *cmd_buffer,
>>> > + bool enable);
>>> > +
>>> > void
>>> > genX(emit_urb_setup)(struct anv_device *device, struct anv_batch
>>> *batch,
>>> > const struct gen_l3_config *l3_config,
>>> > diff --git a/src/intel/vulkan/anv_private.h
>>> b/src/intel/vulkan/anv_private.h
>>> > index 4fe3ebc..6efe4ea 100644
>>> > --- a/src/intel/vulkan/anv_private.h
>>> > +++ b/src/intel/vulkan/anv_private.h
>>> > @@ -1163,6 +1163,20 @@ struct anv_cmd_state {
>>> > bool need_query_wa;
>>> >
>>> > /**
>>> > + * Whether or not the gen8 PMA fix is enabled. We ensure that, at
>>> the top
>>> > + * of any command buffer it disabled by disabling it in
>>> EndCommandBuffer
>>> ^
>>> is?
>>>
>>> > + * and before invoking the secondary in ExecuteCommands.
>>> > + */
>>> > + bool pma_fix_enabled;
>>> > +
>>> > + /**
>>> > + * Whether or not we now for certain that HiZ is enabled for the
>>> current
>>> ^
>>> know
>>>
>>> > + * subpass. If, for whatever reason, we are unsure as to whether
>>> HiZ is
>>> > + * enabled or not, this will be false.
>>> > + */
>>> > + bool hiz_enabled;
>>> > +
>>> > + /**
>>> > * Array length is anv_cmd_state::pass::attachment_count. Array
>>> content is
>>> > * valid only when recording a render pass instance.
>>> > */
>>> > @@ -1465,8 +1479,11 @@ struct anv_pipeline {
>>> >
>>> > uint32_t cs_right_mask;
>>> >
>>> > + bool writes_depth;
>>> > + bool depth_test_enable;
>>> > bool writes_stencil;
>>> > bool depth_clamp_enable;
>>> > + bool kill_pixel;
>>> >
>>> > struct {
>>> > uint32_t sf[7];
>>> > diff --git a/src/intel/vulkan/gen7_cmd_buffer.c
>>> b/src/intel/vulkan/gen7_cmd_buffer.c
>>> > index 013ed87..c1a25e8 100644
>>> > --- a/src/intel/vulkan/gen7_cmd_buffer.c
>>> > +++ b/src/intel/vulkan/gen7_cmd_buffer.c
>>> > @@ -260,6 +260,13 @@ genX(cmd_buffer_flush_dynamic_state)(struct
>>> anv_cmd_buffer *cmd_buffer)
>>> > cmd_buffer->state.dirty = 0;
>>> > }
>>> >
>>> > +void
>>> > +genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer,
>>> > + bool enable)
>>> > +{
>>> > + /* The NP PMA fix doesn't exist on gen7 */
>>> > +}
>>> > +
>>> > void genX(CmdSetEvent)(
>>> > VkCommandBuffer commandBuffer,
>>> > VkEvent event,
>>> > diff --git a/src/intel/vulkan/gen8_cmd_buffer.c
>>> b/src/intel/vulkan/gen8_cmd_buffer.c
>>> > index 8c8de62..271ab3f 100644
>>> > --- a/src/intel/vulkan/gen8_cmd_buffer.c
>>> > +++ b/src/intel/vulkan/gen8_cmd_buffer.c
>>> > @@ -155,6 +155,135 @@ __emit_sf_state(struct anv_cmd_buffer
>>> *cmd_buffer)
>>> > #endif
>>> >
>>> > void
>>> > +genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer,
>>> bool enable)
>>> > +{
>>> > +#if GEN_GEN == 8
>>> > + if (cmd_buffer->state.pma_fix_enabled == enable)
>>> > + return;
>>> > +
>>> > + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
>>> > + pc.DepthCacheFlushEnable = true;
>>> > + pc.CommandStreamerStallEnable = true;
>>> > + pc.RenderTargetCacheFlushEnable = true;
>>>
>>> Instead of flushing the depth and RT caches, what do you think about
>>> implementing a stall that writes 0 to the workaround bo? That should
>>> consume less bandwidth.
>>>
>>> To make the pipeline idle, flushing the caches shouldn't be necessary,
>>> we should be able to do it with a valid pipecontrol packet that has the
>>> CS stall bit set.
>>>
>>
>> Without having an extremely good idea of how these bits interact with
>> caching, I'm very reluctant to try too hard to over-optimize the pipe
>> control.
>>
>>
>>> The BDW PRM states:
>>>
>>> If the stall bit is set, the command streamer waits until the pipe is
>>> completely flushed.
>>>
>>
>> It waits until whatever flush/stall operation you gave it is finished.
>> You also need to specify a stall/flush operation. If you just do a depth
>> stall for instance and no depth reads/writes are in-flight, I don't think
>> it actually stalls at all. We may be able to use StallAtPixelScoreboard
>> instead but, again, I don't know for sure how this interacts with caches.
>>
>>
>>> > + }
>>> > +
>>> > + uint32_t cache_mode;
>>> > + anv_pack_struct(&cache_mode, GENX(CACHE_MODE_1),
>>> > + .NPPMAFixEnable = enable,
>>> > + .NPEarlyZFailsDisableMask = enable,
>>> ^
>>> I don't think you intended to set the mask here. It should be:
>>>
>>> .NPEarlyZFailsDisable = enable,
>>>
>>> Does fixing this impact your FPS measurements significantly?
>>>
>>
>> Thanks for catching that! I'm a bit surprised that it worked at all.
>> I'll definitely re-measure.
>>
>
> I just ran Dota2 and fixing this seems to help by around 4%! I'll do a
> full re-run and get fresh numbers.
>
I did a full re-benchmark of the series with the fixes from your review.
Now Dota 2 is helped by 8-9% by the PMA fix and none of the other patches
seem to help. I'm very confused.
When I say "none of the other patches help" that's not entirely true. I'm
seeing occational hangs (1 out of 4 runs) and they they seem to
substantially decrease with the patch to disable stencil writes when the
writemasks are zero. I'm not sure what's going on there. Maybe we're
missing a workaround somewhere?
--Jason
>
>>
>>> > + .NPPMAFixEnableMask = true,
>>> > + .NPEarlyZFailsDisableMask = true);
>>> > + anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM),
>>> lri) {
>>> > + lri.RegisterOffset = GENX(CACHE_MODE_1_num);
>>> > + lri.DataDWord = cache_mode;
>>> > + }
>>> > +
>>> > + /* After the LRI, a PIPE_CONTROL with both the Depth Stall and
>>> Depth Cache
>>> > + * Flush bits is often necessary. We do it regardless because
>>> it's easier.
>>> > + * The render cache flush is also necessary if stencil writes are
>>> enabled.
>>> > + */
>>>
>>> I couldn't find a requirement to flush these specific caches in the
>>> docs, but I did find:
>>>
>>
>> Both of these flushes are documented in the bspec (not PRM) docs for
>> PIPE_CONTROL.
>>
>>
>>> To ensure this command gets executed before upcoming commands in the
>>> ring, either a stalling pipecontrol should be sent after this
>>> command, or MMIO 0x20C0 bit 7 should be set to 1.
>>>
>>> Perhaps we could use the flush I suggested earlier?
>>>
>>> > + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
>>> > + pc.DepthStallEnable = true;
>>> > + pc.DepthCacheFlushEnable = true;
>>> > + pc.RenderTargetCacheFlushEnable = true;
>>> > + }
>>> > +
>>> > + cmd_buffer->state.pma_fix_enabled = enable;
>>> > +#endif /* GEN_GEN == 8 */
>>> > +}
>>> > +
>>> > +static inline bool
>>> > +want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer)
>>> > +{
>>> > + assert(GEN_GEN == 8);
>>> > +
>>> > + /* From the Broadwell PRM Vol. 2c CACHE_MODE_1::NP_PMA_FIX_ENABLE:
>>> > + *
>>> > + * SW must set this bit in order to enable this fix when
>>> following
>>> > + * expression is TRUE.
>>> > + *
>>> > + * 3DSTATE_WM::ForceThreadDispatch != 1 &&
>>> > + * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
>>> > + * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
>>> > + * (3DSTATE_DEPTH_BUFFER::HIZ Enable) &&
>>> > + * !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) &&
>>> > + * (3DSTATE_PS_EXTRA::PixelShaderValid) &&
>>> > + * !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
>>> > + * 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
>>> > + * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable
>>> ||
>>> > + * 3DSTATE_WM_HZ_OP::StencilBufferClear) &&
>>> > + * (3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable) &&
>>> > + * (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
>>> > + * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
>>> > + * 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
>>> > + * 3DSTATE_PS_BLEND::AlphaTestEnable ||
>>> > + * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
>>> > + * 3DSTATE_WM::ForceKillPix != ForceOff &&
>>> > + * ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
>>> > + * 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
>>> > + * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
>>> > + * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
>>> > + * 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
>>> > + * (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode !=
>>> PSCDEPTH_OFF))
>>> > + *
>>> > + * This function only takes care of the pipeline parts of the
>>> equation.
>>>
>>> Which parts are the pipeline parts? It looks like you take care of the
>>> entire equation in this function.
>>>
>>
>> Yes, I do.
>>
>>
>>> > + */
>>> > +
>>> > + /* These are always true:
>>> > + * 3DSTATE_WM::ForceThreadDispatch != 1 &&
>>> > + * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
>>> > + */
>>>
>>> I couldn't quite understand what ForcedSampleCount does from looking at
>>> the HW docs. If you know, could you tell me what this does and why it
>>> wouldn't change under any circumstance in the future? Otherwise, I
>>> think we should put an assertion where we assign ForcedSampleCount and
>>> reference this PMA optimization.
>>>
>>
>> Honestly, I'm not 100% sure what it's for. I think it lets you make the
>> rasterizer run at a different number of samples than the underlying
>> surface. We've never used this bit in Vulkan or GL. Also, there's no flag
>> for us to assert on.
>>
>>
>>> > +
>>> > + /* We only enable the PMA fix if we know for certain that HiZ is
>>> enabled.
>>> > + * If we don't know whether HiZ is enabled or not, we disable the
>>> PMA fix
>>> > + * and there is no harm.
>>> > + *
>>> > + * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
>>> > + * 3DSTATE_DEPTH_BUFFER::HIZ Enable
>>> > + */
>>> > + if (!cmd_buffer->state.hiz_enabled)
>>> > + return false;
>>> > +
>>> > + /* 3DSTATE_PS_EXTRA::PixelShaderValid */
>>> > + struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
>>> > + if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
>>> > + return false;
>>> > +
>>> > + /* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) */
>>> > + const struct brw_wm_prog_data *wm_prog_data =
>>> get_wm_prog_data(pipeline);
>>> > + if (wm_prog_data->early_fragment_tests)
>>> > + return false;
>>>
>>> In keeping with your top-to-bottom evaluation of the logical statement,
>>> could you move this part to be above the PixelShaderValid part?
>>>
>>
>> Not really. If we don't' have a fragment shader, we wm_prog_data is NULL
>>
>>
>>> > +
>>> > + /* We never use anv_pipeline for HiZ ops so this is trivially true:
>>> > + * !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
>>> ^
>>> misaligned asterisk
>>>
>>
>> Fixed.
>>
>>
>>> > + * 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
>>> > + * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable
>>> ||
>>> > + * 3DSTATE_WM_HZ_OP::StencilBufferClear)
>>> > + */
>>> > +
>>> > + /* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable */
>>> > + if (!pipeline->depth_test_enable)
>>> > + return false;
>>> > +
>>> > + /* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
>>> > + * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
>>> > + * 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
>>> > + * 3DSTATE_PS_BLEND::AlphaTestEnable ||
>>> > + * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
>>> > + * 3DSTATE_WM::ForceKillPix != ForceOff &&
>>> > + * ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
>>> > + * 3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
>>> > + * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
>>> > + * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
>>> > + * 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
>>> > + * (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode !=
>>> PSCDEPTH_OFF))
>>> > + */
>>> > + return (pipeline->kill_pixel && (pipeline->writes_depth ||
>>> > + pipeline->writes_stencil)) ||
>>>
>>> anv_pipeline::writes_stencil !=
>>> (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
>>> 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
>>> 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE);
>>>
>>> The last two lines are not accounted for - we don't detect if the
>>> currently bound depth_stencil attachment lacks or possesses a stencil
>>> buffer.
>>>
>>
>> We should be checking for the presence of a stencil aspect in the
>> pipeline. I think that got missed when I added the writes_stencil bit. We
>> do by the end of the series, but I'll patch up the disable stencil writes
>> patch to make it check.
>>
>>
>>> This is a valid scenario according to section 25.9. Stencil Test of the
>>> Vulkan spec, which states:
>>>
>>> If there is no stencil framebuffer attachment, stencil modification
>>> cannot occur, and it is as if the stencil tests always pass.
>>>
>>> anv_pipeline::writes_depth should be correct because hiz_enabled is true
>>> by the time we reach it. For hiz_enabled to be true, there must be a
>>> depth buffer present.
>>>
>>
>> Yup
>>
>>
>>> > + wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
>>> > +}
>>> > +
>>> > +void
>>> > genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer
>>> *cmd_buffer)
>>> > {
>>> > struct anv_pipeline *pipeline = cmd_buffer->state.pipeline;
>>> > @@ -211,6 +340,7 @@ genX(cmd_buffer_flush_dynamic_state)(struct
>>> anv_cmd_buffer *cmd_buffer)
>>> > }
>>> >
>>> > if (cmd_buffer->state.dirty & (ANV_CMD_DIRTY_PIPELINE |
>>> > + ANV_CMD_DIRTY_RENDER_TARGETS |
>>>
>>> Why is this necessary?
>>>
>>
>> We set RENDER_TARGETS dirty when the depth buffer changes due to
>> NextSubpass or BeginCommandBuffer
>>
>>
>>> > ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK
>>> |
>>> > ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK))
>>> {
>>> > uint32_t wm_depth_stencil_dw[GENX(3DSTA
>>> TE_WM_DEPTH_STENCIL_length)];
>>> > @@ -234,6 +364,9 @@ genX(cmd_buffer_flush_dynamic_state)(struct
>>> anv_cmd_buffer *cmd_buffer)
>>> >
>>> > anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw,
>>> > pipeline->gen8.wm_depth_stencil);
>>> > +
>>> > + genX(cmd_buffer_enable_pma_fix)(cmd_buffer,
>>> > + want_depth_pma_fix(cmd_buffer)
>>> );
>>> > }
>>> > #else
>>> > if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
>>> {
>>> > diff --git a/src/intel/vulkan/genX_blorp_exec.c
>>> b/src/intel/vulkan/genX_blorp_exec.c
>>> > index 663e6c9..6f0b063 100644
>>> > --- a/src/intel/vulkan/genX_blorp_exec.c
>>> > +++ b/src/intel/vulkan/genX_blorp_exec.c
>>> > @@ -154,6 +154,11 @@ genX(blorp_exec)(struct blorp_batch *batch,
>>> >
>>> > genX(cmd_buffer_emit_gen7_depth_flush)(cmd_buffer);
>>> >
>>> > + /* BLORP doesn't do anything fancy with depth such as discards, so
>>> we want
>>> > + * the PMA fix off. Also, off is always the safe option.
>>> > + */
>>> > + genX(cmd_buffer_enable_pma_fix)(cmd_buffer, false);
>>> > +
>>> > blorp_exec(batch, params);
>>> >
>>> > cmd_buffer->state.vb_dirty = ~0;
>>> > diff --git a/src/intel/vulkan/genX_cmd_buffer.c
>>> b/src/intel/vulkan/genX_cmd_buffer.c
>>> > index b6b7f74..66de93a 100644
>>> > --- a/src/intel/vulkan/genX_cmd_buffer.c
>>> > +++ b/src/intel/vulkan/genX_cmd_buffer.c
>>> > @@ -627,6 +627,11 @@ genX(EndCommandBuffer)(
>>> > {
>>> > ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
>>> >
>>> > + /* We want every command buffer to start with the PMA fix in a
>>> known state,
>>> > + * so we disable it at the end of the command buffer.
>>> > + */
>>> > + genX(cmd_buffer_enable_pma_fix)(cmd_buffer, false);
>>> > +
>>> > genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
>>> >
>>> > anv_cmd_buffer_end_batch_buffer(cmd_buffer);
>>> > @@ -644,6 +649,11 @@ genX(CmdExecuteCommands)(
>>> >
>>> > assert(primary->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
>>> >
>>> > + /* The secondary command buffers will assume that the PMA fix is
>>> disabled
>>> > + * when they begin executing. Make sure this is true.
>>> > + */
>>> > + genX(cmd_buffer_enable_pma_fix)(primary, false);
>>> > +
>>> > for (uint32_t i = 0; i < commandBufferCount; i++) {
>>> > ANV_FROM_HANDLE(anv_cmd_buffer, secondary, pCmdBuffers[i]);
>>> >
>>> > @@ -2181,7 +2191,8 @@ cmd_buffer_emit_depth_stencil(struct
>>> anv_cmd_buffer *cmd_buffer)
>>> > const bool has_stencil =
>>> > image && (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
>>> >
>>> > - /* FIXME: Implement the PMA stall W/A */
>>> > + cmd_buffer->state.hiz_enabled = has_hiz;
>>> > +
>>> > /* FIXME: Width and Height are wrong */
>>> >
>>> > genX(cmd_buffer_emit_gen7_depth_flush)(cmd_buffer);
>>> > @@ -2419,6 +2430,8 @@ void genX(CmdEndRenderPass)(
>>> >
>>> > anv_cmd_buffer_resolve_subpass(cmd_buffer);
>>> >
>>> > + cmd_buffer->state.hiz_enabled = false;
>>> > +
>>> > #ifndef NDEBUG
>>> > anv_dump_add_framebuffer(cmd_buffer,
>>> cmd_buffer->state.framebuffer);
>>> > #endif
>>> > diff --git a/src/intel/vulkan/genX_pipeline.c
>>> b/src/intel/vulkan/genX_pipeline.c
>>> > index 18fe48c..0588d01 100644
>>> > --- a/src/intel/vulkan/genX_pipeline.c
>>> > +++ b/src/intel/vulkan/genX_pipeline.c
>>> > @@ -653,6 +653,8 @@ emit_ds_state(struct anv_pipeline *pipeline,
>>> > * to make sure it's initialized to something useful.
>>> > */
>>> > pipeline->writes_stencil = false;
>>> > + pipeline->writes_depth = false;
>>> > + pipeline->depth_test_enable = false;
>>> > memset(depth_stencil_dw, 0, sizeof(depth_stencil_dw));
>>> > return;
>>> > }
>>> > @@ -711,6 +713,9 @@ emit_ds_state(struct anv_pipeline *pipeline,
>>> > if (info->depthTestEnable && info->depthCompareOp ==
>>> VK_COMPARE_OP_EQUAL)
>>> > depth_stencil.DepthBufferWriteEnable = false;
>>> >
>>> > + pipeline->writes_depth = depth_stencil.DepthBufferWriteEnable;
>>> > + pipeline->depth_test_enable = depth_stencil.DepthTestEnable;
>>> > +
>>> > #if GEN_GEN <= 7
>>> > GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw,
>>> &depth_stencil);
>>> > #else
>>> > @@ -1429,6 +1434,38 @@ emit_3dstate_vf_topology(struct anv_pipeline
>>> *pipeline)
>>> > }
>>> > #endif
>>> >
>>> > +static void
>>> > +compute_kill_pixel(struct anv_pipeline *pipeline,
>>> > + const VkPipelineMultisampleStateCreateInfo
>>> *ms_info,
>>> > + const struct anv_subpass *subpass)
>>> > +{
>>> > + if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
>>> > + pipeline->kill_pixel = false;
>>> > + return;
>>> > + }
>>> > +
>>> > + const struct brw_wm_prog_data *wm_prog_data =
>>> get_wm_prog_data(pipeline);
>>> > +
>>> > + /* This computes the KillPixel portion of the computation for
>>> whether or
>>> > + * not we want to enable the PMA fix on gen8. It's given by this
>>> chunk of
>>> > + * the giant formula:
>>> > + *
>>> > + * (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
>>> > + * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
>>> > + * 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
>>> > + * 3DSTATE_PS_BLEND::AlphaTestEnable ||
>>> > + * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)
>>> > + *
>>> > + * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable is always false and
>>> so is
>>> > + * 3DSTATE_PS_BLEND::AlphaTestEnable since Vulkan doesn't have a
>>> concept
>>> > + * of an alpha test.
>>> > + */
>>> > + pipeline->kill_pixel =
>>> > + subpass->has_ds_self_dep || wm_prog_data->uses_kill ||
>>>
>>> Why do we set kill_pixel in the presence of has_ds_self_dep?
>>>
>>> Section 7.1 of the Vulkan spec states:
>>>
>>> If a subpass uses the same attachment as both an input attachment and
>>> either a color attachment or a depth/stencil attachment, writes via
>>> the color or depth/stencil attachment are not automatically made
>>> visible to reads via the input attachment, causing a feedback loop,
>>> except in any of the following conditions:
>>>
>>> [...]
>>>
>>> I don't see how any of the conditions are satisfied through the use
>>> of kill_pixel.
>>>
>>
>> There's a really good comment where we set 3DSTATE_PS::PixelShaderKillsPixel
>> and 3DSTATE_PS_EXTRA::PixelShaderKillsPixel
>>
>>
>>> > + wm_prog_data->uses_omask ||
>>> > + (ms_info && ms_info->alphaToCoverageEnable);
>>> > +}
>>> > +
>>> > static VkResult
>>> > genX(graphics_pipeline_create)(
>>> > VkDevice _device,
>>> > @@ -1466,6 +1503,7 @@ genX(graphics_pipeline_create)(
>>> > emit_ds_state(pipeline, pCreateInfo->pDepthStencilState, pass,
>>> subpass);
>>> > emit_cb_state(pipeline, pCreateInfo->pColorBlendState,
>>> > pCreateInfo->pMultisampleState);
>>> > + compute_kill_pixel(pipeline, pCreateInfo->pMultisampleState,
>>> subpass);
>>> >
>>> > emit_urb_setup(pipeline);
>>> >
>>> > --
>>> > 2.5.0.400.gff86faf
>>> >
>>> > _______________________________________________
>>> > mesa-dev mailing list
>>> > mesa-dev at lists.freedesktop.org
>>> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>>
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170208/d12ab364/attachment-0001.html>
More information about the mesa-dev
mailing list