[Mesa-dev] [PATCH 3/3] anv: Implement VK_EXT_conditional_rendering for gen 7.5+
Danylo Piliaiev
danylo.piliaiev at gmail.com
Tue Nov 6 10:36:40 UTC 2018
On 11/6/18 1:05 AM, Jason Ekstrand wrote:
> On Wed, Oct 17, 2018 at 6:59 AM Danylo Piliaiev <danylo.piliaiev at gmail.com>
> wrote:
>
>> Conditional rendering affects the following functions:
>> - vkCmdDraw, vkCmdDrawIndexed, vkCmdDrawIndirect, vkCmdDrawIndexedIndirect
>> - vkCmdDrawIndirectCountKHR, vkCmdDrawIndexedIndirectCountKHR
>> - vkCmdDispatch, vkCmdDispatchIndirect, vkCmdDispatchBase
>> - vkCmdClearAttachments
>>
>> To reduce reads from memory, the result of the condition is calculated
>> once and stored in the designated register MI_ALU_REG15.
>>
>> In the current implementation the affected functions expect
>> MI_PREDICATE_RESULT to be set before they are called, so any code that
>> changes the predicate should restore it with
>> restore_conditional_render_predicate. An alternative would be to restore
>> MI_PREDICATE_RESULT at the beginning of every affected function.
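
(For context, the application-side usage of the extension looks roughly
like the sketch below; the buffer/offset names are illustrative and not
part of this patch. With no flags set, draws recorded between Begin/End
are skipped when the 32-bit predicate value in the buffer is zero.)

    /* Hypothetical usage sketch, not from the patch: */
    VkConditionalRenderingBeginInfoEXT begin_info = {
       .sType  = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT,
       .buffer = predicate_buffer, /* hypothetical VkBuffer with the value */
       .offset = predicate_offset, /* hypothetical offset of the value */
       .flags  = 0, /* or VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT */
    };
    vkCmdBeginConditionalRenderingEXT(cmd_buffer, &begin_info);
    vkCmdDraw(cmd_buffer, vertex_count, 1, 0, 0);
    vkCmdEndConditionalRenderingEXT(cmd_buffer);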
>>
>> Signed-off-by: Danylo Piliaiev <danylo.piliaiev at globallogic.com>
>> ---
>> src/intel/vulkan/anv_blorp.c | 7 +-
>> src/intel/vulkan/anv_device.c | 12 ++
>> src/intel/vulkan/anv_extensions.py | 1 +
>> src/intel/vulkan/anv_private.h | 2 +
>> src/intel/vulkan/genX_cmd_buffer.c | 192 ++++++++++++++++++++++++++++-
>> 5 files changed, 209 insertions(+), 5 deletions(-)
>>
>> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
>> index 478b8e7a3d..157875d16f 100644
>> --- a/src/intel/vulkan/anv_blorp.c
>> +++ b/src/intel/vulkan/anv_blorp.c
>> @@ -1144,8 +1144,11 @@ void anv_CmdClearAttachments(
>> * trash our depth and stencil buffers.
>> */
>> struct blorp_batch batch;
>> - blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
>> - BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
>> + enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL;
>> + if (cmd_buffer->state.conditional_render_enabled) {
>> + flags |= BLORP_BATCH_PREDICATE_ENABLE;
>> + }
>> + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, flags);
>>
>> for (uint32_t a = 0; a < attachmentCount; ++a) {
>> if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
>> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
>> index a2551452eb..930a192c25 100644
>> --- a/src/intel/vulkan/anv_device.c
>> +++ b/src/intel/vulkan/anv_device.c
>> @@ -957,6 +957,18 @@ void anv_GetPhysicalDeviceFeatures2(
>> break;
>> }
>>
>> + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
>> + VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
>> + (VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
>> + ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
>> +
>> + features->conditionalRendering = pdevice->info.gen >= 8 ||
>> + pdevice->info.is_haswell;
>> + features->inheritedConditionalRendering = pdevice->info.gen >= 8 ||
>> + pdevice->info.is_haswell;
>> + break;
>> + }
>> +
>> default:
>> anv_debug_ignored_stype(ext->sType);
>> break;
>> diff --git a/src/intel/vulkan/anv_extensions.py b/src/intel/vulkan/anv_extensions.py
>> index c13ce531ee..2ef7a52d01 100644
>> --- a/src/intel/vulkan/anv_extensions.py
>> +++ b/src/intel/vulkan/anv_extensions.py
>> @@ -127,6 +127,7 @@ EXTENSIONS = [
>> Extension('VK_EXT_vertex_attribute_divisor', 3, True),
>> Extension('VK_EXT_post_depth_coverage', 1, 'device->info.gen >= 9'),
>> Extension('VK_EXT_sampler_filter_minmax', 1, 'device->info.gen >= 9'),
>> + Extension('VK_EXT_conditional_rendering', 1, 'device->info.gen >= 8 || device->info.is_haswell'),
>> ]
>>
>> class VkVersion:
>> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
>> index 599b903f25..108da51a59 100644
>> --- a/src/intel/vulkan/anv_private.h
>> +++ b/src/intel/vulkan/anv_private.h
>> @@ -2032,6 +2032,8 @@ struct anv_cmd_state {
>> */
>> bool hiz_enabled;
>>
>> + bool conditional_render_enabled;
>> +
>> /**
>> * Array length is anv_cmd_state::pass::attachment_count. Array content is
>> * valid only when recording a render pass instance.
>> diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
>> index f07a6aa7c9..87abc443b6 100644
>> --- a/src/intel/vulkan/genX_cmd_buffer.c
>> +++ b/src/intel/vulkan/genX_cmd_buffer.c
>> @@ -479,8 +479,9 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
>> 0, 0, 1, hiz_op);
>> }
>>
>> -#define MI_PREDICATE_SRC0 0x2400
>> -#define MI_PREDICATE_SRC1 0x2408
>> +#define MI_PREDICATE_SRC0 0x2400
>> +#define MI_PREDICATE_SRC1 0x2408
>> +#define MI_PREDICATE_RESULT 0x2418
>>
>> static void
>> set_image_compressed_bit(struct anv_cmd_buffer *cmd_buffer,
>> @@ -545,6 +546,14 @@ mi_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2)
>>
>> #define CS_GPR(n) (0x2600 + (n) * 8)
>>
>> +#if GEN_GEN >= 8 || GEN_IS_HASWELL
>> +static void
>> +restore_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer)
>> +{
>> + emit_lrr(&cmd_buffer->batch, MI_PREDICATE_RESULT, CS_GPR(MI_ALU_REG15));
>>
> Does this work? Is it sufficient to just set MI_PREDICATE_RESULT or do we
> actually need to use an MI_PREDICATE? I genuinely don't know and this
> strikes me as odd.
>
It does work. However, I didn't see that being _explicitly_ stated in the docs.
- It is explicitly stated that MI_PREDICATE uses the MI_PREDICATE_DATA
register for calculations and MI_PREDICATE_RESULT to store the predicate bit.
- We are not forbidden from writing to it.
- The description of the "Predicate Enable" bit says:
    If set, this command is executed (or not) depending on the current
    value of the MI Predicate internal state bit.
So the docs indirectly tell us that it should work.
I would agree that this may require some explicit confirmation.
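
If a direct write to MI_PREDICATE_RESULT ever turns out not to be
honored, a fallback would be to re-derive the predicate bit from the
saved condition with an actual MI_PREDICATE, e.g. (untested sketch,
mirroring the logic in CmdBeginConditionalRenderingEXT below):

    static void
    restore_conditional_render_predicate_via_mi_predicate(
       struct anv_cmd_buffer *cmd_buffer)
    {
       /* SRC0 = saved condition (1 = render, 0 = skip), SRC1 = 0 */
       emit_lrr(&cmd_buffer->batch, MI_PREDICATE_SRC0, CS_GPR(MI_ALU_REG15));
       emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0);
       emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1, 0);
       emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);

       /* Predicate = !(SRC0 == SRC1), i.e. set iff the saved condition
        * is non-zero.
        */
       anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
          mip.LoadOperation    = LOAD_LOADINV;
          mip.CombineOperation = COMBINE_SET;
          mip.CompareOperation = COMPARE_SRCS_EQUAL;
       }
    }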
>> +}
>> +#endif
>> +
>> /* This is only really practical on haswell and above because it requires
>> * MI math in order to get it correct.
>> */
>> @@ -1144,6 +1153,12 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
>> }
>> }
>>
>> +#if GEN_GEN >= 8 || GEN_IS_HASWELL
>> + if (cmd_buffer->state.conditional_render_enabled) {
>> + restore_conditional_render_predicate(cmd_buffer);
>> + }
>> +#endif
>> +
>> cmd_buffer->state.pending_pipe_bits |=
>> ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
>> }
>> @@ -1397,6 +1412,26 @@ genX(BeginCommandBuffer)(
>> cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_RENDER_TARGETS;
>> }
>>
>> +#if GEN_GEN >= 8 || GEN_IS_HASWELL
>> + if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
>> + vk_foreach_struct_const(s, pBeginInfo->pInheritanceInfo->pNext) {
>> + switch (s->sType) {
>> + case VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_CONDITIONAL_RENDERING_INFO_EXT: {
>> + const VkCommandBufferInheritanceConditionalRenderingInfoEXT *conditional_rendering_info =
>> + (const VkCommandBufferInheritanceConditionalRenderingInfoEXT *) s;
>> + /* We should emit commands as if conditional render is enabled. */
>> + cmd_buffer->state.conditional_render_enabled =
>> + conditional_rendering_info->conditionalRenderingEnable;
>>
> Might be easier to just use vk_find_struct_const() instead of the loop.
>
Missed vk_find_struct_const, thanks!
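Something like this should work (rough sketch, untested):

    #if GEN_GEN >= 8 || GEN_IS_HASWELL
       if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
          const VkCommandBufferInheritanceConditionalRenderingInfoEXT *conditional_rendering_info =
             vk_find_struct_const(pBeginInfo->pInheritanceInfo->pNext,
                                  COMMAND_BUFFER_INHERITANCE_CONDITIONAL_RENDERING_INFO_EXT);

          /* We should emit commands as if conditional render is enabled. */
          if (conditional_rendering_info) {
             cmd_buffer->state.conditional_render_enabled =
                conditional_rendering_info->conditionalRenderingEnable;
          }
       }
    #endif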
>> + break;
>> + }
>> + default:
>> + anv_debug_ignored_stype(s->sType);
>> + break;
>> + }
>> + }
>> + }
>> +#endif
>> +
>> return result;
>> }
>>
>> @@ -1501,6 +1536,20 @@ genX(CmdExecuteCommands)(
>> assert(secondary->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
>> assert(!anv_batch_has_error(&secondary->batch));
>>
>> +#if GEN_GEN >= 8 || GEN_IS_HASWELL
>> + if (secondary->state.conditional_render_enabled) {
>> + /* The secondary buffer is constructed as if it will be executed
>> + * with conditional rendering, so we should satisfy this dependency
>> + * regardless of conditional rendering being enabled in the primary.
>> + */
>> + if (!primary->state.conditional_render_enabled) {
>> + emit_lri(&primary->batch, CS_GPR(MI_ALU_REG15), 1);
>> + emit_lri(&primary->batch, CS_GPR(MI_ALU_REG15) + 4, 0);
>> + emit_lrr(&primary->batch, MI_PREDICATE_RESULT, CS_GPR(MI_ALU_REG15));
>> + }
>> + }
>> +#endif
>> +
>> if (secondary->usage_flags &
>> VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
>> /* If we're continuing a render pass from the primary, we need to
>> @@ -2761,6 +2810,7 @@ void genX(CmdDraw)(
>> instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass);
>>
>> anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
>> + prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
>> prim.VertexAccessType = SEQUENTIAL;
>> prim.PrimitiveTopologyType = pipeline->topology;
>> prim.VertexCountPerInstance = vertexCount;
>> @@ -2800,6 +2850,7 @@ void genX(CmdDrawIndexed)(
>> instanceCount *= anv_subpass_view_count(cmd_buffer->state.subpass);
>>
>> anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
>> + prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
>> prim.VertexAccessType = RANDOM;
>> prim.PrimitiveTopologyType = pipeline->topology;
>> prim.VertexCountPerInstance = indexCount;
>> @@ -2935,6 +2986,7 @@ void genX(CmdDrawIndirect)(
>>
>> anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
>> prim.IndirectParameterEnable = true;
>> + prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
>> prim.VertexAccessType = SEQUENTIAL;
>> prim.PrimitiveTopologyType = pipeline->topology;
>> }
>> @@ -2974,6 +3026,7 @@ void genX(CmdDrawIndexedIndirect)(
>>
>> anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
>> prim.IndirectParameterEnable = true;
>> + prim.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
>> prim.VertexAccessType = RANDOM;
>> prim.PrimitiveTopologyType = pipeline->topology;
>> }
>> @@ -3024,6 +3077,42 @@ emit_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
>> }
>> }
>>
>> +#if GEN_GEN >= 8 || GEN_IS_HASWELL
>> +static void
>> +emit_draw_count_predicate_with_conditional_render(
>> + struct anv_cmd_buffer *cmd_buffer,
>> + struct anv_address count_address,
>> + uint32_t draw_index)
>> +{
>> + const int draw_index_reg = MI_ALU_REG0;
>> + const int draw_count_reg = MI_ALU_REG14;
>> + const int condition_reg = MI_ALU_REG15;
>> + const int tmp_result_reg = MI_ALU_REG1;
>> +
>> + emit_lri(&cmd_buffer->batch, CS_GPR(draw_index_reg), draw_index);
>> + emit_lri(&cmd_buffer->batch, CS_GPR(draw_index_reg) + 4, 0);
>> +
>> + uint32_t *dw;
>> + /* Compute (draw_index < draw_count).
>> + * We do this by subtracting and storing the carry bit.
>> + */
>> + dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH));
>> + dw[1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, draw_index_reg);
>> + dw[2] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, draw_count_reg);
>> + dw[3] = mi_alu(MI_ALU_SUB, 0, 0);
>> + dw[4] = mi_alu(MI_ALU_STORE, tmp_result_reg, MI_ALU_CF);
>> +
>> + /* & condition */
>> + dw = anv_batch_emitn(&cmd_buffer->batch, 5, GENX(MI_MATH));
>> + dw[1] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCA, tmp_result_reg);
>> + dw[2] = mi_alu(MI_ALU_LOAD, MI_ALU_SRCB, condition_reg);
>> + dw[3] = mi_alu(MI_ALU_AND, 0, 0);
>> + dw[4] = mi_alu(MI_ALU_STORE, tmp_result_reg, MI_ALU_ACCU);
>> +
>> + emit_lrr(&cmd_buffer->batch, MI_PREDICATE_RESULT, CS_GPR(tmp_result_reg));
>>
> Again, is this sufficient? Maybe I'm missing something.
>
>
>> +}
>> +#endif
>> +
>> void genX(CmdDrawIndirectCountKHR)(
>> VkCommandBuffer commandBuffer,
>> VkBuffer _buffer,
>> @@ -3063,7 +3152,15 @@ void genX(CmdDrawIndirectCountKHR)(
>> for (uint32_t i = 0; i < maxDrawCount; i++) {
>> struct anv_address draw = anv_address_add(buffer->address, offset);
>>
>> +#if GEN_GEN >= 8 || GEN_IS_HASWELL
>> + if (cmd_state->conditional_render_enabled) {
>> + emit_draw_count_predicate_with_conditional_render(cmd_buffer,
>> + count_address, i);
>> + } else {
>> + emit_draw_count_predicate(cmd_buffer, count_address, i);
>> + }
>> +#else
>> emit_draw_count_predicate(cmd_buffer, count_address, i);
>> +#endif
>>
>> if (vs_prog_data->uses_firstvertex ||
>> vs_prog_data->uses_baseinstance)
>> @@ -3082,6 +3179,12 @@ void genX(CmdDrawIndirectCountKHR)(
>>
>> offset += stride;
>> }
>> +
>> +#if GEN_GEN >= 8 || GEN_IS_HASWELL
>> + if (cmd_state->conditional_render_enabled) {
>> + restore_conditional_render_predicate(cmd_buffer);
>> + }
>> +#endif
>> }
>>
>> void genX(CmdDrawIndexedIndirectCountKHR)(
>> @@ -3123,7 +3226,15 @@ void genX(CmdDrawIndexedIndirectCountKHR)(
>> for (uint32_t i = 0; i < maxDrawCount; i++) {
>> struct anv_address draw = anv_address_add(buffer->address, offset);
>>
>> +#if GEN_GEN >= 8 || GEN_IS_HASWELL
>> + if (cmd_state->conditional_render_enabled) {
>> + emit_draw_count_predicate_with_conditional_render(cmd_buffer,
>> + count_address, i);
>> + } else {
>> + emit_draw_count_predicate(cmd_buffer, count_address, i);
>> + }
>> +#else
>> emit_draw_count_predicate(cmd_buffer, count_address, i);
>> +#endif
>>
>> /* TODO: We need to stomp base vertex to 0 somehow */
>> if (vs_prog_data->uses_firstvertex ||
>> @@ -3143,6 +3254,12 @@ void genX(CmdDrawIndexedIndirectCountKHR)(
>>
>> offset += stride;
>> }
>> +
>> +#if GEN_GEN >= 8 || GEN_IS_HASWELL
>> + if (cmd_state->conditional_render_enabled) {
>> + restore_conditional_render_predicate(cmd_buffer);
>> + }
>> +#endif
>> }
>>
>> static VkResult
>> @@ -3351,6 +3468,7 @@ void genX(CmdDispatchBase)(
>> genX(cmd_buffer_flush_compute_state)(cmd_buffer);
>>
>> anv_batch_emit(&cmd_buffer->batch, GENX(GPGPU_WALKER), ggw) {
>> + ggw.PredicateEnable = cmd_buffer->state.conditional_render_enabled;
>> ggw.SIMDSize = prog_data->simd_size / 16;
>> ggw.ThreadDepthCounterMaximum = 0;
>> ggw.ThreadHeightCounterMaximum = 0;
>> @@ -3448,7 +3566,8 @@ void genX(CmdDispatchIndirect)(
>>
>> anv_batch_emit(batch, GENX(GPGPU_WALKER), ggw) {
>> ggw.IndirectParameterEnable = true;
>> - ggw.PredicateEnable = GEN_GEN <= 7;
>> + ggw.PredicateEnable = GEN_GEN <= 7 ||
>> + cmd_buffer->state.conditional_render_enabled;
>> ggw.SIMDSize = prog_data->simd_size / 16;
>> ggw.ThreadDepthCounterMaximum = 0;
>> ggw.ThreadHeightCounterMaximum = 0;
>> @@ -4158,3 +4277,70 @@ void genX(CmdEndRenderPass2KHR)(
>> {
>> genX(CmdEndRenderPass)(commandBuffer);
>> }
>> +
>> +#if GEN_GEN >= 8 || GEN_IS_HASWELL
>> +void genX(CmdBeginConditionalRenderingEXT)(
>> + VkCommandBuffer commandBuffer,
>> + const VkConditionalRenderingBeginInfoEXT* pConditionalRenderingBegin)
>> +{
>> + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
>> + ANV_FROM_HANDLE(anv_buffer, buffer, pConditionalRenderingBegin->buffer);
>> + struct anv_cmd_state *cmd_state = &cmd_buffer->state;
>> + struct anv_address value_address =
>> + anv_address_add(buffer->address, pConditionalRenderingBegin->offset);
>> +
>> + const bool inverted = pConditionalRenderingBegin->flags &
>> + VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
>> +
>> + cmd_state->conditional_render_enabled = true;
>> +
>> + /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
>> + * command when loading the values into the predicate source registers.
>> + */
>> + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
>> + pc.PipeControlFlushEnable = true;
>> + }
>> +
>> + /* Section 19.4 of the Vulkan 1.1.85 spec says:
>> + *
>> + * If the value of the predicate in buffer memory changes
>> + * while conditional rendering is active, the rendering commands
>> + * may be discarded in an implementation-dependent way.
>> + * Some implementations may latch the value of the predicate
>> + * upon beginning conditional rendering while others
>> + * may read it before every rendering command.
>> + *
>> + * So it's perfectly fine to read a value from the buffer once.
>> + */
>> +
>> + emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0, value_address);
>> + /* Zero the top 32-bits of MI_PREDICATE_SRC0 */
>> + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4, 0);
>> + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1, 0);
>> + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
>> +
>> + anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
>> + mip.LoadOperation = inverted ? LOAD_LOAD : LOAD_LOADINV;
>> + mip.CombineOperation = COMBINE_SET;
>> + mip.CompareOperation = COMPARE_SRCS_EQUAL;
>> + }
>> +
>> + /* Calculate the predicate result once and store it in MI_ALU_REG15
>> + * to avoid recalculating it when interacting with
>> + * VK_KHR_draw_indirect_count, which also uses predicates.
>> + * It is also the only way to support conditional rendering of
>> + * secondary buffers, because they are recorded before we
>> + * know whether conditional rendering is enabled.
>> + */
>> + emit_lrr(&cmd_buffer->batch, CS_GPR(MI_ALU_REG15), MI_PREDICATE_RESULT);
>> +}
>> +
>> +void genX(CmdEndConditionalRenderingEXT)(
>> + VkCommandBuffer commandBuffer)
>> +{
>> + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
>> + struct anv_cmd_state *cmd_state = &cmd_buffer->state;
>> +
>> + cmd_state->conditional_render_enabled = false;
>> +}
>> +#endif
>> --
>> 2.18.0
>>
>>