[Mesa-dev] [PATCH 1/3] anv: Implement VK_KHR_draw_indirect_count for gen 7.5+
Danylo Piliaiev
danylo.piliaiev at gmail.com
Tue Nov 6 09:55:27 UTC 2018
On 11/6/18 12:39 AM, Jason Ekstrand wrote:
> On Wed, Oct 17, 2018 at 6:59 AM Danylo Piliaiev <danylo.piliaiev at gmail.com>
> wrote:
>
>> Signed-off-by: Danylo Piliaiev <danylo.piliaiev at globallogic.com>
>> ---
>> src/intel/vulkan/anv_extensions.py | 1 +
>> src/intel/vulkan/genX_cmd_buffer.c | 155 +++++++++++++++++++++++++++++
>> 2 files changed, 156 insertions(+)
>>
>> diff --git a/src/intel/vulkan/anv_extensions.py
>> b/src/intel/vulkan/anv_extensions.py
>> index d4915c9501..7f44da6648 100644
>> --- a/src/intel/vulkan/anv_extensions.py
>> +++ b/src/intel/vulkan/anv_extensions.py
>> @@ -113,6 +113,7 @@ EXTENSIONS = [
>> Extension('VK_KHR_xlib_surface', 6,
>> 'VK_USE_PLATFORM_XLIB_KHR'),
>> Extension('VK_KHR_multiview', 1, True),
>> Extension('VK_KHR_display', 23,
>> 'VK_USE_PLATFORM_DISPLAY_KHR'),
>> + Extension('VK_KHR_draw_indirect_count', 1,
>> 'device->info.gen >= 8 || device->info.is_haswell'),
>> Extension('VK_EXT_acquire_xlib_display', 1,
>> 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
>> Extension('VK_EXT_debug_report', 8, True),
>> Extension('VK_EXT_direct_mode_display', 1,
>> 'VK_USE_PLATFORM_DISPLAY_KHR'),
>> diff --git a/src/intel/vulkan/genX_cmd_buffer.c
>> b/src/intel/vulkan/genX_cmd_buffer.c
>> index 43a02f2256..d7b94efd19 100644
>> --- a/src/intel/vulkan/genX_cmd_buffer.c
>> +++ b/src/intel/vulkan/genX_cmd_buffer.c
>> @@ -2982,6 +2982,161 @@ void genX(CmdDrawIndexedIndirect)(
>> }
>> }
>>
>> +#if GEN_IS_HASWELL || GEN_GEN >= 8
>> +static void
>> +emit_draw_count_predicate(struct anv_cmd_buffer *cmd_buffer,
>> + struct anv_address count_address,
>> + uint32_t draw_index)
>> +{
>> + /* Upload the current draw count from the draw parameters buffer to
>> + * MI_PREDICATE_SRC0.
>> + */
>> + emit_lrr(&cmd_buffer->batch, MI_PREDICATE_SRC0, CS_GPR(MI_ALU_REG14));
>>
> Do we also need to set MI_PREDICATE_SRC0 + 4 to 0? I suspect we do.
Yes.
I'll also recheck other places.
>
> Also, we can likely save some batch space if we have a "prepare" function
> which sets MI_PREDICATE_SRC0, SRC0 + 4, and SRC1 + 4 and only emit one
> LOAD_REGISTER_IMM and the MI_PREDICATE per-draw. For lots of primitives,
> those extra three MI_LOAD_REGISTER_* calls will add up.
>
Makes sense
>> +
>> + /* Upload the index of the current primitive to MI_PREDICATE_SRC1. */
>> + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1, draw_index);
>> + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
>> +
>> + if (draw_index == 0) {
>> + anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
>> + mip.LoadOperation = LOAD_LOADINV;
>> + mip.CombineOperation = COMBINE_SET;
>> + mip.CompareOperation = COMPARE_SRCS_EQUAL;
>> + }
>> + } else {
>> + /* While draw_index < draw_count the predicate's result will be
>> + * (draw_index == draw_count) ^ TRUE = TRUE
>> + * When draw_index == draw_count the result is
>> + * (TRUE) ^ TRUE = FALSE
>> + * After this all results will be:
>> + * (FALSE) ^ FALSE = FALSE
>> + */
>> + anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
>> + mip.LoadOperation = LOAD_LOAD;
>> + mip.CombineOperation = COMBINE_XOR;
>> + mip.CompareOperation = COMPARE_SRCS_EQUAL;
>> + }
>> + }
>> +}
>> +
>> +void genX(CmdDrawIndirectCountKHR)(
>> + VkCommandBuffer commandBuffer,
>> + VkBuffer _buffer,
>> + VkDeviceSize offset,
>> + VkBuffer _countBuffer,
>> + VkDeviceSize countBufferOffset,
>> + uint32_t maxDrawCount,
>> + uint32_t stride)
>> +{
>> + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
>> + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
>> + ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer);
>> + struct anv_cmd_state *cmd_state = &cmd_buffer->state;
>> + struct anv_pipeline *pipeline = cmd_state->gfx.base.pipeline;
>> + const struct brw_vs_prog_data *vs_prog_data =
>> get_vs_prog_data(pipeline);
>> +
>> + if (anv_batch_has_error(&cmd_buffer->batch))
>> + return;
>> +
>> + genX(cmd_buffer_flush_state)(cmd_buffer);
>> +
>> + struct anv_address count_address =
>> + anv_address_add(count_buffer->address, countBufferOffset);
>> +
>> + /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
>> + * command when loading the values into the predicate source registers.
>> + */
>> + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
>> + pc.PipeControlFlushEnable = true;
>> + }
>>
> Have you seen this be an actual problem? If not, why? A documentation
> citation would be nice.
>
You are right - a citation is needed. From Volume 7: 3D-Media-GPGPU
(Skylake), MI_PREDICATE:

    "MI_LOAD_REGISTER_MEM commands can be used to load the MItemp0,
    MItemp1, and PredicateData registers prior to MI_PREDICATE. To ensure
    the memory sources of the MI_LOAD_REGISTER_MEM commands are coherent
    with previous 3D_PIPECONTROL store-DWord operations, software can use
    the new Pipe Control Flush Enable bit in the PIPE_CONTROL command."

It looks like the memory may not be coherent here unless coherency is
enforced elsewhere.
>> +
>> + emit_lrm(&cmd_buffer->batch, CS_GPR(MI_ALU_REG14), count_address);
>> + emit_lri(&cmd_buffer->batch, CS_GPR(MI_ALU_REG14) + 4, 0);
>> +
>> + for (uint32_t i = 0; i < maxDrawCount; i++) {
>> + struct anv_address draw = anv_address_add(buffer->address, offset);
>> +
>> + emit_draw_count_predicate(cmd_buffer, count_address, i);
>> +
>> + if (vs_prog_data->uses_firstvertex ||
>> + vs_prog_data->uses_baseinstance)
>> + emit_base_vertex_instance_bo(cmd_buffer, anv_address_add(draw,
>> 8));
>> + if (vs_prog_data->uses_drawid)
>> + emit_draw_index(cmd_buffer, i);
>> +
>> + load_indirect_parameters(cmd_buffer, draw, false);
>> +
>> + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
>> + prim.IndirectParameterEnable = true;
>> + prim.PredicateEnable = true;
>> + prim.VertexAccessType = SEQUENTIAL;
>> + prim.PrimitiveTopologyType = pipeline->topology;
>> + }
>> +
>> + offset += stride;
>> + }
>> +}
>> +
>> +void genX(CmdDrawIndexedIndirectCountKHR)(
>> + VkCommandBuffer commandBuffer,
>> + VkBuffer _buffer,
>> + VkDeviceSize offset,
>> + VkBuffer _countBuffer,
>> + VkDeviceSize countBufferOffset,
>> + uint32_t maxDrawCount,
>> + uint32_t stride)
>> +{
>> + ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
>> + ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
>> + ANV_FROM_HANDLE(anv_buffer, count_buffer, _countBuffer);
>> + struct anv_cmd_state *cmd_state = &cmd_buffer->state;
>> + struct anv_pipeline *pipeline = cmd_state->gfx.base.pipeline;
>> + const struct brw_vs_prog_data *vs_prog_data =
>> get_vs_prog_data(pipeline);
>> +
>> + if (anv_batch_has_error(&cmd_buffer->batch))
>> + return;
>> +
>> + genX(cmd_buffer_flush_state)(cmd_buffer);
>> +
>> + struct anv_address count_address =
>> + anv_address_add(count_buffer->address, countBufferOffset);
>> +
>> + /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM
>> + * command when loading the values into the predicate source registers.
>> + */
>> + anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
>> + pc.PipeControlFlushEnable = true;
>> + }
>> +
>> + emit_lrm(&cmd_buffer->batch, CS_GPR(MI_ALU_REG14), count_address);
>> + emit_lri(&cmd_buffer->batch, CS_GPR(MI_ALU_REG14) + 4, 0);
>> +
>> + for (uint32_t i = 0; i < maxDrawCount; i++) {
>> + struct anv_address draw = anv_address_add(buffer->address, offset);
>> +
>> + emit_draw_count_predicate(cmd_buffer, count_address, i);
>> +
>> + /* TODO: We need to stomp base vertex to 0 somehow */
>> + if (vs_prog_data->uses_firstvertex ||
>> + vs_prog_data->uses_baseinstance)
>> + emit_base_vertex_instance_bo(cmd_buffer, anv_address_add(draw,
>> 12));
>> + if (vs_prog_data->uses_drawid)
>> + emit_draw_index(cmd_buffer, i);
>> +
>> + load_indirect_parameters(cmd_buffer, draw, true);
>> +
>> + anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
>> + prim.IndirectParameterEnable = true;
>> + prim.PredicateEnable = true;
>> + prim.VertexAccessType = RANDOM;
>> + prim.PrimitiveTopologyType = pipeline->topology;
>> + }
>> +
>> + offset += stride;
>> + }
>> +}
>> +#endif
>> +
>> static VkResult
>> flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
>> {
>> --
>> 2.18.0
>>
>>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20181106/9865b7f2/attachment-0001.html>
More information about the mesa-dev mailing list