[Mesa-dev] [PATCH 22/22] anv: Predicate fast-clear resolves
Jason Ekstrand
jason at jlekstrand.net
Wed May 3 22:39:51 UTC 2017
On Thu, Apr 27, 2017 at 11:32 AM, Nanley Chery <nanleychery at gmail.com>
wrote:
> There's no image layout to represent a full-RT-cleared color attachment.
> That's one reason we can end up with redundant resolves. Testing has
> shown that such resolves can measurably hurt performance and that
> predicating them can avoid the penalty.
>
> Signed-off-by: Nanley Chery <nanley.g.chery at intel.com>
> ---
> src/intel/vulkan/anv_blorp.c | 3 +-
> src/intel/vulkan/anv_image.c | 6 +++
> src/intel/vulkan/genX_cmd_buffer.c | 106 ++++++++++++++++++++++++++++++
> ++++---
> 3 files changed, 108 insertions(+), 7 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index 821d01a077..32f0edf316 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -1500,7 +1500,8 @@ anv_ccs_resolve(struct anv_cmd_buffer * const
> cmd_buffer,
>
> struct blorp_batch batch;
> blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
> - BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
> + BLORP_BATCH_NO_EMIT_DEPTH_STENCIL |
> + BLORP_BATCH_PREDICATE_ENABLE);
>
> struct blorp_surf surf;
> get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
> diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> index 751f2d6026..92ee86dab5 100644
> --- a/src/intel/vulkan/anv_image.c
> +++ b/src/intel/vulkan/anv_image.c
> @@ -208,6 +208,12 @@ anv_fill_ccs_resolve_ss(const struct anv_device *
> const device,
> .aux_usage = image->aux_usage ==
> ISL_AUX_USAGE_NONE ?
> ISL_AUX_USAGE_CCS_D :
> image->aux_usage,
> .mocs = device->default_mocs);
> +
> + /* The following dword is used as a flag to represent whether or not
> this
> + * CCS subresource needs resolving. We want this to be zero by default,
> + * which means that a resolve is not necessary.
> + */
> + assert(*(uint32_t *)(data + device->isl_dev.ss.addr_offset) == 0);
>
This seems like kind-of an odd place to put it... Not a problem, just odd.
> }
>
> /**
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 95729ec8a8..041301290e 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -402,6 +402,82 @@ transition_depth_buffer(struct anv_cmd_buffer
> *cmd_buffer,
> anv_gen8_hiz_op_resolve(cmd_buffer, image, hiz_op);
> }
>
> +static uint32_t
> +clear_value_buffer_offset(const struct anv_cmd_buffer * const cmd_buffer,
> + const struct anv_image * const image,
> + const uint8_t level)
> +{
> + return image->offset +
> + image->aux_surface.offset + image->aux_surface.isl.size +
> + cmd_buffer->device->isl_dev.ss.size * level;
> +}
> +
> +#define MI_PREDICATE_SRC0 0x2400
> +#define MI_PREDICATE_SRC1 0x2408
> +
> +enum ccs_resolve_state {
> + CCS_RESOLVE_NOT_NEEDED,
> + CCS_RESOLVE_NEEDED,
> + CCS_RESOLVE_STARTING,
> +};
> +
> +/* Manages the state of an color image subresource to ensure resolves are
> + * performed properly.
> + */
> +static void
> +genX(set_resolve_state)(struct anv_cmd_buffer * const cmd_buffer,
> + const struct anv_image * const image,
> + const uint8_t level,
> + const enum ccs_resolve_state state)
> +{
> + assert(cmd_buffer && image);
> +
> + /* The image subresource range must have a color auxiliary buffer. */
> + assert(anv_image_has_color_aux(image));
> + assert(level < anv_color_aux_levels(image));
> +
> + /* We store the resolve flag in the location of the surface base
> address
> + * field for simplicity and because it is initialized to zero when the
> + * clear value buffer is initialized.
> + */
> + const uint32_t resolve_flag_offset =
> + clear_value_buffer_offset(cmd_buffer, image, level) +
> + cmd_buffer->device->isl_dev.ss.addr_offset;
> +
> + /* An update operation should not overwrite the fast clear value. */
> + if (cmd_buffer->device->isl_dev.ss.addr_offset <
> + cmd_buffer->device->isl_dev.ss.clear_value_offset) {
> + assert(cmd_buffer->device->isl_dev.ss.addr_offset + 4 <=
> + cmd_buffer->device->isl_dev.ss.clear_value_offset);
> + }
> +
> + if (state != CCS_RESOLVE_STARTING) {
> + assert(state == CCS_RESOLVE_NEEDED || state ==
> CCS_RESOLVE_NOT_NEEDED);
> + /* The HW docs say that there is no way to guarantee the completion
> of
> + * the following command. We use it nevertheless because it shows no
> + * issues in testing is currently being used in the GL driver.
> + */
> + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
> + sdi.Address = (struct anv_address) { image->bo,
> resolve_flag_offset };
> + sdi.ImmediateData = state == CCS_RESOLVE_NEEDED;
> + }
> + } else {
> + /* Make the pending predicated resolve a no-op if one is not needed.
> + * predicate = do_resolve = resolve_flag != 0;
> + */
> + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 , 0);
> + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
> + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 , 0);
> + emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4,
> + image->bo, resolve_flag_offset);
> + anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
> + mip.LoadOperation = LOAD_LOADINV;
> + mip.CombineOperation = COMBINE_SET;
> + mip.CompareOperation = COMPARE_SRCS_EQUAL;
> + }
>
I really wish the MI_PREDICATE were closer to the anv_ccs_resolve. Maybe
we could just inline this function? If we had a helper to get the
anv_address of the resolve flag, it wouldn't be bad at all.
> + }
> +}
> +
>
> /* Copies clear value dwords between a surface state object and an image's
> * clear value buffer.
> @@ -420,9 +496,7 @@ genX(transfer_clear_value)(struct anv_cmd_buffer *
> const cmd_buffer,
> assert(level < anv_color_aux_levels(image));
>
> const uint32_t img_clear_offset =
> - image->offset + image->aux_surface.offset +
> - image->aux_surface.isl.size +
> - cmd_buffer->device->isl_dev.ss.size * level +
> + clear_value_buffer_offset(cmd_buffer, image, level) +
> cmd_buffer->device->isl_dev.ss.clear_value_offset;
>
> struct anv_bo * const ss_bo =
> @@ -523,6 +597,8 @@ transition_color_buffer(struct anv_cmd_buffer * const
> cmd_buffer,
>
> for (uint32_t level = base_level; level < base_level + level_count;
> level++) {
>
> + genX(set_resolve_state)(cmd_buffer, image, level,
> CCS_RESOLVE_STARTING);
> +
> layer_count = MIN2(layer_count, anv_color_aux_layers(image, level));
> for (uint32_t layer = base_layer; layer < base_layer + layer_count;
> layer++) {
>
> @@ -541,6 +617,8 @@ transition_color_buffer(struct anv_cmd_buffer * const
> cmd_buffer,
> false);
> anv_ccs_resolve(cmd_buffer, surface_state, image, level, layer,
> op);
> }
> +
> + genX(set_resolve_state)(cmd_buffer, image, level,
> CCS_RESOLVE_NOT_NEEDED);
> }
>
> cmd_buffer->state.pending_pipe_bits |=
> @@ -693,6 +771,25 @@ genX(cmd_buffer_setup_attachments)(struct
> anv_cmd_buffer *cmd_buffer,
> genX(transfer_clear_value)(cmd_buffer,
> state->attachments[i].color_rt_state, iview->image,
> iview->isl.base_level, true /* copy_to_buffer */);
> +
> + /* If this image always has the auxiliary buffer enabled
> we can
> + * mark the subresource as not needing a resolve if the
> clear
> + * color will match what's in the RENDER_SURFACE_STATE
> object
> + * when it's being used for sampling.
> + */
> + if (iview->image->aux_usage == ISL_AUX_USAGE_CCS_E &&
> + clear_color.u32[0] == 0 &&
> + clear_color.u32[1] == 0 &&
> + clear_color.u32[2] == 0 &&
> + clear_color.u32[3] == 0) {
> + genX(set_resolve_state)(cmd_buffer, iview->image,
> + iview->isl.base_level,
> + CCS_RESOLVE_NOT_NEEDED);
> + } else {
> + genX(set_resolve_state)(cmd_buffer, iview->image,
> + iview->isl.base_level,
> + CCS_RESOLVE_NEEDED);
> + }
> } else if (att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD &&
> state->attachments[i].aux_usage !=
> ISL_AUX_USAGE_NONE) {
> /* The attachment may have been fast-cleared in a previous
> @@ -2194,9 +2291,6 @@ void genX(CmdDispatch)(
> #define GPGPU_DISPATCHDIMY 0x2504
> #define GPGPU_DISPATCHDIMZ 0x2508
>
> -#define MI_PREDICATE_SRC0 0x2400
> -#define MI_PREDICATE_SRC1 0x2408
> -
> void genX(CmdDispatchIndirect)(
> VkCommandBuffer commandBuffer,
> VkBuffer _buffer,
> --
> 2.12.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170503/02356fd5/attachment-0001.html>
More information about the mesa-dev
mailing list