[Mesa-dev] [PATCH v4 18/18] anv: Predicate fast-clear resolves
Jason Ekstrand
jason at jlekstrand.net
Fri Jul 21 22:56:05 UTC 2017
On Wed, Jul 19, 2017 at 2:22 PM, Nanley Chery <nanleychery at gmail.com> wrote:
> Image layouts only let us know that an image *may* be fast-cleared. For
> this reason we can end up with redundant resolves. Testing has shown
> that such resolves can measurably hurt performance and that predicating
> them can avoid the penalty.
>
> v2:
> - Introduce additional resolve state management function (Jason Ekstrand).
> - Enable easy retrieval of fast clear state fields.
>
> Signed-off-by: Nanley Chery <nanley.g.chery at intel.com>
> ---
> src/intel/vulkan/anv_blorp.c | 3 +-
> src/intel/vulkan/anv_private.h | 13 ++--
> src/intel/vulkan/genX_cmd_buffer.c | 120 ++++++++++++++++++++++++++++++
> +++----
> 3 files changed, 120 insertions(+), 16 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index e9b2ccbbdf..ba34cec0bd 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -1628,7 +1628,8 @@ anv_ccs_resolve(struct anv_cmd_buffer * const
> cmd_buffer,
> return;
>
> struct blorp_batch batch;
> - blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
> + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
> + BLORP_BATCH_PREDICATE_ENABLE);
>
> struct blorp_surf surf;
> get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_
> private.h
> index e7b47ead36..588bf732df 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -2090,11 +2090,16 @@ anv_fast_clear_state_entry_size(const struct
> anv_device *device)
> {
> assert(device);
> /* Entry contents:
> - * +----------------------+
> - * | clear value dword(s) |
> - * +----------------------+
> + * +--------------------------------------------+
> + * | clear value dword(s) | needs resolve dword |
> + * +--------------------------------------------+
> */
> - return device->isl_dev.ss.clear_value_size;
> +
> + /* Ensure that the needs resolve dword is in fact dword-aligned to
> enable
> + * GPU memcpy operations.
> + */
> + assert(device->isl_dev.ss.clear_value_size % 4 == 0);
> + return device->isl_dev.ss.clear_value_size + 4;
> }
>
> /* Returns true if a HiZ-enabled depth buffer can be sampled from. */
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 611e77bddb..c4d67fe8c1 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -407,21 +407,92 @@ transition_depth_buffer(struct anv_cmd_buffer
> *cmd_buffer,
> anv_gen8_hiz_op_resolve(cmd_buffer, image, hiz_op);
> }
>
> +enum fast_clear_state_field {
> + FAST_CLEAR_STATE_FIELD_CLEAR,
> + FAST_CLEAR_STATE_FIELD_RESOLVE,
>
Mind calling these CLEAR_COLOR and NEEDS_RESOLVE? Otherwise, it's not
clear what you're fetching.
> +};
> +
> static inline uint32_t
> -get_fast_clear_state_entry_offset(const struct anv_device *device,
> - const struct anv_image *image,
> - unsigned level)
> +get_fast_clear_state_offset(const struct anv_device *device,
> + const struct anv_image *image,
> + unsigned level, enum fast_clear_state_field
> field)
> {
> assert(device && image);
> assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
> assert(level < anv_image_aux_levels(image));
> - const uint32_t offset = image->offset + image->aux_surface.offset +
> - image->aux_surface.isl.size +
> - anv_fast_clear_state_entry_size(device) *
> level;
> + uint32_t offset = image->offset + image->aux_surface.offset +
> + image->aux_surface.isl.size +
> + anv_fast_clear_state_entry_size(device) * level;
> +
> + switch (field) {
> + case FAST_CLEAR_STATE_FIELD_RESOLVE:
> + offset += device->isl_dev.ss.clear_value_size;
> + /* Fall-through */
> + case FAST_CLEAR_STATE_FIELD_CLEAR:
> + break;
> + }
> +
> assert(offset < image->offset + image->size);
> return offset;
> }
>
> +#define MI_PREDICATE_SRC0 0x2400
> +#define MI_PREDICATE_SRC1 0x2408
> +
> +/* Manages the state of an color image subresource to ensure resolves are
> + * performed properly.
> + */
> +static void
> +genX(set_image_needs_resolve)(struct anv_cmd_buffer *cmd_buffer,
> + const struct anv_image *image,
> + unsigned level, bool needs_resolve)
> +{
> + assert(cmd_buffer && image);
> + assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
> + assert(level < anv_image_aux_levels(image));
> +
> + const uint32_t resolve_flag_offset =
> + get_fast_clear_state_offset(cmd_buffer->device, image, level,
> + FAST_CLEAR_STATE_FIELD_RESOLVE);
> +
> + /* The HW docs say that there is no way to guarantee the completion of
> + * the following command. We use it nevertheless because it shows no
> + * issues in testing is currently being used in the GL driver.
> + */
> + anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
> + sdi.Address = (struct anv_address) { image->bo, resolve_flag_offset
> };
> + sdi.ImmediateData = needs_resolve;
> + }
> +}
> +
> +static void
> +genX(load_needs_resolve_predicate)(struct anv_cmd_buffer *cmd_buffer,
> + const struct anv_image *image,
> + unsigned level)
> +{
> + assert(cmd_buffer && image);
> + assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
> + assert(level < anv_image_aux_levels(image));
> +
> + const uint32_t resolve_flag_offset =
> + get_fast_clear_state_offset(cmd_buffer->device, image, level,
> + FAST_CLEAR_STATE_FIELD_RESOLVE);
> +
> + /* Make the pending predicated resolve a no-op if one is not needed.
> + * predicate = do_resolve = resolve_flag != 0;
> + */
> + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 , 0);
> + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
> + emit_lri(&cmd_buffer->batch, MI_PREDICATE_SRC0 , 0);
> + emit_lrm(&cmd_buffer->batch, MI_PREDICATE_SRC0 + 4,
> + image->bo, resolve_flag_offset);
> + anv_batch_emit(&cmd_buffer->batch, GENX(MI_PREDICATE), mip) {
> + mip.LoadOperation = LOAD_LOADINV;
> + mip.CombineOperation = COMBINE_SET;
> + mip.CompareOperation = COMPARE_SRCS_EQUAL;
> + }
> +}
>
Thanks for splitting these up. Much easier to read!
> +
> static void
> init_fast_clear_state_entry(struct anv_cmd_buffer *cmd_buffer,
> const struct anv_image *image,
> @@ -431,6 +502,15 @@ init_fast_clear_state_entry(struct anv_cmd_buffer
> *cmd_buffer,
> assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
> assert(level < anv_image_aux_levels(image));
>
> + /* The resolve flag should updated to signify that
> fast-clear/compression
> + * data needs to be removed when leaving the undefined layout. Such
> data
> + * may need to be removed if it would cause accesses to the color
> buffer
> + * to return incorrect data. The fast clear data in CCS_D buffers
> should
> + * be removed because CCS_D isn't enabled all the time.
> + */
> + genX(set_image_needs_resolve)(cmd_buffer, image, level,
> + image->aux_usage == ISL_AUX_USAGE_NONE);
> +
> /* The fast clear value dword(s) will be copied into a surface state
> object.
> * Ensure that the restrictions of the fields in the dword(s) are
> followed.
> *
> @@ -446,7 +526,8 @@ init_fast_clear_state_entry(struct anv_cmd_buffer
> *cmd_buffer,
> for (; i < cmd_buffer->device->isl_dev.ss.clear_value_size; i += 4) {
> anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
> const uint32_t entry_offset =
> - get_fast_clear_state_entry_offset(cmd_buffer->device, image,
> level);
> + get_fast_clear_state_offset(cmd_buffer->device, image, level,
> + FAST_CLEAR_STATE_FIELD_CLEAR);
> sdi.Address = (struct anv_address) { image->bo, entry_offset + i
> };
>
> if (GEN_GEN >= 9) {
> @@ -493,7 +574,8 @@ genX(copy_fast_clear_dwords)(struct anv_cmd_buffer
> *cmd_buffer,
> uint32_t ss_clear_offset = surface_state.offset +
> cmd_buffer->device->isl_dev.ss.clear_value_offset;
> uint32_t entry_offset =
> - get_fast_clear_state_entry_offset(cmd_buffer->device, image,
> level);
> + get_fast_clear_state_offset(cmd_buffer->device, image, level,
> + FAST_CLEAR_STATE_FIELD_CLEAR);
> unsigned copy_size = cmd_buffer->device->isl_dev.ss.clear_value_size;
>
> if (copy_from_surface_state) {
> @@ -670,6 +752,8 @@ transition_color_buffer(struct anv_cmd_buffer
> *cmd_buffer,
> layer_count = MIN2(layer_count, anv_image_aux_layers(image,
> level));
> }
>
> + genX(load_needs_resolve_predicate)(cmd_buffer, image, level);
> +
> /* Create a surface state with the right clear color and perform the
> * resolve.
> */
> @@ -701,6 +785,8 @@ transition_color_buffer(struct anv_cmd_buffer
> *cmd_buffer,
> image->aux_usage == ISL_AUX_USAGE_CCS_E ?
> BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL :
> BLORP_FAST_CLEAR_OP_RESOLVE_FULL);
> +
> + genX(set_image_needs_resolve)(cmd_buffer, image, level, false);
> }
>
> cmd_buffer->state.pending_pipe_bits |=
> @@ -2450,9 +2536,6 @@ void genX(CmdDispatch)(
> #define GPGPU_DISPATCHDIMY 0x2504
> #define GPGPU_DISPATCHDIMZ 0x2508
>
> -#define MI_PREDICATE_SRC0 0x2400
> -#define MI_PREDICATE_SRC1 0x2408
> -
> void genX(CmdDispatchIndirect)(
> VkCommandBuffer commandBuffer,
> VkBuffer _buffer,
> @@ -2859,6 +2942,21 @@ cmd_buffer_subpass_sync_fast_clear_values(struct
> anv_cmd_buffer *cmd_buffer)
> genX(copy_fast_clear_dwords)(cmd_buffer,
> att_state->color_rt_state,
> iview->image, iview->isl.base_level,
> true /* copy from ss */);
> +
> + /* Fast-clears impact whether or not a resolve will be
> necessary. */
> + if (iview->image->aux_usage == ISL_AUX_USAGE_CCS_E &&
> + att_state->clear_color_is_zero) {
> + /* This image always has the auxiliary buffer enabled. We can
> mark
> + * the subresource as not needing a resolve because the clear
> color
> + * will match what's in every RENDER_SURFACE_STATE object
> when it's
> + * being used for sampling.
> + */
> + genX(set_image_needs_resolve)(cmd_buffer, iview->image,
> + iview->isl.base_level, false);
> + } else {
> + genX(set_image_needs_resolve)(cmd_buffer, iview->image,
> + iview->isl.base_level, true);
> + }
> } else if (rp_att->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
> /* The attachment may have been fast-cleared in a previous render
> * pass and the value is needed now. Update the surface state(s).
> --
> 2.13.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170721/734b4159/attachment-0001.html>
More information about the mesa-dev
mailing list