[Mesa-dev] [PATCH 02/29] anv/blorp: Rework image clear/resolve helpers
Nanley Chery
nanleychery at gmail.com
Wed Dec 6 17:56:53 UTC 2017
On Wed, Dec 06, 2017 at 09:40:25AM -0800, Nanley Chery wrote:
> On Tue, Dec 05, 2017 at 03:48:45PM -0800, Nanley Chery wrote:
> > On Mon, Nov 27, 2017 at 07:05:52PM -0800, Jason Ekstrand wrote:
> > > This replaces image_fast_clear and ccs_resolve with two new helpers that
> > > simply perform an isl_aux_op whatever that may be on CCS or MCS. This
> > > is a bit cleaner as it separates performing the aux operation from which
> > > blorp helper we have to call to do it.
> > > ---
> > > src/intel/vulkan/anv_blorp.c | 218 ++++++++++++++++++++++---------------
> > > src/intel/vulkan/anv_private.h | 23 ++--
> > > src/intel/vulkan/genX_cmd_buffer.c | 28 +++--
> > > 3 files changed, 165 insertions(+), 104 deletions(-)
> > >
> > > diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> > > index e244468..7c8a673 100644
> > > --- a/src/intel/vulkan/anv_blorp.c
> > > +++ b/src/intel/vulkan/anv_blorp.c
> > > @@ -1439,75 +1439,6 @@ fast_clear_aux_usage(const struct anv_image *image,
> > > }
> > >
> > > void
> > > -anv_image_fast_clear(struct anv_cmd_buffer *cmd_buffer,
> > > - const struct anv_image *image,
> > > - VkImageAspectFlagBits aspect,
> > > - const uint32_t base_level, const uint32_t level_count,
> > > - const uint32_t base_layer, uint32_t layer_count)
> > > -{
> > > - assert(image->type == VK_IMAGE_TYPE_3D || image->extent.depth == 1);
> > > -
> > > - if (image->type == VK_IMAGE_TYPE_3D) {
> > > - assert(base_layer == 0);
> > > - assert(layer_count == anv_minify(image->extent.depth, base_level));
> > > - }
> > > -
> > > - struct blorp_batch batch;
> > > - blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer, 0);
> > > -
> > > - struct blorp_surf surf;
> > > - get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
> > > - fast_clear_aux_usage(image, aspect),
> > > - &surf);
> > > -
> > > - /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
> > > - *
> > > - * "After Render target fast clear, pipe-control with color cache
> > > - * write-flush must be issued before sending any DRAW commands on
> > > - * that render target."
> > > - *
> > > - * This comment is a bit cryptic and doesn't really tell you what's going
> > > - * or what's really needed. It appears that fast clear ops are not
> > > - * properly synchronized with other drawing. This means that we cannot
> > > - * have a fast clear operation in the pipe at the same time as other
> > > - * regular drawing operations. We need to use a PIPE_CONTROL to ensure
> > > - * that the contents of the previous draw hit the render target before we
> > > - * resolve and then use a second PIPE_CONTROL after the resolve to ensure
> > > - * that it is completed before any additional drawing occurs.
> > > - */
> > > - cmd_buffer->state.pending_pipe_bits |=
> > > - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
> > > -
> > > - uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
> > > - uint32_t width_div = image->format->planes[plane].denominator_scales[0];
> > > - uint32_t height_div = image->format->planes[plane].denominator_scales[1];
> > > -
> > > - for (uint32_t l = 0; l < level_count; l++) {
> > > - const uint32_t level = base_level + l;
> > > -
> > > - const VkExtent3D extent = {
> > > - .width = anv_minify(image->extent.width, level),
> > > - .height = anv_minify(image->extent.height, level),
> > > - .depth = anv_minify(image->extent.depth, level),
> > > - };
> > > -
> > > - if (image->type == VK_IMAGE_TYPE_3D)
> > > - layer_count = extent.depth;
> > > -
> > > - assert(level < anv_image_aux_levels(image, aspect));
> > > - assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, level));
> > > - blorp_fast_clear(&batch, &surf, surf.surf->format,
> > > - level, base_layer, layer_count,
> > > - 0, 0,
> > > - extent.width / width_div,
> > > - extent.height / height_div);
> > > - }
> > > -
> > > - cmd_buffer->state.pending_pipe_bits |=
> > > - ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
> > > -}
> > > -
> > > -void
> > > anv_cmd_buffer_resolve_subpass(struct anv_cmd_buffer *cmd_buffer)
> > > {
> > > struct anv_framebuffer *fb = cmd_buffer->state.framebuffer;
> > > @@ -1681,36 +1612,153 @@ anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer,
> > > }
> > >
> > > void
> > > -anv_ccs_resolve(struct anv_cmd_buffer * const cmd_buffer,
> > > - const struct anv_image * const image,
> > > - VkImageAspectFlagBits aspect,
> > > - const uint8_t level,
> > > - const uint32_t start_layer, const uint32_t layer_count,
> > > - const enum blorp_fast_clear_op op)
> > > +anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
> > > + const struct anv_image *image,
> > > + VkImageAspectFlagBits aspect,
> > > + uint32_t base_layer, uint32_t layer_count,
> > > + enum isl_aux_op mcs_op, bool predicate)
> > > {
> > > - assert(cmd_buffer && image);
> > > + assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
> > > + assert(image->samples > 1);
> > > + assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, 0));
> > >
> > > - uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
> > > + /* We don't support planar images with multisampling yet */
> > > + assert(image->n_planes == 1);
> > > +
> >
> > Is this true? I can't find a similar restriction in anv_formats.c.
> >
I forgot I had another comment on the YCbCr parts of this patch. Sorry
for the multiple emails.
Lionel also pointed out the spec text for this as well. According to
that, drivers aren't expected to support multisampling planar images.
The code comment makes it seem like a TODO.
-Nanley
> > > + struct blorp_batch batch;
> > > + blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
> > > + predicate ? BLORP_BATCH_PREDICATE_ENABLE : 0);
> > > +
> > > + struct blorp_surf surf;
> > > + get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
> > > + fast_clear_aux_usage(image, aspect),
> >
> > How about ANV_AUX_USAGE_DEFAULT instead? The fast_clear_aux_usage helper
> > seems only beneficial for CCS_D/CCS images. Not a big deal though.
> >
> > > + &surf);
> > > +
> > > + /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
> > > + *
> > > + * "After Render target fast clear, pipe-control with color cache
> > > + * write-flush must be issued before sending any DRAW commands on
> > > + * that render target."
> > > + *
> > > + * This comment is a bit cryptic and doesn't really tell you what's going
> > > + * or what's really needed. It appears that fast clear ops are not
> > > + * properly synchronized with other drawing. This means that we cannot
> > > + * have a fast clear operation in the pipe at the same time as other
> > > + * regular drawing operations. We need to use a PIPE_CONTROL to ensure
> > > + * that the contents of the previous draw hit the render target before we
> > > + * resolve and then use a second PIPE_CONTROL after the resolve to ensure
> > > + * that it is completed before any additional drawing occurs.
> > > + */
> > > + cmd_buffer->state.pending_pipe_bits |=
> > > + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
> > > +
> > > + switch (mcs_op) {
> >
> > Are you missing this case?
> > case ISL_AUX_OP_NONE:
> > return;
> >
> > Seems like the NONE case is left out in a number of other switches. Was
> > this intentional?
> >
> > > + case ISL_AUX_OP_FAST_CLEAR:
> > > + blorp_fast_clear(&batch, &surf, surf.surf->format,
> > > + 0, base_layer, layer_count,
> > > + 0, 0, image->extent.width, image->extent.height);
> > > + break;
> > > + case ISL_AUX_OP_FULL_RESOLVE:
> > > + case ISL_AUX_OP_PARTIAL_RESOLVE:
> > > + case ISL_AUX_OP_AMBIGUATE:
> > > + default:
> > > + unreachable("Unsupported CCS operation");
> > ^
> > MCS
> > > + }
> > > +
> > > + cmd_buffer->state.pending_pipe_bits |=
> > > + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
> > > +
> > > + blorp_batch_finish(&batch);
> > > +}
> > >
> > > - /* The resolved subresource range must have a CCS buffer. */
> > > +static enum blorp_fast_clear_op
> > > +isl_to_blorp_fast_clear_op(enum isl_aux_op isl_op)
> > > +{
> > > + switch (isl_op) {
> >
> > Are you missing this case?
> > case ISL_AUX_OP_NONE: return BLORP_FAST_CLEAR_OP_NONE;
> >
> > > + case ISL_AUX_OP_FAST_CLEAR: return BLORP_FAST_CLEAR_OP_CLEAR;
> > > + case ISL_AUX_OP_FULL_RESOLVE: return BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
> > > + case ISL_AUX_OP_PARTIAL_RESOLVE: return BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
> > > + default:
> > > + unreachable("Unsupported HiZ aux op");
> > > + }
> > > +}
> > > +
> > > +void
> > > +anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
> > > + const struct anv_image *image,
> > > + VkImageAspectFlagBits aspect, uint32_t level,
> > > + uint32_t base_layer, uint32_t layer_count,
> > > + enum isl_aux_op ccs_op, bool predicate)
> > > +{
> > > + assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
> > > + assert(image->samples == 1);
> > > assert(level < anv_image_aux_levels(image, aspect));
> > > - assert(start_layer + layer_count <=
> > > + assert(base_layer + layer_count <=
> > > anv_image_aux_layers(image, aspect, level));
> > > - assert(image->aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV && image->samples == 1);
> > > +
> > > + uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
> > > + uint32_t width_div = image->format->planes[plane].denominator_scales[0];
> > > + uint32_t height_div = image->format->planes[plane].denominator_scales[1];
> > > + uint32_t level_width = anv_minify(image->extent.width, level) / width_div;
> > > + uint32_t level_height = anv_minify(image->extent.height, level) / height_div;
> >
> > I can't find any spec text covering mipmaps and multi-planar images, but
> > the image level is no longer a valid YCbCr subresource if
> >
> > (anv_minify(image->extent.width , level) % width_div ) != 0
> > (anv_minify(image->extent.height, level) % height_div) != 0
> >
> > If this is an open issue, what do you think about some assertions for
> > this? This was a problem in the original code as well.
> >
>
> We're good. Lionel pointed out the relevant spec text that limits the
> level to one.
>
> -Nanley
>
> > >
> > > struct blorp_batch batch;
> > > blorp_batch_init(&cmd_buffer->device->blorp, &batch, cmd_buffer,
> > > - BLORP_BATCH_PREDICATE_ENABLE);
> > > + predicate ? BLORP_BATCH_PREDICATE_ENABLE : 0);
> > >
> > > struct blorp_surf surf;
> > > get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
> > > fast_clear_aux_usage(image, aspect),
> > > &surf);
> > > - surf.clear_color_addr = anv_to_blorp_address(
> > > - anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect, level));
> > >
> > > - blorp_ccs_resolve(&batch, &surf, level, start_layer, layer_count,
> > > - image->planes[plane].surface.isl.format, op);
> > > + if (ccs_op == ISL_AUX_OP_FULL_RESOLVE ||
> > > + ccs_op == ISL_AUX_OP_PARTIAL_RESOLVE) {
> > > + /* If we're doing a resolve operation, then we need the indirect clear
> > > + * color. The clear and ambiguate operations just stomp the CCS to a
> > > + * particular value and don't care about format or clear value.
> > > + */
> > > + const struct anv_address clear_color_addr =
> > > + anv_image_get_clear_color_addr(cmd_buffer->device, image,
> > > + aspect, level);
> > > + surf.clear_color_addr = anv_to_blorp_address(clear_color_addr);
> > > + }
> > > +
> > > + /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
> > > + *
> > > + * "After Render target fast clear, pipe-control with color cache
> > > + * write-flush must be issued before sending any DRAW commands on
> > > + * that render target."
> > > + *
> > > + * This comment is a bit cryptic and doesn't really tell you what's going
> > > + * or what's really needed. It appears that fast clear ops are not
> > > + * properly synchronized with other drawing. This means that we cannot
> > > + * have a fast clear operation in the pipe at the same time as other
> > > + * regular drawing operations. We need to use a PIPE_CONTROL to ensure
> > > + * that the contents of the previous draw hit the render target before we
> > > + * resolve and then use a second PIPE_CONTROL after the resolve to ensure
> > > + * that it is completed before any additional drawing occurs.
> > > + */
> > > + cmd_buffer->state.pending_pipe_bits |=
> > > + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
> > > +
> >
> >
> > We now explicitly flush:
> > * Between the levels of a multi-level layout transition.
> > * Around resolves.
> > Is there any performance penalty associated with this coarser-grained
> > flushing?
> >
> > -Nanley
> >
> > > + switch (ccs_op) {
> > > + case ISL_AUX_OP_FAST_CLEAR:
> > > + blorp_fast_clear(&batch, &surf, surf.surf->format,
> > > + level, base_layer, layer_count,
> > > + 0, 0, level_width, level_height);
> > > + break;
> > > + case ISL_AUX_OP_FULL_RESOLVE:
> > > + case ISL_AUX_OP_PARTIAL_RESOLVE:
> > > + blorp_ccs_resolve(&batch, &surf, level, base_layer, layer_count,
> > > + surf.surf->format, isl_to_blorp_fast_clear_op(ccs_op));
> > > + break;
> > > + case ISL_AUX_OP_AMBIGUATE:
> > > + default:
> > > + unreachable("Unsupported CCS operation");
> > > + }
> > > +
> > > + cmd_buffer->state.pending_pipe_bits |=
> > > + ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
> > >
> > > blorp_batch_finish(&batch);
> > > }
> > > diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> > > index ca3644d..dc44ab6 100644
> > > --- a/src/intel/vulkan/anv_private.h
> > > +++ b/src/intel/vulkan/anv_private.h
> > > @@ -2533,20 +2533,19 @@ void
> > > anv_gen8_hiz_op_resolve(struct anv_cmd_buffer *cmd_buffer,
> > > const struct anv_image *image,
> > > enum blorp_hiz_op op);
> > > -void
> > > -anv_ccs_resolve(struct anv_cmd_buffer * const cmd_buffer,
> > > - const struct anv_image * const image,
> > > - VkImageAspectFlagBits aspect,
> > > - const uint8_t level,
> > > - const uint32_t start_layer, const uint32_t layer_count,
> > > - const enum blorp_fast_clear_op op);
> > >
> > > void
> > > -anv_image_fast_clear(struct anv_cmd_buffer *cmd_buffer,
> > > - const struct anv_image *image,
> > > - VkImageAspectFlagBits aspect,
> > > - const uint32_t base_level, const uint32_t level_count,
> > > - const uint32_t base_layer, uint32_t layer_count);
> > > +anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
> > > + const struct anv_image *image,
> > > + VkImageAspectFlagBits aspect,
> > > + uint32_t base_layer, uint32_t layer_count,
> > > + enum isl_aux_op mcs_op, bool predicate);
> > > +void
> > > +anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
> > > + const struct anv_image *image,
> > > + VkImageAspectFlagBits aspect, uint32_t level,
> > > + uint32_t base_layer, uint32_t layer_count,
> > > + enum isl_aux_op ccs_op, bool predicate);
> > >
> > > void
> > > anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
> > > diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
> > > index ab5590d..2e7a2cc 100644
> > > --- a/src/intel/vulkan/genX_cmd_buffer.c
> > > +++ b/src/intel/vulkan/genX_cmd_buffer.c
> > > @@ -689,9 +689,22 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
> > > "define an MCS buffer.");
> > > }
> > >
> > > - anv_image_fast_clear(cmd_buffer, image, aspect,
> > > - base_level, level_count,
> > > - base_layer, layer_count);
> > > + if (image->samples == 1) {
> > > + for (uint32_t l = 0; l < level_count; l++) {
> > > + const uint32_t level = base_level + l;
> > > + const uint32_t level_layer_count =
> > > + MIN2(layer_count, anv_image_aux_layers(image, aspect, level));
> > > + anv_image_ccs_op(cmd_buffer, image, aspect, level,
> > > + base_layer, level_layer_count,
> > > + ISL_AUX_OP_FAST_CLEAR, false);
> > > + }
> > > + } else {
> > > + assert(image->samples > 1);
> > > + assert(base_level == 0 && level_count == 1);
> > > + anv_image_mcs_op(cmd_buffer, image, aspect,
> > > + base_layer, layer_count,
> > > + ISL_AUX_OP_FAST_CLEAR, false);
> > > + }
> > > }
> > > /* At this point, some elements of the CCS buffer may have the fast-clear
> > > * bit-arrangement. As the user writes to a subresource, we need to have
> > > @@ -760,10 +773,11 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
> > >
> > > genX(load_needs_resolve_predicate)(cmd_buffer, image, aspect, level);
> > >
> > > - anv_ccs_resolve(cmd_buffer, image, aspect, level, base_layer, layer_count,
> > > - image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E ?
> > > - BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL :
> > > - BLORP_FAST_CLEAR_OP_RESOLVE_FULL);
> > > + anv_image_ccs_op(cmd_buffer, image, aspect, level,
> > > + base_layer, layer_count,
> > > + image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E ?
> > > + ISL_AUX_OP_PARTIAL_RESOLVE : ISL_AUX_OP_FULL_RESOLVE,
> > > + true);
> > >
> > > genX(set_image_needs_resolve)(cmd_buffer, image, aspect, level, false);
> > > }
> > > --
> > > 2.5.0.400.gff86faf
> > >
> > > _______________________________________________
> > > mesa-dev mailing list
> > > mesa-dev at lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list