[Mesa-dev] [PATCH 14/33] intel/blorp: Add an entrypoint for doing bit-for-bit copies

Pohjolainen, Topi topi.pohjolainen at gmail.com
Fri Sep 9 05:47:19 UTC 2016


On Thu, Sep 08, 2016 at 10:58:09AM -0700, Jason Ekstrand wrote:
>    On Wed, Sep 7, 2016 at 1:16 PM, Jason Ekstrand
>    <[1]jason at jlekstrand.net> wrote:
> 
>    On Sep 7, 2016 10:45 AM, "Nanley Chery" <[2]nanleychery at gmail.com>
>    wrote:
>    >
>    > On Wed, Sep 07, 2016 at 10:26:25AM -0700, Jason Ekstrand wrote:
>    > > On Wed, Sep 7, 2016 at 9:50 AM, Jason Ekstrand
>    <[3]jason at jlekstrand.net> wrote:
>    > >
>    > > > On Wed, Sep 7, 2016 at 9:36 AM, Nanley Chery
>    <[4]nanleychery at gmail.com>
>    > > > wrote:
>    > > >
>    > > >> On Tue, Sep 06, 2016 at 05:02:55PM -0700, Jason Ekstrand wrote:
>    > > >> > On Tue, Sep 6, 2016 at 4:12 PM, Nanley Chery
>    <[5]nanleychery at gmail.com>
>    > > >> wrote:
>    > > >> >
>    > > >> > > On Wed, Aug 31, 2016 at 02:22:33PM -0700, Jason Ekstrand
>    wrote:
>    > > >> > > > ---
>    > > >> > > >  src/intel/blorp/blorp.h      |  10 ++++
>    > > >> > > >  src/intel/blorp/blorp_blit.c | 133
>    ++++++++++++++++++++++++++++++
>    > > >> > > +++++++++++++
>    > > >> > > >  2 files changed, 143 insertions(+)
>    > > >> > > >
>    > > >> > > > diff --git a/src/intel/blorp/blorp.h
>    b/src/intel/blorp/blorp.h
>    > > >> > > > index c1e93fd..6574124 100644
>    > > >> > > > --- a/src/intel/blorp/blorp.h
>    > > >> > > > +++ b/src/intel/blorp/blorp.h
>    > > >> > > > @@ -109,6 +109,16 @@ blorp_blit(struct blorp_batch *batch,
>    > > >> > > >             uint32_t filter, bool mirror_x, bool
>    mirror_y);
>    > > >> > > >
>    > > >> > > >  void
>    > > >> > > > +blorp_copy(struct blorp_batch *batch,
>    > > >> > > > +           const struct blorp_surf *src_surf,
>    > > >> > > > +           unsigned src_level, unsigned src_layer,
>    > > >> > > > +           const struct blorp_surf *dst_surf,
>    > > >> > > > +           unsigned dst_level, unsigned dst_layer,
>    > > >> > > > +           uint32_t src_x, uint32_t src_y,
>    > > >> > > > +           uint32_t dst_x, uint32_t dst_y,
>    > > >> > > > +           uint32_t src_width, uint32_t src_height);
>    > > >> > > > +
>    > > >> > > > +void
>    > > >> > > >  blorp_fast_clear(struct blorp_batch *batch,
>    > > >> > > >                   const struct blorp_surf *surf,
>    > > >> > > >                   uint32_t level, uint32_t layer, enum
>    isl_format
>    > > >> format,
>    > > >> > > > diff --git a/src/intel/blorp/blorp_blit.c
>    > > >> b/src/intel/blorp/blorp_blit.c
>    > > >> > > > index 3ab39a3..42a502c 100644
>    > > >> > > > --- a/src/intel/blorp/blorp_blit.c
>    > > >> > > > +++ b/src/intel/blorp/blorp_blit.c
>    > > >> > > > @@ -1685,3 +1685,136 @@ blorp_blit(struct blorp_batch
>    *batch,
>    > > >> > > >                   dst_x0, dst_y0, dst_x1, dst_y1,
>    > > >> > > >                   mirror_x, mirror_y);
>    > > >> > > >  }
>    > > >> > > > +
>    > > >> > > > +static enum isl_format
>    > > >> > > > +get_copy_format_for_bpb(unsigned bpb)
>    > > >> > > > +{
>    > > >> > > > +   /* The choice of UNORM and UINT formats is very
>    intentional
>    > > >> here.
>    > > >> > > Most of
>    > > >> > > > +    * the time, we want to use a UINT format to avoid any
>    rounding
>    > > >> > > error in
>    > > >> > > > +    * the blit.  For stencil blits, R8_UINT is required
>    by the
>    > > >> hardware.
>    > > >> > > > +    * (It's the only format allowed in conjunction with
>    W-tiling.)
>    > > >> > > Also we
>    > > >> > > > +    * intentionally use the 4-channel formats whenever we
>    can.
>    > > >> This is
>    > > >> > > so
>    > > >> > > > +    * that, when we do a RGB <-> RGBX copy, the two
>    formats will
>    > > >> line
>    > > >> > > up even
>    > > >> > > > +    * though one of them is 3/4 the size of the other.
>    The choice
>    > > >> of
>    > > >> > > UNORM
>    > > >> > > > +    * vs. UINT is also very intentional because Haswell
>    doesn't
>    > > >> handle
>    > > >> > > 8 or
>    > > >> > > > +    * 16-bit RGB UINT formats at all so we have to use
>    UNORM there.
>    > > >> > > > +    * Fortunately, the only time we should ever use two
>    different
>    > > >> > > formats in
>    > > >> > > > +    * the table below is for RGB -> RGBA blits and so we
>    will never
>    > > >> > > have any
>    > > >> > > > +    * UNORM/UINT mismatch.
>    > > >> > > > +    */
>    > > >> > > > +   switch (bpb) {
>    > > >> > > > +   case 8:  return ISL_FORMAT_R8_UINT;
>    > > >> > > > +   case 16: return ISL_FORMAT_R8G8_UINT;
>    > > >> > > > +   case 24: return ISL_FORMAT_R8G8B8_UNORM;
>    > > >> > > > +   case 32: return ISL_FORMAT_R8G8B8A8_UNORM;
>    > > >> > > > +   case 48: return ISL_FORMAT_R16G16B16_UNORM;
>    > > >> > > > +   case 64: return ISL_FORMAT_R16G16B16A16_UNORM;
>    > > >> > > > +   case 96: return ISL_FORMAT_R32G32B32_UINT;
>    > > >> > > > +   case 128:return ISL_FORMAT_R32G32B32A32_UINT;
>    > > >> > > > +   default:
>    > > >> > > > +      unreachable("Unknown format bpb");
>    > > >> > > > +   }
>    > > >> > > > +}
>    > > >> > > > +
>    > > >> > > > +static void
>    > > >> > > > +surf_convert_to_uncompressed(const struct isl_device
>    *isl_dev,
>    > > >> > > > +                             struct
>    brw_blorp_surface_info *info,
>    > > >> > > > +                             uint32_t *x, uint32_t *y,
>    > > >> > > > +                             uint32_t *width, uint32_t
>    *height)
>    > > >> > > > +{
>    > > >> > > > +   const struct isl_format_layout *fmtl =
>    > > >> > > > +      isl_format_get_layout(info->surf.format);
>    > > >> > > > +
>    > > >> > > > +   assert(fmtl->bw > 1 || fmtl->bh > 1);
>    > > >> > > > +
>    > > >> > > > +   /* This is a compressed surface.  We need to convert
>    it to a
>    > > >> single
>    > > >> > > > +    * slice (becase compressed layouts don't perfectly
>    match
>    > > >> > > uncompressed
>    > > >> > > > +    * ones with the same bpb) and divide x, y, width, and
>    height
>    > > >> by the
>    > > >> > > > +    * block size.
>    > > >> > > > +    */
>    > > >> > > > +   surf_convert_to_single_slice(isl_dev, info);
>    > > >> > > > +
>    > > >> > > > +   if (width || height) {
>    > > >> > > > +      assert(*width % fmtl->bw == 0 ||
>    > > >> > > > +             *x + *width == info->surf.logical_level0_px.
>    width);
>    > > >> > > > +      assert(*height % fmtl->bh == 0 ||
>    > > >> > > > +             *y + *height ==
>    info->surf.logical_level0_px.height);
>    > > >> > > > +      *width = DIV_ROUND_UP(*width, fmtl->bw);
>    > > >> > > > +      *height = DIV_ROUND_UP(*height, fmtl->bh);
>    > > >> > > > +   }
>    > > >> > > > +
>    > > >> > > > +   assert(*x % fmtl->bw == 0);
>    > > >> > > > +   assert(*y % fmtl->bh == 0);
>    > > >> > > > +   *x /= fmtl->bw;
>    > > >> > > > +   *y /= fmtl->bh;
>    > > >> > > > +
>    > > >> > > > +   info->surf.logical_level0_px.width =
>    > > >> > > > +      DIV_ROUND_UP(info->surf.logical_level0_px.width,
>    fmtl->bw);
>    > > >> > > > +   info->surf.logical_level0_px.height =
>    > > >> > > > +      DIV_ROUND_UP(info->surf.logical_level0_px.height,
>    fmtl->bh);
>    > > >> > > > +
>    > > >> > > > +   assert(info->surf.phys_level0_sa.width % fmtl->bw ==
>    0);
>    > > >> > > > +   assert(info->surf.phys_level0_sa.height % fmtl->bh ==
>    0);
>    > > >> > > > +   info->surf.phys_level0_sa.width /= fmtl->bw;
>    > > >> > > > +   info->surf.phys_level0_sa.height /= fmtl->bh;
>    > > >> > > > +
>    > > >> > > > +   assert(info->tile_x_sa % fmtl->bw == 0);
>    > > >> > > > +   assert(info->tile_y_sa % fmtl->bh == 0);
>    > > >> > > > +   info->tile_x_sa /= fmtl->bw;
>    > > >> > > > +   info->tile_y_sa /= fmtl->bh;
>    > > >> > > > +
>    > > >> > > > +   /* It's now an uncompressed surface so we need an
>    uncompressed
>    > > >> > > format */
>    > > >> > > > +   info->surf.format = get_copy_format_for_bpb(fmtl->
>    bpb);
>    > > >> > > > +}
>    > > >> > > > +
>    > > >> > > > +void
>    > > >> > > > +blorp_copy(struct blorp_batch *batch,
>    > > >> > > > +           const struct blorp_surf *src_surf,
>    > > >> > > > +           unsigned src_level, unsigned src_layer,
>    > > >> > > > +           const struct blorp_surf *dst_surf,
>    > > >> > > > +           unsigned dst_level, unsigned dst_layer,
>    > > >> > > > +           uint32_t src_x, uint32_t src_y,
>    > > >> > > > +           uint32_t dst_x, uint32_t dst_y,
>    > > >> > > > +           uint32_t src_width, uint32_t src_height)
>    > > >> > > > +{
>    > > >> > > > +   struct blorp_params params;
>    > > >> > > > +   blorp_params_init(&params);
>    > > >> > > > +
>    > > >> > > > +   brw_blorp_surface_info_init(batch->blorp, &params.src,
>    > > >> src_surf,
>    > > >> > > src_level,
>    > > >> > > > +                               src_layer,
>    ISL_FORMAT_UNSUPPORTED,
>    > > >> > > false);
>    > > >> > > > +   brw_blorp_surface_info_init(batch->blorp, &params.dst,
>    > > >> dst_surf,
>    > > >> > > dst_level,
>    > > >> > > > +                               dst_layer,
>    ISL_FORMAT_UNSUPPORTED,
>    > > >> true);
>    > > >> > > > +
>    > > >> > > > +   struct brw_blorp_blit_prog_key wm_prog_key;
>    > > >> > > > +   memset(&wm_prog_key, 0, sizeof(wm_prog_key));
>    > > >> > > > +
>    > > >> > > > +   const struct isl_format_layout *src_fmtl =
>    > > >> > > > +      isl_format_get_layout(params.src.surf.format);
>    > > >> > > > +   const struct isl_format_layout *dst_fmtl =
>    > > >> > > > +      isl_format_get_layout(params.dst.surf.format);
>    > > >> > > > +
>    > > >> > > > +   params.src.view.format =
>    get_copy_format_for_bpb(src_fm
>    > > >> tl->bpb);
>    > > >> > > > +   if (src_fmtl->bw > 1 || src_fmtl->bh > 1) {
>    > > >> > > > +      surf_convert_to_uncompressed(batch->blorp->isl_dev,
>    > > >> &params.src,
>    > > >> > > > +                                   &src_x, &src_y,
>    &src_width,
>    > > >> > > &src_height);
>    > > >> > > > +      wm_prog_key.need_dst_offset = true;
>    > > >> > > > +   }
>    > > >> > > > +
>    > > >> > > > +   params.dst.view.format =
>    get_copy_format_for_bpb(dst_fm
>    > > >> tl->bpb);
>    > > >> > > > +   if (dst_fmtl->bw > 1 || dst_fmtl->bh > 1) {
>    > > >> > > > +      surf_convert_to_uncompressed(batch->blorp->isl_dev,
>    > > >> &params.dst,
>    > > >> > > > +                                   &dst_x, &dst_y, NULL,
>    NULL);
>    > > >> > > > +      wm_prog_key.need_dst_offset = true;
>    > > >> > > > +   }
>    > > >> > >
>    > > >> > > When this function is later used to replace meta in
>    vkCopyImage and
>    > > >> the
>    > > >> > > like,
>    > > >> > > I suspect that copying non-zero mip levels on D16 and S8
>    textures can
>    > > >> fail.
>    > > >> > > These textures have a non-4x4 alignment on SKL and, like
>    compressed
>    > > >> > > textures,
>    > > >> > > need surf_convert_to_single_slice().
>    > > >> > >
>    > > >> >
>    > > >> > I don't think that's an issue.  Depth and stencil are still
>    valid
>    > > >> multi-LOD
>    > > >> > textures.  You don't need 4x4, you just need an alignment that
>    we can
>    > > >> > validly express so 4x8 (stencil), for instance, is fine.  The
>    problem
>    > > >> with
>    > > >> > compressed is that, when you fake it as R32U32G32A32_UINT, you
>    can end
>    > > >> up
>    > > >> > with halign/valign that can't actually be expressed.  On SKL,
>    I think we
>    > > >> > can actually express them all so we may not need the stomp at
>    all.
>    > > >> >
>    > > >> >
>    > > >>
>    > > >> My suspicion was wrong. I was under the assumption that blorp
>    behaved like
>    > > >> meta in that it always created an image from the tile nearest to
>    the copy
>    > > >> region
>    > > >> of the src/dst image - that is not the case. Sorry for not
>    simply asking.
>    > > >> This
>    > > >> seems like a much better way to copy images.
>    > > >>
>    > > >> This series currently causes 6 crucbile tests to fail in
>    > > >> func.miptree.r8g8b8a8-unorm.aspect-color.view-3d. Does your
>    local copy
>    > > >> fix
>    > > >> these? If so, could you send it out?
>    > > >>
>    > > >
>    > > > Drp... I didn't run crucible.  No CTS failures but I'm seeing the
>    same 6
>    > > > crucible fails that you are.  I'll look into them.
>    > > >
>    > >
>    > > Looking at the crucible fail a bit.  The R8G8B8A8 failures were do
>    to
>    > > sanitizing coordinates with the type from the wrong image.  I also
>    found a
>    > > couple of other bugs.
>    > >
>    >
>    > Okay. I'll resume review after you've fixed the issues.
>    >
>    > > There are still failures on 3-D stencil in those tests.  I'll look
>    into
>    > > them.
>    > >
>    >
>    > In order to get 3D stencil tests running, are you using this patch?
>    > [6]https://patchwork.freedesktop.org/patch/109266/
>    > No stencil tests run for me otherwise.
>    >
>    > > The latest can always be found here:
>    > >
>    > > [7]https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/anv-
>    blorp
>    > >
>    >
>    > I checked this branch before asking for the newest revision, but it
>    was out of
>    > date. I see that it has now been updated however.
> 
>      It was up-to-date until I fixed some of those crucible tests.  I'll
>      continue cracking away at the 3D stencil failures when I get back to
>      my computer.  Hopefully it won't take long.
> 
>    Ok, it's passing crucible now.  There were two bugs.  One was a bug in
>    patch 8 where we weren't properly handling the z_offset field of
>    blorp_surface_info.  A new version can be found here:
>    [8]https://cgit.freedesktop.org/~jekstrand/mesa/commit/?h=wip/anv-blorp
>    &id=86ada5b877046b222cfc1a1e7c87e0f7c9ee60f0

Taking info->z_offset into account in surf_convert_to_single_slice() and
grounding it to zero looks right. Moving the assignment:


-   /* For some texture types, we need to pass the layer through the sampler. */
-   params.wm_inputs.src_z = params.src.z_offset;
-
    if (devinfo->gen > 6 &&
        params.dst.surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) {
       assert(params.dst.surf.samples > 1);
@@ -1620,6 +1623,9 @@ blorp_blit(struct blorp_batch *batch,
       wm_prog_key.persample_msaa_dispatch = true;
    }
 
+   /* For some texture types, we need to pass the layer through the sampler. */
+   params.wm_inputs.src_z = params.src.z_offset;
+
    brw_blorp_get_blit_kernel(batch->blorp, &params, &wm_prog_key);
 
    params.src.view.swizzle = src_swizzle;


looks unnecessary though. Is it intentional?

>    The second was a SKL-only bug in our handling of 3D stencil textures
>    that we've had since forever.  It only now showed up because this is
>    the firs time we've ever actually tested sampling from a 3D stencil
>    texture.  Hooray for test coverage, right?  That bug is fixed by the
>    second of the little two-patch series I just sent out.
>    I'm running a full CTS run on Haswell now to see if blorp breaks
>    anything there.  It did at one point, but I think that may be fixed
>    now.  It should now be good-to-go on BDW+
>    --Jason
> 
>      --Jason
> 
>    > - Nanley
>    >
>    > >
>    > > > --Jason
>    > > >
>    > > >
>    > > >> > > One idea is to call surf_convert_to_single_slice() when ever
>    a copy
>    > > >> occurs
>    > > >> > > with an image's subresource that isn't the 1st mip level on
>    the 1st
>    > > >> array
>    > > >> > > layer
>    > > >> > > /depth slice and the alignment differs from what would be
>    used the
>    > > >> copy
>    > > >> > > format.
>    > > >> > > Thoughts?
>    > > >> > >
>    > > >> >
>    > > >> > That seems a bit harsh.
>    > > >> >
>    > > >> >
>    > > >>
>    > > >> I agree.
>    > > >>
>    > > >> - Nanley
>    > > >>
>    > > >> > > - Nanley
>    > > >> > >
>    > > >> > > > +
>    > > >> > > > +   /* Once both surfaces are stompped to uncompressed as
>    needed,
>    > > >> the
>    > > >> > > > +    * destination size is the same as the source size.
>    > > >> > > > +    */
>    > > >> > > > +   uint32_t dst_width = src_width;
>    > > >> > > > +   uint32_t dst_height = src_height;
>    > > >> > > > +
>    > > >> > > > +   do_blorp_blit(batch, &params, &wm_prog_key,
>    > > >> > > > +                 src_x, src_y, src_x + src_width, src_y +
>    > > >> src_height,
>    > > >> > > > +                 dst_x, dst_y, dst_x + dst_width, dst_y +
>    > > >> dst_height,
>    > > >> > > > +                 false, false);
>    > > >> > > > +}
>    > > >> > > > --
>    > > >> > > > 2.5.0.400.gff86faf
>    > > >> > > >
>    > > >> > > > _______________________________________________
>    > > >> > > > mesa-dev mailing list
>    > > >> > > > [9]mesa-dev at lists.freedesktop.org
>    > > >> > > > [10]https://lists.freedesktop.org/
>    mailman/listinfo/mesa-dev
>    > > >> > >
>    > > >>
>    > > >
>    > > >
> 
> References
> 
>    1. mailto:jason at jlekstrand.net
>    2. mailto:nanleychery at gmail.com
>    3. mailto:jason at jlekstrand.net
>    4. mailto:nanleychery at gmail.com
>    5. mailto:nanleychery at gmail.com
>    6. https://patchwork.freedesktop.org/patch/109266/
>    7. https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=wip/anv-blorp
>    8. https://cgit.freedesktop.org/~jekstrand/mesa/commit/?h=wip/anv-blorp&id=86ada5b877046b222cfc1a1e7c87e0f7c9ee60f0
>    9. mailto:mesa-dev at lists.freedesktop.org
>   10. https://lists.freedesktop.org/mailman/listinfo/mesa-dev

> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev



More information about the mesa-dev mailing list