[Mesa-dev] [PATCH 2/2] radeonsi: Add CIK SDMA support

Marek Olšák maraeo at gmail.com
Tue May 26 12:58:39 PDT 2015


I suggest we implement a thorough SDMA test first before trying to fix
it. We definitely want to test all implemented tiling modes,
dimensions, subrectangles of those (for the copies), bit depths,
number of samples, etc. and some of those parameters can differ
between src and dst. The list of tested dimensions and subrectangles
doesn't have to be exhaustive, but it should target various corner
cases (e.g. sizes 1 to 129, then each following power-of-two value
followed by a non-power-of-two value, repeating up to the maximum
size). The test should be part of the driver and should be invoked by
radeonsi_screen_create based on an environment variable. The
correctness of the SDMA results should be validated against the
results of resource_copy_region (without SDMA).

After that, we can start fixing SDMA support.

Marek

On Tue, May 26, 2015 at 9:29 PM, Grigori Goronzy <greg at chown.ath.cx> wrote:
> On 26.05.2015 09:28, Michel Dänzer wrote:
>> From: Michel Dänzer <michel.daenzer at amd.com>
>>
>> Based on the corresponding SI support. Same as that, this is currently
>> only enabled for one-dimensional buffer copies due to issues with
>> multi-dimensional SDMA copies.
>>
>
> What a pity, so CIK has exactly the same issues as SI? We should really
> try to figure out what's wrong with tiled DMA copies.
>
> Anyway,
>
> Reviewed-by: Grigori Goronzy <greg at chown.ath.cx>
>
>> Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
>> ---
>>  src/gallium/drivers/radeonsi/Makefile.sources |   1 +
>>  src/gallium/drivers/radeonsi/cik_sdma.c       | 364 ++++++++++++++++++++++++++
>>  src/gallium/drivers/radeonsi/si_dma.c         |  20 --
>>  src/gallium/drivers/radeonsi/si_pipe.h        |   9 +
>>  src/gallium/drivers/radeonsi/si_state.c       |  22 +-
>>  src/gallium/drivers/radeonsi/si_state.h       |   1 +
>>  src/gallium/drivers/radeonsi/sid.h            |  31 +++
>>  7 files changed, 427 insertions(+), 21 deletions(-)
>>  create mode 100644 src/gallium/drivers/radeonsi/cik_sdma.c
>>
>> diff --git a/src/gallium/drivers/radeonsi/Makefile.sources b/src/gallium/drivers/radeonsi/Makefile.sources
>> index 774dc22..2876c0a 100644
>> --- a/src/gallium/drivers/radeonsi/Makefile.sources
>> +++ b/src/gallium/drivers/radeonsi/Makefile.sources
>> @@ -1,4 +1,5 @@
>>  C_SOURCES := \
>> +     cik_sdma.c \
>>       si_blit.c \
>>       si_commands.c \
>>       si_compute.c \
>> diff --git a/src/gallium/drivers/radeonsi/cik_sdma.c b/src/gallium/drivers/radeonsi/cik_sdma.c
>> new file mode 100644
>> index 0000000..3c0103a
>> --- /dev/null
>> +++ b/src/gallium/drivers/radeonsi/cik_sdma.c
>> @@ -0,0 +1,364 @@
>> +/*
>> + * Copyright 2010 Jerome Glisse <glisse at freedesktop.org>
>> + * Copyright 2014 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * on the rights to use, copy, modify, merge, publish, distribute, sub
>> + * license, and/or sell copies of the Software, and to permit persons to whom
>> + * the Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
>> + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
>> + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>> + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>> + * USE OR OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + * Authors:
>> + *      Jerome Glisse
>> + */
>> +
>> +#include "sid.h"
>> +#include "si_pipe.h"
>> +#include "../radeon/r600_cs.h"
>> +
>> +#include "util/u_format.h"
>> +
>> +static uint32_t cik_micro_tile_mode(struct si_screen *sscreen, unsigned tile_mode)
>> +{
>> +     if (sscreen->b.info.si_tile_mode_array_valid) {
>> +             uint32_t gb_tile_mode = sscreen->b.info.si_tile_mode_array[tile_mode];
>> +
>> +             return G_009910_MICRO_TILE_MODE_NEW(gb_tile_mode);
>> +     }
>> +
>> +     /* The kernel cannot return the tile mode array. Guess? */
>> +     return V_009910_ADDR_SURF_THIN_MICRO_TILING;
>> +}
>> +
>> +static void cik_sdma_do_copy_buffer(struct si_context *ctx,
>> +                                 struct pipe_resource *dst,
>> +                                 struct pipe_resource *src,
>> +                                 uint64_t dst_offset,
>> +                                 uint64_t src_offset,
>> +                                 uint64_t size)
>> +{
>> +     struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs;
>> +     unsigned i, ncopy, csize;
>> +     struct r600_resource *rdst = (struct r600_resource*)dst;
>> +     struct r600_resource *rsrc = (struct r600_resource*)src;
>> +
>> +     dst_offset += r600_resource(dst)->gpu_address;
>> +     src_offset += r600_resource(src)->gpu_address;
>> +
>> +     ncopy = (size + CIK_SDMA_COPY_MAX_SIZE - 1) / CIK_SDMA_COPY_MAX_SIZE;
>> +     r600_need_dma_space(&ctx->b, ncopy * 7);
>> +
>> +     r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rsrc, RADEON_USAGE_READ,
>> +                           RADEON_PRIO_MIN);
>> +     r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, rdst, RADEON_USAGE_WRITE,
>> +                           RADEON_PRIO_MIN);
>> +
>> +     for (i = 0; i < ncopy; i++) {
>> +             csize = size < CIK_SDMA_COPY_MAX_SIZE ? size : CIK_SDMA_COPY_MAX_SIZE;
>> +             cs->buf[cs->cdw++] = CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
>> +                                                  CIK_SDMA_COPY_SUB_OPCODE_LINEAR,
>> +                                                  0);
>> +             cs->buf[cs->cdw++] = csize;
>> +             cs->buf[cs->cdw++] = 0; /* src/dst endian swap */
>> +             cs->buf[cs->cdw++] = src_offset;
>> +             cs->buf[cs->cdw++] = src_offset >> 32;
>> +             cs->buf[cs->cdw++] = dst_offset;
>> +             cs->buf[cs->cdw++] = dst_offset >> 32;
>> +             dst_offset += csize;
>> +             src_offset += csize;
>> +             size -= csize;
>> +     }
>> +}
>> +
>> +static void cik_sdma_copy_buffer(struct si_context *ctx,
>> +                              struct pipe_resource *dst,
>> +                              struct pipe_resource *src,
>> +                              uint64_t dst_offset,
>> +                              uint64_t src_offset,
>> +                              uint64_t size)
>> +{
>> +     struct r600_resource *rdst = (struct r600_resource*)dst;
>> +
>> +     /* Mark the buffer range of destination as valid (initialized),
>> +      * so that transfer_map knows it should wait for the GPU when mapping
>> +      * that range. */
>> +     util_range_add(&rdst->valid_buffer_range, dst_offset,
>> +                    dst_offset + size);
>> +
>> +     cik_sdma_do_copy_buffer(ctx, dst, src, dst_offset, src_offset, size);
>> +}
>> +
>> +static void cik_sdma_copy_tile(struct si_context *ctx,
>> +                            struct pipe_resource *dst,
>> +                            unsigned dst_level,
>> +                            struct pipe_resource *src,
>> +                            unsigned src_level,
>> +                            unsigned y,
>> +                            unsigned copy_height,
>> +                            unsigned y_align,
>> +                            unsigned pitch,
>> +                            unsigned bpe)
>> +{
>> +     struct radeon_winsys_cs *cs = ctx->b.rings.dma.cs;
>> +     struct si_screen *sscreen = ctx->screen;
>> +     struct r600_texture *rsrc = (struct r600_texture*)src;
>> +     struct r600_texture *rdst = (struct r600_texture*)dst;
>> +     struct r600_texture *rlinear, *rtiled;
>> +     unsigned linear_lvl, tiled_lvl;
>> +     unsigned array_mode, lbpe, pitch_tile_max, slice_tile_max, size;
>> +     unsigned ncopy, height, cheight, detile, i, src_mode, dst_mode;
>> +     unsigned sub_op, bank_h, bank_w, mt_aspect, nbanks, tile_split, mt;
>> +     uint64_t base, addr;
>> +     unsigned pipe_config, tile_mode_index;
>> +
>> +     dst_mode = rdst->surface.level[dst_level].mode;
>> +     src_mode = rsrc->surface.level[src_level].mode;
>> +     /* downcast linear aligned to linear to simplify test */
>> +     src_mode = src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : src_mode;
>> +     dst_mode = dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : dst_mode;
>> +     assert(dst_mode != src_mode);
>> +     assert(src_mode == RADEON_SURF_MODE_LINEAR || dst_mode == RADEON_SURF_MODE_LINEAR);
>> +
>> +     sub_op = CIK_SDMA_COPY_SUB_OPCODE_TILED;
>> +     lbpe = util_logbase2(bpe);
>> +     pitch_tile_max = ((pitch / bpe) / 8) - 1;
>> +
>> +     detile = dst_mode == RADEON_SURF_MODE_LINEAR;
>> +     rlinear = detile ? rdst : rsrc;
>> +     rtiled = detile ? rsrc : rdst;
>> +     linear_lvl = detile ? dst_level : src_level;
>> +     tiled_lvl = detile ? src_level : dst_level;
>> +
>> +     assert(!util_format_is_depth_and_stencil(rtiled->resource.b.b.format));
>> +
>> +     array_mode = si_array_mode(rtiled->surface.level[tiled_lvl].mode);
>> +     slice_tile_max = (rtiled->surface.level[tiled_lvl].nblk_x *
>> +                       rtiled->surface.level[tiled_lvl].nblk_y) / (8*8) - 1;
>> +     height = rlinear->surface.level[linear_lvl].nblk_y;
>> +     base = rtiled->surface.level[tiled_lvl].offset;
>> +     addr = rlinear->surface.level[linear_lvl].offset;
>> +     bank_h = cik_bank_wh(rtiled->surface.bankh);
>> +     bank_w = cik_bank_wh(rtiled->surface.bankw);
>> +     mt_aspect = cik_macro_tile_aspect(rtiled->surface.mtilea);
>> +     tile_split = cik_tile_split(rtiled->surface.tile_split);
>> +     tile_mode_index = si_tile_mode_index(rtiled, tiled_lvl, false);
>> +     nbanks = si_num_banks(sscreen, rtiled);
>> +     base += rtiled->resource.gpu_address;
>> +     addr += rlinear->resource.gpu_address;
>> +
>> +     pipe_config = cik_db_pipe_config(sscreen, tile_mode_index);
>> +     mt = cik_micro_tile_mode(sscreen, tile_mode_index);
>> +
>> +     size = (copy_height * pitch) / 4;
>> +     cheight = copy_height;
>> +     if (((cheight * pitch) / 4) > CIK_SDMA_COPY_MAX_SIZE) {
>> +             cheight = (CIK_SDMA_COPY_MAX_SIZE * 4) / pitch;
>> +             cheight &= ~(y_align - 1);
>> +     }
>> +     ncopy = (copy_height + cheight - 1) / cheight;
>> +     r600_need_dma_space(&ctx->b, ncopy * 12);
>> +
>> +     r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rsrc->resource,
>> +                           RADEON_USAGE_READ, RADEON_PRIO_MIN);
>> +     r600_context_bo_reloc(&ctx->b, &ctx->b.rings.dma, &rdst->resource,
>> +                           RADEON_USAGE_WRITE, RADEON_PRIO_MIN);
>> +
>> +     copy_height = size * 4 / pitch;
>> +     for (i = 0; i < ncopy; i++) {
>> +             cheight = copy_height;
>> +             if (((cheight * pitch) / 4) > CIK_SDMA_COPY_MAX_SIZE) {
>> +                     cheight = (CIK_SDMA_COPY_MAX_SIZE * 4) / pitch;
>> +                     cheight &= ~(y_align - 1);
>> +             }
>> +             size = (cheight * pitch) / 4;
>> +
>> +             cs->buf[cs->cdw++] = CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
>> +                                                  sub_op, detile << 15);
>> +             cs->buf[cs->cdw++] = base;
>> +             cs->buf[cs->cdw++] = base >> 32;
>> +             cs->buf[cs->cdw++] = ((height - 1) << 16) | pitch_tile_max;
>> +             cs->buf[cs->cdw++] = slice_tile_max;
>> +             cs->buf[cs->cdw++] = (pipe_config << 26) | (mt_aspect << 24) |
>> +                     (nbanks << 21) | (bank_h << 18) | (bank_w << 15) |
>> +                     (tile_split << 11) | (mt << 8) | (array_mode << 3) |
>> +                     lbpe;
>> +             cs->buf[cs->cdw++] = y << 16; /* | x */
>> +             cs->buf[cs->cdw++] = 0; /* z */;
>> +             cs->buf[cs->cdw++] = addr & 0xfffffffc;
>> +             cs->buf[cs->cdw++] = addr >> 32;
>> +             cs->buf[cs->cdw++] = (pitch / bpe) - 1;
>> +             cs->buf[cs->cdw++] = size;
>> +
>> +             copy_height -= cheight;
>> +             y += cheight;
>> +     }
>> +}
>> +
>> +void cik_sdma_copy(struct pipe_context *ctx,
>> +                struct pipe_resource *dst,
>> +                unsigned dst_level,
>> +                unsigned dstx, unsigned dsty, unsigned dstz,
>> +                struct pipe_resource *src,
>> +                unsigned src_level,
>> +                const struct pipe_box *src_box)
>> +{
>> +     struct si_context *sctx = (struct si_context *)ctx;
>> +     struct r600_texture *rsrc = (struct r600_texture*)src;
>> +     struct r600_texture *rdst = (struct r600_texture*)dst;
>> +     unsigned dst_pitch, src_pitch, bpe, dst_mode, src_mode;
>> +     unsigned src_w, dst_w;
>> +     unsigned src_x, src_y;
>> +     unsigned copy_height, y_align;
>> +     unsigned dst_x = dstx, dst_y = dsty, dst_z = dstz;
>> +
>> +     if (sctx->b.rings.dma.cs == NULL) {
>> +             goto fallback;
>> +     }
>> +
>> +     if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
>> +             cik_sdma_copy_buffer(sctx, dst, src, dst_x, src_box->x, src_box->width);
>> +             return;
>> +     }
>> +
>> +     /* Before re-enabling this, please make sure you can hit all newly
>> +      * enabled paths in your testing, preferably with both piglit (in
>> +      * particular the streaming-texture-leak test) and real world apps
>> +      * (e.g. the UE4 Elemental demo).
>> +      */
>> +     goto fallback;
>> +
>> +     if (src->format != dst->format ||
>> +         rdst->surface.nsamples > 1 || rsrc->surface.nsamples > 1 ||
>> +         rdst->dirty_level_mask & (1 << dst_level)) {
>> +             goto fallback;
>> +     }
>> +
>> +     if (rsrc->dirty_level_mask & (1 << src_level)) {
>> +             if (rsrc->htile_buffer)
>> +                     goto fallback;
>> +
>> +             ctx->flush_resource(ctx, src);
>> +     }
>> +
>> +     src_x = util_format_get_nblocksx(src->format, src_box->x);
>> +     dst_x = util_format_get_nblocksx(src->format, dst_x);
>> +     src_y = util_format_get_nblocksy(src->format, src_box->y);
>> +     dst_y = util_format_get_nblocksy(src->format, dst_y);
>> +
>> +     dst_pitch = rdst->surface.level[dst_level].pitch_bytes;
>> +     src_pitch = rsrc->surface.level[src_level].pitch_bytes;
>> +     src_w = rsrc->surface.level[src_level].npix_x;
>> +     dst_w = rdst->surface.level[dst_level].npix_x;
>> +
>> +     if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w ||
>> +         src_box->width != src_w ||
>> +         rsrc->surface.level[src_level].nblk_y !=
>> +         rdst->surface.level[dst_level].nblk_y) {
>> +             /* FIXME CIK can do partial blit */
>> +             goto fallback;
>> +     }
>> +
>> +     bpe = rdst->surface.bpe;
>> +     copy_height = src_box->height / rsrc->surface.blk_h;
>> +     dst_mode = rdst->surface.level[dst_level].mode;
>> +     src_mode = rsrc->surface.level[src_level].mode;
>> +     /* downcast linear aligned to linear to simplify test */
>> +     src_mode = src_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : src_mode;
>> +     dst_mode = dst_mode == RADEON_SURF_MODE_LINEAR_ALIGNED ? RADEON_SURF_MODE_LINEAR : dst_mode;
>> +
>> +     /* Dimensions must be aligned to (macro)tiles */
>> +     switch (src_mode == RADEON_SURF_MODE_LINEAR ? dst_mode : src_mode) {
>> +     case RADEON_SURF_MODE_1D:
>> +             if ((src_x % 8) || (src_y % 8) || (dst_x % 8) || (dst_y % 8) ||
>> +                 (copy_height % 8))
>> +                     goto fallback;
>> +             y_align = 8;
>> +             break;
>> +     case RADEON_SURF_MODE_2D: {
>> +             unsigned mtilew, mtileh, num_banks;
>> +
>> +                     switch (si_num_banks(sctx->screen, rsrc)) {
>> +                     case V_02803C_ADDR_SURF_2_BANK:
>> +                     default:
>> +                             num_banks = 2;
>> +                             break;
>> +                     case V_02803C_ADDR_SURF_4_BANK:
>> +                             num_banks = 4;
>> +                             break;
>> +                     case V_02803C_ADDR_SURF_8_BANK:
>> +                             num_banks = 8;
>> +                             break;
>> +                     case V_02803C_ADDR_SURF_16_BANK:
>> +                             num_banks = 16;
>> +                             break;
>> +                     }
>> +
>> +                     mtilew = (8 * rsrc->surface.bankw *
>> +                               sctx->screen->b.tiling_info.num_channels) *
>> +                             rsrc->surface.mtilea;
>> +                     assert(!(mtilew & (mtilew - 1)));
>> +                     mtileh = (8 * rsrc->surface.bankh * num_banks) /
>> +                             rsrc->surface.mtilea;
>> +                     assert(!(mtileh & (mtileh - 1)));
>> +
>> +                     if ((src_x & (mtilew - 1)) || (src_y & (mtileh - 1)) ||
>> +                         (dst_x & (mtilew - 1)) || (dst_y & (mtileh - 1)) ||
>> +                         (copy_height & (mtileh - 1)))
>> +                             goto fallback;
>> +
>> +                     y_align = mtileh;
>> +                     break;
>> +     }
>> +     default:
>> +             y_align = 1;
>> +     }
>> +
>> +     if (src_mode == dst_mode) {
>> +             uint64_t dst_offset, src_offset;
>> +             unsigned src_h, dst_h;
>> +
>> +             src_h = rsrc->surface.level[src_level].npix_y;
>> +             dst_h = rdst->surface.level[dst_level].npix_y;
>> +
>> +             if (src_box->depth > 1 &&
>> +                 (src_y || dst_y || src_h != dst_h || src_box->height != src_h))
>> +                     goto fallback;
>> +
>> +             /* A simple DMA blit will do. NOTE: the code here assumes:
>> +              *   dst_pitch == src_pitch
>> +              */
>> +             src_offset= rsrc->surface.level[src_level].offset;
>> +             src_offset += rsrc->surface.level[src_level].slice_size * src_box->z;
>> +             src_offset += src_y * src_pitch + src_x * bpe;
>> +             dst_offset = rdst->surface.level[dst_level].offset;
>> +             dst_offset += rdst->surface.level[dst_level].slice_size * dst_z;
>> +             dst_offset += dst_y * dst_pitch + dst_x * bpe;
>> +             cik_sdma_do_copy_buffer(sctx, dst, src, dst_offset, src_offset,
>> +                                     src_box->depth *
>> +                                     rsrc->surface.level[src_level].slice_size);
>> +     } else {
>> +             if (dst_y != src_y || src_box->depth > 1 || src_box->z || dst_z)
>> +                     goto fallback;
>> +
>> +             cik_sdma_copy_tile(sctx, dst, dst_level, src, src_level,
>> +                                src_y, copy_height, y_align, dst_pitch, bpe);
>> +     }
>> +     return;
>> +
>> +fallback:
>> +     si_resource_copy_region(ctx, dst, dst_level, dstx, dsty, dstz,
>> +                             src, src_level, src_box);
>> +}
>> diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c
>> index db523ee..7a0076e 100644
>> --- a/src/gallium/drivers/radeonsi/si_dma.c
>> +++ b/src/gallium/drivers/radeonsi/si_dma.c
>> @@ -30,21 +30,6 @@
>>
>>  #include "util/u_format.h"
>>
>> -static unsigned si_array_mode(unsigned mode)
>> -{
>> -     switch (mode) {
>> -     case RADEON_SURF_MODE_LINEAR_ALIGNED:
>> -             return V_009910_ARRAY_LINEAR_ALIGNED;
>> -     case RADEON_SURF_MODE_1D:
>> -             return V_009910_ARRAY_1D_TILED_THIN1;
>> -     case RADEON_SURF_MODE_2D:
>> -             return V_009910_ARRAY_2D_TILED_THIN1;
>> -     default:
>> -     case RADEON_SURF_MODE_LINEAR:
>> -             return V_009910_ARRAY_LINEAR_GENERAL;
>> -     }
>> -}
>> -
>>  static uint32_t si_micro_tile_mode(struct si_screen *sscreen, unsigned tile_mode)
>>  {
>>       if (sscreen->b.info.si_tile_mode_array_valid) {
>> @@ -240,11 +225,6 @@ void si_dma_copy(struct pipe_context *ctx,
>>               goto fallback;
>>       }
>>
>> -     /* TODO: Implement DMA copy for CIK */
>> -     if (sctx->b.chip_class >= CIK) {
>> -             goto fallback;
>> -     }
>> -
>>       if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
>>               si_dma_copy_buffer(sctx, dst, src, dst_x, src_box->x, src_box->width);
>>               return;
>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
>> index f98c7a8..2d67342 100644
>> --- a/src/gallium/drivers/radeonsi/si_pipe.h
>> +++ b/src/gallium/drivers/radeonsi/si_pipe.h
>> @@ -237,6 +237,15 @@ struct si_context {
>>       unsigned                spi_tmpring_size;
>>  };
>>
>> +/* cik_sdma.c */
>> +void cik_sdma_copy(struct pipe_context *ctx,
>> +                struct pipe_resource *dst,
>> +                unsigned dst_level,
>> +                unsigned dstx, unsigned dsty, unsigned dstz,
>> +                struct pipe_resource *src,
>> +                unsigned src_level,
>> +                const struct pipe_box *src_box);
>> +
>>  /* si_blit.c */
>>  void si_init_blit_functions(struct si_context *sctx);
>>  void si_flush_depth_textures(struct si_context *sctx,
>> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
>> index 7f0fdd5..f003a04 100644
>> --- a/src/gallium/drivers/radeonsi/si_state.c
>> +++ b/src/gallium/drivers/radeonsi/si_state.c
>> @@ -44,6 +44,21 @@ static void si_init_atom(struct r600_atom *atom, struct r600_atom **list_elem,
>>       *list_elem = atom;
>>  }
>>
>> +unsigned si_array_mode(unsigned mode)
>> +{
>> +     switch (mode) {
>> +     case RADEON_SURF_MODE_LINEAR_ALIGNED:
>> +             return V_009910_ARRAY_LINEAR_ALIGNED;
>> +     case RADEON_SURF_MODE_1D:
>> +             return V_009910_ARRAY_1D_TILED_THIN1;
>> +     case RADEON_SURF_MODE_2D:
>> +             return V_009910_ARRAY_2D_TILED_THIN1;
>> +     default:
>> +     case RADEON_SURF_MODE_LINEAR:
>> +             return V_009910_ARRAY_LINEAR_GENERAL;
>> +     }
>> +}
>> +
>>  uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex)
>>  {
>>       if (sscreen->b.chip_class == CIK &&
>> @@ -2910,11 +2925,16 @@ void si_init_state_functions(struct si_context *sctx)
>>       sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
>>       sctx->b.b.set_min_samples = si_set_min_samples;
>>
>> -     sctx->b.dma_copy = si_dma_copy;
>>       sctx->b.set_occlusion_query_state = si_set_occlusion_query_state;
>>       sctx->b.need_gfx_cs_space = si_need_gfx_cs_space;
>>
>>       sctx->b.b.draw_vbo = si_draw_vbo;
>> +
>> +     if (sctx->b.chip_class >= CIK) {
>> +             sctx->b.dma_copy = cik_sdma_copy;
>> +     } else {
>> +             sctx->b.dma_copy = si_dma_copy;
>> +     }
>>  }
>>
>>  static void
>> diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
>> index 2f8a943..5e68b16 100644
>> --- a/src/gallium/drivers/radeonsi/si_state.h
>> +++ b/src/gallium/drivers/radeonsi/si_state.h
>> @@ -261,6 +261,7 @@ unsigned cik_bank_wh(unsigned bankwh);
>>  unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode);
>>  unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect);
>>  unsigned cik_tile_split(unsigned tile_split);
>> +unsigned si_array_mode(unsigned mode);
>>  uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex);
>>  unsigned si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil);
>>
>> diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
>> index afe011b..35d5ee2 100644
>> --- a/src/gallium/drivers/radeonsi/sid.h
>> +++ b/src/gallium/drivers/radeonsi/sid.h
>> @@ -4516,6 +4516,13 @@
>>  #define     V_009910_ADDR_SURF_8_BANK                               0x02
>>  #define     V_009910_ADDR_SURF_16_BANK                              0x03
>>  /* CIK */
>> +#define   S_009910_MICRO_TILE_MODE_NEW(x)                             (((x) & 0x07) << 22)
>> +#define   G_009910_MICRO_TILE_MODE_NEW(x)                             (((x) >> 22) & 0x07)
>> +#define   C_009910_MICRO_TILE_MODE_NEW(x)                             0xFE3FFFFF
>> +#define     V_009910_ADDR_SURF_DISPLAY_MICRO_TILING                 0x00
>> +#define     V_009910_ADDR_SURF_THIN_MICRO_TILING                    0x01
>> +#define     V_009910_ADDR_SURF_DEPTH_MICRO_TILING                   0x02
>> +#define     V_009910_ADDR_SURF_ROTATED_MICRO_TILING                 0x03
>>  #define R_00B01C_SPI_SHADER_PGM_RSRC3_PS                                0x00B01C
>>  #define   S_00B01C_CU_EN(x)                                           (((x) & 0xFFFF) << 0)
>>  #define   G_00B01C_CU_EN(x)                                           (((x) >> 0) & 0xFFFF)
>> @@ -8696,5 +8703,29 @@
>>  #define    SI_DMA_PACKET_CONSTANT_FILL             0xd
>>  #define    SI_DMA_PACKET_NOP                       0xf
>>
>> +/* CIK async DMA packets */
>> +#define CIK_SDMA_PACKET(op, sub_op, n)   ((((n) & 0xFFFF) << 16) |   \
>> +                                      (((sub_op) & 0xFF) << 8) |     \
>> +                                      (((op) & 0xFF) << 0))
>> +/* CIK async DMA packet types */
>> +#define    CIK_SDMA_OPCODE_NOP                     0x0
>> +#define    CIK_SDMA_OPCODE_COPY                    0x1
>> +#define        CIK_SDMA_COPY_SUB_OPCODE_LINEAR            0x0
>> +#define        CIK_SDMA_COPY_SUB_OPCODE_TILED             0x1
>> +#define        CIK_SDMA_COPY_SUB_OPCODE_SOA               0x3
>> +#define        CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW 0x4
>> +#define        CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW  0x5
>> +#define        CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW    0x6
>> +#define    CIK_SDMA_OPCODE_WRITE                   0x2
>> +#define        SDMA_WRITE_SUB_OPCODE_LINEAR               0x0
>> +#define        SDMA_WRTIE_SUB_OPCODE_TILED                0x1
>> +#define    CIK_SDMA_OPCODE_INDIRECT_BUFFER         0x4
>> +#define    CIK_SDMA_PACKET_FENCE                   0x5
>> +#define    CIK_SDMA_PACKET_TRAP                    0x6
>> +#define    CIK_SDMA_PACKET_SEMAPHORE               0x7
>> +#define    CIK_SDMA_PACKET_CONSTANT_FILL           0xb
>> +#define    CIK_SDMA_PACKET_SRBM_WRITE              0xe
>> +#define    CIK_SDMA_COPY_MAX_SIZE                  0x1fffff
>> +
>>  #endif /* _SID_H */
>>
>>
>
>
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>


More information about the mesa-dev mailing list