[Mesa-dev] [PATCH 1/3] r600g, radeonsi: use a fallback in dma_copy instead of failing
Marek Olšák
maraeo at gmail.com
Sun Mar 9 04:22:58 PDT 2014
The buffer upload code required dword alignment. I don't know why; it
might have been needed for R600-R700.
Marek
On Sun, Mar 9, 2014 at 11:59 AM, Niels Ole Salscheider
<niels_ole at salscheider-online.de> wrote:
> On Sunday 09 March 2014, 02:24:51, Marek Olšák wrote:
>> From: Marek Olšák <marek.olsak at amd.com>
>>
>> ---
>> src/gallium/drivers/r600/evergreen_state.c | 37 +++++++++-------
>> src/gallium/drivers/r600/r600_state.c | 41 ++++++++++-------
>> src/gallium/drivers/radeon/r600_buffer_common.c | 58
>> +++++++++++-------------- src/gallium/drivers/radeon/r600_pipe_common.h |
>> 17 ++++----
>> src/gallium/drivers/radeon/r600_texture.c | 18 +++-----
>> src/gallium/drivers/radeonsi/si_state.c | 19 ++++----
>> 6 files changed, 97 insertions(+), 93 deletions(-)
>>
>> diff --git a/src/gallium/drivers/r600/evergreen_state.c
>> b/src/gallium/drivers/r600/evergreen_state.c index dca7c58..5e57f8d 100644
>> --- a/src/gallium/drivers/r600/evergreen_state.c
>> +++ b/src/gallium/drivers/r600/evergreen_state.c
>> @@ -3329,13 +3329,13 @@ static void evergreen_dma_copy_tile(struct
>> r600_context *rctx, }
>> }
>>
>> -static boolean evergreen_dma_blit(struct pipe_context *ctx,
>> - struct pipe_resource *dst,
>> - unsigned dst_level,
>> - unsigned dst_x, unsigned dst_y, unsigned dst_z,
>> - struct pipe_resource *src,
>> - unsigned src_level,
>> - const struct pipe_box *src_box)
>> +static void evergreen_dma_blit(struct pipe_context *ctx,
>> + struct pipe_resource *dst,
>> + unsigned dst_level,
>> + unsigned dst_x, unsigned dst_y, unsigned dst_z,
>> + struct pipe_resource *src,
>> + unsigned src_level,
>> + const struct pipe_box *src_box)
>> {
>> struct r600_context *rctx = (struct r600_context *)ctx;
>> struct r600_texture *rsrc = (struct r600_texture*)src;
>> @@ -3345,19 +3345,22 @@ static boolean evergreen_dma_blit(struct
>> pipe_context *ctx, unsigned src_x, src_y;
>>
>> if (rctx->b.rings.dma.cs == NULL) {
>> - return FALSE;
>> + goto fallback;
>> }
>>
>> if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
>> + if (dst_x % 4 || src_box->x % 4 || src_box->width % 4)
>> + goto fallback;
>
> Why do we need this? I think that the async DMA engine can handle byte-aligned
> copies. It is streamout that needs x and width to be dword-aligned, isn't it?
>
>> +
>> evergreen_dma_copy(rctx, dst, src, dst_x, src_box->x, src_box-
>>width);
>> - return TRUE;
>> + return;
>> }
>>
>> if (src->format != dst->format) {
>> - return FALSE;
>> + goto fallback;
>> }
>> if (rdst->dirty_level_mask != 0) {
>> - return FALSE;
>> + goto fallback;
>> }
>> if (rsrc->dirty_level_mask) {
>> ctx->flush_resource(ctx, src);
>> @@ -3383,13 +3386,13 @@ static boolean evergreen_dma_blit(struct
>> pipe_context *ctx,
>>
>> if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) {
>> /* FIXME evergreen can do partial blit */
>> - return FALSE;
>> + goto fallback;
>> }
>> /* the x test here are currently useless (because we don't support
> partial
>> blit) * but keep them around so we don't forget about those
>> */
>> if ((src_pitch & 0x7) || (src_box->x & 0x7) || (dst_x & 0x7) ||
>> (src_box->y & 0x7) || (dst_y & 0x7)) { - return FALSE;
>> + goto fallback;
>> }
>>
>> /* 128 bpp surfaces require non_disp_tiling for both
>> @@ -3400,7 +3403,7 @@ static boolean evergreen_dma_blit(struct pipe_context
>> *ctx, if ((rctx->b.chip_class == CAYMAN) &&
>> (src_mode != dst_mode) &&
>> (util_format_get_blocksize(src->format) >= 16)) {
>> - return FALSE;
>> + goto fallback;
>> }
>>
>> if (src_mode == dst_mode) {
>> @@ -3423,7 +3426,11 @@ static boolean evergreen_dma_blit(struct pipe_context
>> *ctx, src, src_level, src_x, src_y, src_box->z,
>> copy_height, dst_pitch, bpp);
>> }
>> - return TRUE;
>> + return;
>> +
>> +fallback:
>> + ctx->resource_copy_region(ctx, dst, dst_level, dst_x, dst_y, dst_z,
>> + src, src_level, src_box);
>> }
>>
>> void evergreen_init_state_functions(struct r600_context *rctx)
>> diff --git a/src/gallium/drivers/r600/r600_state.c
>> b/src/gallium/drivers/r600/r600_state.c index 6d89e6c..a0e6d2d 100644
>> --- a/src/gallium/drivers/r600/r600_state.c
>> +++ b/src/gallium/drivers/r600/r600_state.c
>> @@ -2883,13 +2883,13 @@ static boolean r600_dma_copy_tile(struct
>> r600_context *rctx, return TRUE;
>> }
>>
>> -static boolean r600_dma_blit(struct pipe_context *ctx,
>> - struct pipe_resource *dst,
>> - unsigned dst_level,
>> - unsigned dst_x, unsigned dst_y, unsigned dst_z,
>> - struct pipe_resource *src,
>> - unsigned src_level,
>> - const struct pipe_box *src_box)
>> +static void r600_dma_blit(struct pipe_context *ctx,
>> + struct pipe_resource *dst,
>> + unsigned dst_level,
>> + unsigned dst_x, unsigned dst_y, unsigned dst_z,
>> + struct pipe_resource *src,
>> + unsigned src_level,
>> + const struct pipe_box *src_box)
>> {
>> struct r600_context *rctx = (struct r600_context *)ctx;
>> struct r600_texture *rsrc = (struct r600_texture*)src;
>> @@ -2899,16 +2899,19 @@ static boolean r600_dma_blit(struct pipe_context
>> *ctx, unsigned src_x, src_y;
>>
>> if (rctx->b.rings.dma.cs == NULL) {
>> - return FALSE;
>> + goto fallback;
>> }
>>
>> if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
>> + if (dst_x % 4 || src_box->x % 4 || src_box->width % 4)
>> + goto fallback;
>> +
>> r600_dma_copy(rctx, dst, src, dst_x, src_box->x, src_box->width);
>> - return TRUE;
>> + return;
>> }
>>
>> if (src->format != dst->format) {
>> - return FALSE;
>> + goto fallback;
>> }
>>
>> src_x = util_format_get_nblocksx(src->format, src_box->x);
>> @@ -2931,11 +2934,11 @@ static boolean r600_dma_blit(struct pipe_context
>> *ctx,
>>
>> if (src_pitch != dst_pitch || src_box->x || dst_x || src_w != dst_w) {
>> /* strick requirement on r6xx/r7xx */
>> - return FALSE;
>> + goto fallback;
>> }
>> /* lot of constraint on alignment this should capture them all */
>> if ((src_pitch & 0x7) || (src_box->y & 0x7) || (dst_y & 0x7)) {
>> - return FALSE;
>> + goto fallback;
>> }
>>
>> if (src_mode == dst_mode) {
>> @@ -2955,15 +2958,21 @@ static boolean r600_dma_blit(struct pipe_context
>> *ctx, size = src_box->height * src_pitch;
>> /* must be dw aligned */
>> if ((dst_offset & 0x3) || (src_offset & 0x3) || (size & 0x3)) {
>> - return FALSE;
>> + goto fallback;
>> }
>> r600_dma_copy(rctx, dst, src, dst_offset, src_offset, size);
>> } else {
>> - return r600_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, dst_z,
>> + if (!r600_dma_copy_tile(rctx, dst, dst_level, dst_x, dst_y, dst_z,
>> src, src_level, src_x, src_y, src_box->z,
>> - copy_height, dst_pitch, bpp);
>> + copy_height, dst_pitch, bpp)) {
>> + goto fallback;
>> + }
>> }
>> - return TRUE;
>> + return;
>> +
>> +fallback:
>> + ctx->resource_copy_region(ctx, dst, dst_level, dst_x, dst_y, dst_z,
>> + src, src_level, src_box);
>> }
>>
>> void r600_init_state_functions(struct r600_context *rctx)
>> diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c
>> b/src/gallium/drivers/radeon/r600_buffer_common.c index 90ca8cb..a7ecfb3
>> 100644
>> --- a/src/gallium/drivers/radeon/r600_buffer_common.c
>> +++ b/src/gallium/drivers/radeon/r600_buffer_common.c
>> @@ -190,6 +190,17 @@ static void *r600_buffer_get_transfer(struct
>> pipe_context *ctx, return data;
>> }
>>
>> +static bool r600_can_dma_copy_buffer(struct r600_common_context *rctx,
>> + unsigned dstx, unsigned srcx, unsigned size)
>> +{
>> + bool dword_aligned = !(dstx % 4) && !(srcx % 4) && !(size % 4);
>> +
>> + return rctx->screen->has_cp_dma ||
>> + (dword_aligned && (rctx->rings.dma.cs ||
>> + rctx->screen->has_streamout));
>> +
>> +}
>> +
>> static void *r600_buffer_transfer_map(struct pipe_context *ctx,
>> struct pipe_resource *resource,
>> unsigned level,
>> @@ -233,10 +244,7 @@ static void *r600_buffer_transfer_map(struct
>> pipe_context *ctx, else if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
>> !(usage & PIPE_TRANSFER_UNSYNCHRONIZED) &&
>> !(rscreen->debug_flags & DBG_NO_DISCARD_RANGE) &&
>> - (rscreen->has_cp_dma ||
>> - (rscreen->has_streamout &&
>> - /* The buffer range must be aligned to 4 with streamout. */
>> - box->x % 4 == 0 && box->width % 4 == 0))) {
>> + r600_can_dma_copy_buffer(rctx, box->x, 0, box->width)) {
>> assert(usage & PIPE_TRANSFER_WRITE);
>>
>> /* Check if mapping this buffer would cause waiting for the GPU. */
>> @@ -260,10 +268,11 @@ static void *r600_buffer_transfer_map(struct
>> pipe_context *ctx, /* At this point, the buffer is always idle (we checked
>> it above). */ usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
>> }
>> - /* Using DMA for larger reads is much faster */
>> + /* Using a staging buffer in GTT for larger reads is much faster. */
>> else if ((usage & PIPE_TRANSFER_READ) &&
>> !(usage & PIPE_TRANSFER_WRITE) &&
>> - (rbuffer->domains == RADEON_DOMAIN_VRAM)) {
>> + rbuffer->domains == RADEON_DOMAIN_VRAM &&
>> + r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
>> unsigned offset;
>> struct r600_resource *staging = NULL;
>>
>> @@ -274,26 +283,16 @@ static void *r600_buffer_transfer_map(struct
>> pipe_context *ctx, if (staging) {
>> data += box->x % R600_MAP_BUFFER_ALIGNMENT;
>>
>> - /* Copy the staging buffer into the original one. */
>> - if (rctx->dma_copy(ctx, (struct pipe_resource*)staging, 0,
>> - box->x % R600_MAP_BUFFER_ALIGNMENT,
>> - 0, 0, resource, level, box)) {
>> - rctx->rings.gfx.flush(rctx, 0);
>> - if (rctx->rings.dma.cs)
>> - rctx->rings.dma.flush(rctx, 0);
>> -
>> - /* Wait for any offloaded CS flush to complete
>> - * to avoid busy-waiting in the winsys. */
>> - rctx->ws->cs_sync_flush(rctx->rings.gfx.cs);
>> - if (rctx->rings.dma.cs)
>> - rctx->ws->cs_sync_flush(rctx->rings.dma.cs);
>> -
>> - rctx->ws->buffer_wait(staging->buf, RADEON_USAGE_WRITE);
>> - return r600_buffer_get_transfer(ctx, resource, level,
> usage, box,
>> - ptransfer, data, staging, offset);
>> - } else {
>> - pipe_resource_reference((struct pipe_resource**)&staging,
> NULL);
>> - }
>> + /* Copy the VRAM buffer to the staging buffer. */
>> + rctx->dma_copy(ctx, &staging->b.b, 0,
>> + box->x % R600_MAP_BUFFER_ALIGNMENT,
>> + 0, 0, resource, level, box);
>> +
>> + /* Just do the synchronization. The buffer is mapped already. */
>> + r600_buffer_map_sync_with_rings(rctx, staging,
> PIPE_TRANSFER_READ);
>> +
>> + return r600_buffer_get_transfer(ctx, resource, level, usage, box,
>> + ptransfer, data, staging, offset);
>> }
>> }
>>
>> @@ -329,12 +328,7 @@ static void r600_buffer_transfer_unmap(struct
>> pipe_context *ctx, u_box_1d(soffset, size, &box);
>>
>> /* Copy the staging buffer into the original one. */
>> - if (!(size % 4) && !(doffset % 4) && !(soffset % 4) &&
>> - rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box)) {
>> - /* DONE. */
>> - } else {
>> - ctx->resource_copy_region(ctx, dst, 0, doffset, 0, 0, src,
> 0, &box);
>> - }
>> + rctx->dma_copy(ctx, dst, 0, doffset, 0, 0, src, 0, &box);
>> }
>> pipe_resource_reference((struct pipe_resource**)&rtransfer->staging,
>> NULL); }
>> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h
>> b/src/gallium/drivers/radeon/r600_pipe_common.h index 493a8fc..5e544c5
>> 100644
>> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
>> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
>> @@ -344,15 +344,14 @@ struct r600_common_context {
>> boolean current_render_cond_cond;
>> boolean predicate_drawing;
>>
>> - /* Copy one resource to another using async DMA.
>> - * False is returned if the copy couldn't be done. */
>> - boolean (*dma_copy)(struct pipe_context *ctx,
>> - struct pipe_resource *dst,
>> - unsigned dst_level,
>> - unsigned dst_x, unsigned dst_y, unsigned dst_z,
>> - struct pipe_resource *src,
>> - unsigned src_level,
>> - const struct pipe_box *src_box);
>> + /* Copy one resource to another using async DMA. */
>> + void (*dma_copy)(struct pipe_context *ctx,
>> + struct pipe_resource *dst,
>> + unsigned dst_level,
>> + unsigned dst_x, unsigned dst_y, unsigned dst_z,
>> + struct pipe_resource *src,
>> + unsigned src_level,
>> + const struct pipe_box *src_box);
>>
>> void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
>> unsigned offset, unsigned size, unsigned value);
>> diff --git a/src/gallium/drivers/radeon/r600_texture.c
>> b/src/gallium/drivers/radeon/r600_texture.c index e3b918e..5c32a84 100644
>> --- a/src/gallium/drivers/radeon/r600_texture.c
>> +++ b/src/gallium/drivers/radeon/r600_texture.c
>> @@ -80,12 +80,8 @@ static void r600_copy_to_staging_texture(struct
>> pipe_context *ctx, struct r600_t return;
>> }
>>
>> - if (!rctx->dma_copy(ctx, dst, 0, 0, 0, 0,
>> - src, transfer->level,
>> - &transfer->box)) {
>> - ctx->resource_copy_region(ctx, dst, 0, 0, 0, 0,
>> - src, transfer->level, &transfer->box);
>> - }
>> + rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level,
>> + &transfer->box);
>> }
>>
>> /* Copy from a transfer's staging texture to a full GPU one. */
>> @@ -106,13 +102,9 @@ static void r600_copy_from_staging_texture(struct
>> pipe_context *ctx, struct r600 return;
>> }
>>
>> - if (!rctx->dma_copy(ctx, dst, transfer->level,
>> - transfer->box.x, transfer->box.y, transfer->box.z,
>> - src, 0, &sbox)) {
>> - ctx->resource_copy_region(ctx, dst, transfer->level,
>> - transfer->box.x, transfer->box.y, transfer->box.z,
>> - src, 0, &sbox);
>> - }
>> + rctx->dma_copy(ctx, dst, transfer->level,
>> + transfer->box.x, transfer->box.y, transfer->box.z,
>> + src, 0, &sbox);
>> }
>>
>> static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned
>> level, diff --git a/src/gallium/drivers/radeonsi/si_state.c
>> b/src/gallium/drivers/radeonsi/si_state.c index 4094421..9210547 100644
>> --- a/src/gallium/drivers/radeonsi/si_state.c
>> +++ b/src/gallium/drivers/radeonsi/si_state.c
>> @@ -2919,16 +2919,19 @@ static void *si_create_blend_custom(struct
>> si_context *sctx, unsigned mode) return
>> si_create_blend_state_mode(&sctx->b.b, &blend, mode);
>> }
>>
>> -static boolean si_dma_copy(struct pipe_context *ctx,
>> - struct pipe_resource *dst,
>> - unsigned dst_level,
>> - unsigned dst_x, unsigned dst_y, unsigned dst_z,
>> - struct pipe_resource *src,
>> - unsigned src_level,
>> - const struct pipe_box *src_box)
>> +static void si_dma_copy(struct pipe_context *ctx,
>> + struct pipe_resource *dst,
>> + unsigned dst_level,
>> + unsigned dst_x, unsigned dst_y, unsigned dst_z,
>> + struct pipe_resource *src,
>> + unsigned src_level,
>> + const struct pipe_box *src_box)
>> {
>> /* XXX implement this or share evergreen_dma_blit with r600g */
>> - return FALSE;
>> +
>> + /* Fallback: */
>> + ctx->resource_copy_region(ctx, dst, dst_level, dst_x, dst_y, dst_z,
>> + src, src_level, src_box);
>> }
>>
>> static void si_set_occlusion_query_state(struct pipe_context *ctx, bool
>> enable) --
>> 1.8.3.2
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list