[Mesa-dev] [PATCH 3/4] radeon: accelerate transfer_inline_write

Christian König deathsimple at vodafone.de
Wed Aug 6 05:44:21 PDT 2014


Am 06.08.2014 um 13:45 schrieb Marek Olšák:
> On Tue, Aug 5, 2014 at 7:31 PM, Christian König <deathsimple at vodafone.de> wrote:
>> From: Christian König <christian.koenig at amd.com>
>>
>> Not completely implemented, cause we need DMA copy support for every hw generation.
>>
>> Signed-off-by: Christian König <christian.koenig at amd.com>
>> ---
>>   src/gallium/drivers/radeon/r600_buffer_common.c |   2 +-
>>   src/gallium/drivers/radeon/r600_pipe_common.c   |   2 +-
>>   src/gallium/drivers/radeon/r600_texture.c       | 104 ++++++++++++++++++++++--
>>   3 files changed, 100 insertions(+), 8 deletions(-)
>>
>> diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c
>> index d747cbc..28ab30c 100644
>> --- a/src/gallium/drivers/radeon/r600_buffer_common.c
>> +++ b/src/gallium/drivers/radeon/r600_buffer_common.c
>> @@ -372,7 +372,7 @@ static const struct u_resource_vtbl r600_buffer_vtbl =
>>          r600_buffer_transfer_map,       /* transfer_map */
>>          NULL,                           /* transfer_flush_region */
>>          r600_buffer_transfer_unmap,     /* transfer_unmap */
>> -       NULL                            /* transfer_inline_write */
>> +       u_default_transfer_inline_write /* transfer_inline_write */
>>   };
>>
>>   struct pipe_resource *r600_buffer_create(struct pipe_screen *screen,
>> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
>> index 3476021..69d344e 100644
>> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
>> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
>> @@ -153,7 +153,7 @@ bool r600_common_context_init(struct r600_common_context *rctx,
>>          rctx->b.transfer_map = u_transfer_map_vtbl;
>>          rctx->b.transfer_flush_region = u_default_transfer_flush_region;
>>          rctx->b.transfer_unmap = u_transfer_unmap_vtbl;
>> -       rctx->b.transfer_inline_write = u_default_transfer_inline_write;
>> +       rctx->b.transfer_inline_write = u_transfer_inline_write_vtbl;
>>           rctx->b.memory_barrier = r600_memory_barrier;
>>          rctx->b.flush = r600_flush_from_st;
>>
>> diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
>> index 482bbff..89b3b55 100644
>> --- a/src/gallium/drivers/radeon/r600_texture.c
>> +++ b/src/gallium/drivers/radeon/r600_texture.c
>> @@ -849,6 +849,47 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
>>                                                                    stride, buf, &surface);
>>   }
>>
>> +static struct r600_texture *r600_texture_from_ptr(struct pipe_screen *screen,
>> +                                                 const struct pipe_resource *templ,
>> +                                                 void *pointer, unsigned stride)
>> +{
>> +       struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
>> +       struct radeon_surface surface = {};
>> +       struct r600_texture *tex;
>> +       unsigned offset, size;
>> +       struct pb_buffer *buf;
>> +       int r;
>> +
>> +       /* Support only 2D textures without mipmaps */
>> +       if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) ||
>> +             templ->depth0 != 1 || templ->last_level != 0)
>> +               return NULL;
>> +
>> +       /* stride needs to be at least dw aligned */
>> +       if (stride % 4)
>> +               return NULL;
>> +
>> +       offset = ((uintptr_t)pointer) & 0xfff;
>> +       pointer = (void *)(((uintptr_t)pointer) - offset);
>> +       size = align(stride * templ->height0 + offset, 0x1000);
>> +
>> +       /* avoid the overhead for small copies */
>> +       if (size < 64*1024)
>> +               return NULL;
>> +
>> +       buf = rscreen->ws->buffer_from_ptr(rscreen->ws, pointer, size);
>> +       if (!buf)
>> +               return NULL;
>> +
>> +       r = r600_init_surface(rscreen, &surface, templ, RADEON_SURF_MODE_LINEAR_ALIGNED, false);
> I know you change it in the next patch, but I think the alignment for
> LINEAR (not ALIGNED) is 8 pixels, right? Of course, libdrm_radeon
> should be reviewed to check that it doesn't over-align the stride. The
> safest thing would be to check if stride == surface[0].pitch_in_bytes.

Yeah, correct.

The problem here is that even RADEON_SURF_MODE_LINEAR can't handle all
the different alignments an application could come up with for the
base pointer and stride. The only thing that can handle dword-aligned
or even byte-aligned subwindow copies is the async DMA partial copy
command, and that is only available on NI+.

Apart from that, testing whether libdrm_radeon really comes up with the
correct stride is indeed a good idea.

>
>
>> +       if (r)
>> +               return NULL;
>> +
>> +       tex = r600_texture_create_object(screen, templ, stride, buf, &surface);
>> +       tex->surface.level[0].offset += offset;
>> +       return tex;
>> +}
>> +
>>   bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
>>                                       struct pipe_resource *texture,
>>                                       struct r600_texture **staging)
>> @@ -1112,14 +1153,65 @@ static void r600_texture_transfer_unmap(struct pipe_context *ctx,
>>          FREE(transfer);
>>   }
>>
>> +static void r600_texture_transfer_inline_write(struct pipe_context *ctx,
>> +                                              struct pipe_resource *dst,
>> +                                              unsigned level, unsigned usage,
>> +                                              const struct pipe_box *box,
>> +                                              const void *data,
>> +                                              unsigned stride,
>> +                                              unsigned layer_stride)
>> +{
>> +       struct r600_common_context *rctx = (struct r600_common_context*)ctx;
>> +       struct r600_texture *rsrc;
>> +       struct pipe_resource *src, templ = {};
>> +       struct pipe_box src_box = {};
>> +
>> +       templ.target = PIPE_TEXTURE_2D;
>> +       templ.format = dst->format;
>> +
>> +       templ.width0 = box->width;
>> +       templ.height0 = box->height;
>> +       templ.depth0 = 1;
>> +       templ.array_size = 1;
>> +
>> +       templ.usage = PIPE_USAGE_STAGING;
>> +       templ.bind = PIPE_BIND_TRANSFER_READ;
>> +
>> +       rsrc = r600_texture_from_ptr(ctx->screen, &templ, (void *)data, stride);
>> +       src = (struct pipe_resource *)rsrc;
>> +       if (!src) {
> As an optimization, you can test if the dst texture is referenced or
> busy and if yes, use u_default_transfer_inline_write. If there is
> non-trivial rendering going on, the buffer_wait call below will cost a
> lot.

Wouldn't u_default_transfer_inline_write block waiting for the
destination buffer to become idle as well? I would rather say that I
only need to flush the context if there are draws queued up for the
destination buffer.

And to reduce the cost of waiting for the DMA from source to
destination, submitting it using a separate context sounds like the
best approach to me.

Christian.

>
> Marek
>
>> +               u_default_transfer_inline_write(ctx, dst, level, usage, box,
>> +                                               data, stride, layer_stride);
>> +               return;
>> +       }
>> +
>> +       src_box.width = box->width;
>> +       src_box.height = box->height;
>> +       src_box.depth = box->depth;
>> +       rctx->dma_copy(ctx, dst, level, box->x, box->y, box->z, src, 0, &src_box);
>> +
>> +       if (rctx->ws->cs_is_buffer_referenced(rctx->rings.gfx.cs, rsrc->resource.cs_buf,
>> +                                             RADEON_USAGE_READ))
>> +               rctx->rings.gfx.flush(ctx, 0, NULL);
>> +
>> +       if (rctx->rings.dma.cs &&
>> +           rctx->ws->cs_is_buffer_referenced(rctx->rings.dma.cs, rsrc->resource.cs_buf,
>> +                                            RADEON_USAGE_READ))
>> +               rctx->rings.dma.flush(ctx, 0, NULL);
>> +
>> +       rctx->ws->buffer_wait(rsrc->resource.buf, RADEON_USAGE_READWRITE);
>> +
>> +       pipe_resource_reference(&src, NULL);
>> +}
>> +
>>   static const struct u_resource_vtbl r600_texture_vtbl =
>>   {
>> -       NULL,                           /* get_handle */
>> -       r600_texture_destroy,           /* resource_destroy */
>> -       r600_texture_transfer_map,      /* transfer_map */
>> -       NULL,                           /* transfer_flush_region */
>> -       r600_texture_transfer_unmap,    /* transfer_unmap */
>> -       NULL                            /* transfer_inline_write */
>> +       NULL,                                   /* get_handle */
>> +       r600_texture_destroy,                   /* resource_destroy */
>> +       r600_texture_transfer_map,              /* transfer_map */
>> +       NULL,                                   /* transfer_flush_region */
>> +       r600_texture_transfer_unmap,            /* transfer_unmap */
>> +       r600_texture_transfer_inline_write      /* transfer_inline_write */
>>   };
>>
>>   struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
>> --
>> 1.9.1
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev



More information about the mesa-dev mailing list