[Beignet] [PATCH] Correct clCreateImage(context, CL_MEM_COPY_HOST_PTR, ...)
Zhigang Gong
zhigang.gong at linux.intel.com
Wed May 22 18:56:01 PDT 2013
LGTM, pushed with minor modification (remove the code in comments.). Thanks.
On Wed, May 22, 2013 at 08:36:14AM +0200, Dag Lem wrote:
> The current implementation of clCreateImage initializes images from
> host memory using one of the functions cl_mem_copy_data_linear,
> cl_mem_copy_data_tilex and cl_mem_copy_data_tiley.
>
> This yields garbled images on some platforms, since the tiled formats
> do not always correspond to the formats assumed by the functions
> above.
>
> This is fixed by replacing the functions above with the new function
> cl_mem_copy_image, whichs maps tiled images in GTT mode for copying.
>
> cl_mem_copy_image also implements missing 3D image copy functionality
> (image buffers should also be copied correctly, if/when they are
> allowed).
>
> Signed-off-by: Dag Lem <dag at nimrod.no>
> ---
> src/cl_mem.c | 138 +++++++++++++++++------------------------------------------
> 1 file changed, 39 insertions(+), 99 deletions(-)
>
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index 354fe34..e1d3299 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -30,6 +30,7 @@
> #include "CL/cl_intel.h"
> #include <assert.h>
> #include <stdio.h>
> +#include <string.h>
>
> #define FIELD_SIZE(CASE,TYPE) \
> case JOIN(CL_,CASE): \
> @@ -200,27 +201,34 @@ error:
> }
>
> static void
> -cl_mem_copy_data_linear(cl_mem mem,
> - size_t w,
> - size_t h,
> - size_t pitch,
> - uint32_t bpp,
> - void *data)
> +cl_mem_copy_image(cl_mem image,
> + size_t row_pitch,
> + size_t slice_pitch,
> + void* host_ptr)
> {
> - size_t x, y, p;
> - char *dst;
> - cl_buffer_map(mem->bo, 1);
> - dst = cl_buffer_get_virtual(mem->bo);
> - for (y = 0; y < h; ++y) {
> - char *src = (char*) data + pitch * y;
> - for (x = 0; x < w; ++x) {
> - for (p = 0; p < bpp; ++p)
> - dst[p] = src[p];
> - dst += bpp;
> - src += bpp;
> + char* dst_ptr = cl_mem_map_auto(image);
> +
> + if (row_pitch == image->row_pitch &&
> + (image->depth == 1 || slice_pitch == image->slice_pitch))
> + {
> + memcpy(dst_ptr, host_ptr, image->depth == 1 ? row_pitch*image->h : slice_pitch*image->depth);
> + }
> + else {
> + size_t y, z;
> + for (z = 0; z < image->depth; z++) {
> + const char* src = host_ptr;
> + char* dst = dst_ptr;
> + for (y = 0; y < image->h; y++) {
> + memcpy(dst, src, image->bpp*image->w);
> + src += row_pitch;
> + dst += image->row_pitch;
> + }
> + host_ptr = (char*)host_ptr + slice_pitch;
> + dst_ptr = (char*)dst_ptr + image->slice_pitch;
> }
> }
> - cl_buffer_unmap(mem->bo);
> +
> + cl_mem_unmap_auto(image);
> }
>
> static const uint32_t tile_sz = 4096; /* 4KB per tile */
> @@ -229,77 +237,6 @@ static const uint32_t tilex_h = 8; /* tileX height in number of rows */
> static const uint32_t tiley_w = 128; /* tileY width in bytes */
> static const uint32_t tiley_h = 32; /* tileY height in number of rows */
>
> -static void
> -cl_mem_copy_data_tilex(cl_mem mem,
> - size_t w,
> - size_t h,
> - size_t pitch,
> - uint32_t bpp,
> - void *data)
> -{
> - const size_t tile_w = tilex_w;
> - const size_t tile_h = tilex_h;
> - const size_t aligned_pitch = ALIGN(w * bpp, tile_w);
> - const size_t aligned_height = ALIGN(h, tile_h);
> - const size_t tilex_n = aligned_pitch / tile_w;
> - const size_t tiley_n = aligned_height / tile_h;
> - size_t x, y, tilex, tiley;
> - char *img = NULL;
> - char *end = (char*) data + pitch * h;
> -
> - cl_buffer_map(mem->bo, 1);
> - img = cl_buffer_get_virtual(mem->bo);
> - for (tiley = 0; tiley < tiley_n; ++tiley)
> - for (tilex = 0; tilex < tilex_n; ++tilex) {
> - char *tile = img + (tilex + tiley * tilex_n) * tile_sz;
> - for (y = 0; y < tile_h; ++y) {
> - char *src = (char*) data + (tiley*tile_h+y)*pitch + tilex*tile_w;
> - char *dst = tile + y*tile_w;
> - for (x = 0; x < tile_w; ++x, ++dst, ++src) {
> - if ((uintptr_t) src < (uintptr_t) end)
> - *dst = *src;
> - }
> - }
> - }
> - cl_buffer_unmap(mem->bo);
> -}
> -
> -static void
> -cl_mem_copy_data_tiley(cl_mem mem,
> - size_t w,
> - size_t h,
> - size_t pitch,
> - uint32_t bpp,
> - void *data)
> -{
> - const size_t tile_w = tiley_w;
> - const size_t tile_h = tiley_h;
> - const size_t aligned_pitch = ALIGN(w * bpp, tile_w);
> - const size_t aligned_height = ALIGN(h, tile_h);
> - const size_t tilex_n = aligned_pitch / tile_w;
> - const size_t tiley_n = aligned_height / tile_h;
> - size_t x, y, tilex, tiley, byte;
> - char *img = NULL;
> - char *end = (char*) data + pitch * h;
> -
> - cl_buffer_map(mem->bo, 1);
> - img = cl_buffer_get_virtual(mem->bo);
> - for (tiley = 0; tiley < tiley_n; ++tiley)
> - for (tilex = 0; tilex < tilex_n; ++tilex) {
> - char *tile = img + (tiley * tilex_n + tilex) * tile_sz;
> - for (x = 0; x < tile_w; x += 16) {
> - char *src = (char*) data + tiley*tile_h*pitch + tilex*tile_w+x;
> - char *dst = tile + x*tile_h;
> - for (y = 0; y < tile_h; ++y, dst += 16, src += pitch) {
> - for (byte = 0; byte < 16; ++byte)
> - if ((uintptr_t) src + byte < (uintptr_t) end)
> - dst[byte] = src[byte];
> - }
> - }
> - }
> - cl_buffer_unmap(mem->bo);
> -}
> -
> static cl_mem
> _cl_mem_new_image(cl_context ctx,
> cl_mem_flags flags,
> @@ -398,16 +335,6 @@ _cl_mem_new_image(cl_context ctx,
> if (mem == NULL || err != CL_SUCCESS)
> goto error;
>
> - /* Copy the data if required */
> - if (flags & CL_MEM_COPY_HOST_PTR) {
> - if (tiling == CL_NO_TILE)
> - cl_mem_copy_data_linear(mem, w, h, pitch, bpp, data);
> - else if (tiling == CL_TILE_X)
> - cl_mem_copy_data_tilex(mem, w, h, pitch, bpp, data);
> - else if (tiling == CL_TILE_Y)
> - cl_mem_copy_data_tiley(mem, w, h, pitch, bpp, data);
> - }
> -
> mem->w = w;
> mem->h = h;
> mem->depth = depth;
> @@ -422,6 +349,19 @@ _cl_mem_new_image(cl_context ctx,
>
> cl_buffer_set_tiling(mem->bo, tiling, aligned_pitch);
>
> + /* Copy the data if required */
> + if (flags & CL_MEM_COPY_HOST_PTR) {
> + /*
> + if (tiling == CL_NO_TILE)
> + cl_mem_copy_data_linear(mem, w, h, pitch, bpp, data);
> + else if (tiling == CL_TILE_X)
> + cl_mem_copy_data_tilex(mem, w, h, pitch, bpp, data);
> + else if (tiling == CL_TILE_Y)
> + cl_mem_copy_data_tiley(mem, w, h, pitch, bpp, data);
> + */
> + cl_mem_copy_image(mem, pitch, slice_pitch, data);
> + }
> +
> exit:
> if (errcode_ret)
> *errcode_ret = err;
> --
> 1.8.1.4
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list