[Beignet] [PATCH] Correct clCreateImage(context, CL_MEM_COPY_HOST_PTR, ...)
Dag Lem
dag at nimrod.no
Tue May 21 23:36:14 PDT 2013
The current implementation of clCreateImage initializes images from
host memory using one of the functions cl_mem_copy_data_linear,
cl_mem_copy_data_tilex and cl_mem_copy_data_tiley.
This yields garbled images on some platforms, since the tiled formats
do not always correspond to the formats assumed by the functions
above.
This is fixed by replacing the functions above with the new function
cl_mem_copy_image, which maps tiled images in GTT mode for copying.
cl_mem_copy_image also implements missing 3D image copy functionality
(image buffers should also be copied correctly, if/when they are
allowed).
Signed-off-by: Dag Lem <dag at nimrod.no>
---
src/cl_mem.c | 138 +++++++++++++++++------------------------------------------
1 file changed, 39 insertions(+), 99 deletions(-)
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 354fe34..e1d3299 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -30,6 +30,7 @@
#include "CL/cl_intel.h"
#include <assert.h>
#include <stdio.h>
+#include <string.h>
#define FIELD_SIZE(CASE,TYPE) \
case JOIN(CL_,CASE): \
@@ -200,27 +201,34 @@ error:
}
static void
-cl_mem_copy_data_linear(cl_mem mem,
- size_t w,
- size_t h,
- size_t pitch,
- uint32_t bpp,
- void *data)
+cl_mem_copy_image(cl_mem image,
+ size_t row_pitch,
+ size_t slice_pitch,
+ void* host_ptr)
{
- size_t x, y, p;
- char *dst;
- cl_buffer_map(mem->bo, 1);
- dst = cl_buffer_get_virtual(mem->bo);
- for (y = 0; y < h; ++y) {
- char *src = (char*) data + pitch * y;
- for (x = 0; x < w; ++x) {
- for (p = 0; p < bpp; ++p)
- dst[p] = src[p];
- dst += bpp;
- src += bpp;
+ char* dst_ptr = cl_mem_map_auto(image);
+
+ if (row_pitch == image->row_pitch &&
+ (image->depth == 1 || slice_pitch == image->slice_pitch))
+ {
+ memcpy(dst_ptr, host_ptr, image->depth == 1 ? row_pitch*image->h : slice_pitch*image->depth);
+ }
+ else {
+ size_t y, z;
+ for (z = 0; z < image->depth; z++) {
+ const char* src = host_ptr;
+ char* dst = dst_ptr;
+ for (y = 0; y < image->h; y++) {
+ memcpy(dst, src, image->bpp*image->w);
+ src += row_pitch;
+ dst += image->row_pitch;
+ }
+ host_ptr = (char*)host_ptr + slice_pitch;
+ dst_ptr = (char*)dst_ptr + image->slice_pitch;
}
}
- cl_buffer_unmap(mem->bo);
+
+ cl_mem_unmap_auto(image);
}
static const uint32_t tile_sz = 4096; /* 4KB per tile */
@@ -229,77 +237,6 @@ static const uint32_t tilex_h = 8; /* tileX height in number of rows */
static const uint32_t tiley_w = 128; /* tileY width in bytes */
static const uint32_t tiley_h = 32; /* tileY height in number of rows */
-static void
-cl_mem_copy_data_tilex(cl_mem mem,
- size_t w,
- size_t h,
- size_t pitch,
- uint32_t bpp,
- void *data)
-{
- const size_t tile_w = tilex_w;
- const size_t tile_h = tilex_h;
- const size_t aligned_pitch = ALIGN(w * bpp, tile_w);
- const size_t aligned_height = ALIGN(h, tile_h);
- const size_t tilex_n = aligned_pitch / tile_w;
- const size_t tiley_n = aligned_height / tile_h;
- size_t x, y, tilex, tiley;
- char *img = NULL;
- char *end = (char*) data + pitch * h;
-
- cl_buffer_map(mem->bo, 1);
- img = cl_buffer_get_virtual(mem->bo);
- for (tiley = 0; tiley < tiley_n; ++tiley)
- for (tilex = 0; tilex < tilex_n; ++tilex) {
- char *tile = img + (tilex + tiley * tilex_n) * tile_sz;
- for (y = 0; y < tile_h; ++y) {
- char *src = (char*) data + (tiley*tile_h+y)*pitch + tilex*tile_w;
- char *dst = tile + y*tile_w;
- for (x = 0; x < tile_w; ++x, ++dst, ++src) {
- if ((uintptr_t) src < (uintptr_t) end)
- *dst = *src;
- }
- }
- }
- cl_buffer_unmap(mem->bo);
-}
-
-static void
-cl_mem_copy_data_tiley(cl_mem mem,
- size_t w,
- size_t h,
- size_t pitch,
- uint32_t bpp,
- void *data)
-{
- const size_t tile_w = tiley_w;
- const size_t tile_h = tiley_h;
- const size_t aligned_pitch = ALIGN(w * bpp, tile_w);
- const size_t aligned_height = ALIGN(h, tile_h);
- const size_t tilex_n = aligned_pitch / tile_w;
- const size_t tiley_n = aligned_height / tile_h;
- size_t x, y, tilex, tiley, byte;
- char *img = NULL;
- char *end = (char*) data + pitch * h;
-
- cl_buffer_map(mem->bo, 1);
- img = cl_buffer_get_virtual(mem->bo);
- for (tiley = 0; tiley < tiley_n; ++tiley)
- for (tilex = 0; tilex < tilex_n; ++tilex) {
- char *tile = img + (tiley * tilex_n + tilex) * tile_sz;
- for (x = 0; x < tile_w; x += 16) {
- char *src = (char*) data + tiley*tile_h*pitch + tilex*tile_w+x;
- char *dst = tile + x*tile_h;
- for (y = 0; y < tile_h; ++y, dst += 16, src += pitch) {
- for (byte = 0; byte < 16; ++byte)
- if ((uintptr_t) src + byte < (uintptr_t) end)
- dst[byte] = src[byte];
- }
- }
- }
- cl_buffer_unmap(mem->bo);
-}
-
static cl_mem
_cl_mem_new_image(cl_context ctx,
cl_mem_flags flags,
@@ -398,16 +335,6 @@ _cl_mem_new_image(cl_context ctx,
if (mem == NULL || err != CL_SUCCESS)
goto error;
- /* Copy the data if required */
- if (flags & CL_MEM_COPY_HOST_PTR) {
- if (tiling == CL_NO_TILE)
- cl_mem_copy_data_linear(mem, w, h, pitch, bpp, data);
- else if (tiling == CL_TILE_X)
- cl_mem_copy_data_tilex(mem, w, h, pitch, bpp, data);
- else if (tiling == CL_TILE_Y)
- cl_mem_copy_data_tiley(mem, w, h, pitch, bpp, data);
- }
-
mem->w = w;
mem->h = h;
mem->depth = depth;
@@ -422,6 +349,19 @@ _cl_mem_new_image(cl_context ctx,
cl_buffer_set_tiling(mem->bo, tiling, aligned_pitch);
+ /* Copy the data if required */
+ if (flags & CL_MEM_COPY_HOST_PTR) {
+ /*
+ if (tiling == CL_NO_TILE)
+ cl_mem_copy_data_linear(mem, w, h, pitch, bpp, data);
+ else if (tiling == CL_TILE_X)
+ cl_mem_copy_data_tilex(mem, w, h, pitch, bpp, data);
+ else if (tiling == CL_TILE_Y)
+ cl_mem_copy_data_tiley(mem, w, h, pitch, bpp, data);
+ */
+ cl_mem_copy_image(mem, pitch, slice_pitch, data);
+ }
+
exit:
if (errcode_ret)
*errcode_ret = err;
--
1.8.1.4
More information about the Beignet
mailing list