[igt-dev] [PATCH i-g-t 1/4] lib: Move common gpgpu/media functions to gpu_fill library
Daniele Ceraolo Spurio
daniele.ceraolospurio at intel.com
Thu Apr 5 18:48:46 UTC 2018
On 05/04/18 11:25, Daniele Ceraolo Spurio wrote:
>
>
> On 05/04/18 06:53, Katarzyna Dec wrote:
>> The gpgpu_fill and media_fill libraries are very similar and many
>> functions can be shared. I have created a gpu_fill library with all
>> the functions needed to implement gpgpu_fill and media_fill tests
>> for all Gens. Duplicates, e.g. functions where only the name was
>> changed, were removed, keeping the earliest common implementation.
>>
>> v2: Changed the code layout. genX_fill_media_kernel was identical to
>> genX_fill_gpgpu_kernel, so the function was unified into
>> gen7_fill_kernel. There were two very similar
>> gen8_emit_state_base_address functions for media and gpgpu, where the
>> gpgpu one was configured as if it were using indirect state (while we
>> are using CURBE). I have checked that the media fill version works
>> fine in the gpgpu test on Gen8 and unified them.
>>
>> Signed-off-by: Katarzyna Dec <katarzyna.dec at intel.com>
>> Cc: Lukasz Kalamarz <lukasz.kalamarz at intel.com>
>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>> ---
>> lib/Makefile.sources | 2 +
>> lib/gpgpu_fill.c | 567 +------------------------------------
>> lib/gpu_fill.c | 729 ++++++++++++++++++++++++++++++++++++++++++++++++
>> lib/gpu_fill.h | 141 ++++++++++
>> lib/media_fill_gen7.c | 271 +-----------------
>> lib/media_fill_gen8.c | 297 +-------------------
>> lib/media_fill_gen8lp.c | 289 +------------------
>> lib/media_fill_gen9.c | 304 +-------------------
>> lib/meson.build | 1 +
>> 9 files changed, 890 insertions(+), 1711 deletions(-)
>> create mode 100644 lib/gpu_fill.c
>> create mode 100644 lib/gpu_fill.h
>>
>> diff --git a/lib/Makefile.sources b/lib/Makefile.sources
>> index 3d37ef1d..690a1d35 100644
>> --- a/lib/Makefile.sources
>> +++ b/lib/Makefile.sources
>> @@ -55,6 +55,8 @@ lib_source_list = \
>> intel_reg.h \
>> ioctl_wrappers.c \
>> ioctl_wrappers.h \
>> + gpu_fill.h \
>> + gpu_fill.c \
>> media_fill.h \
>> media_fill_gen7.c \
>> media_fill_gen8.c \
>> diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
>> index 4d98643d..68cbac5e 100644
>> --- a/lib/gpgpu_fill.c
>> +++ b/lib/gpgpu_fill.c
>> @@ -34,6 +34,7 @@
>> #include "gen7_media.h"
>> #include "gen8_media.h"
>> #include "gpgpu_fill.h"
>> +#include "gpu_fill.h"
>> /* shaders/gpgpu/gpgpu_fill.gxa */
>> static const uint32_t gen7_gpgpu_kernel[][4] = {
>> @@ -75,572 +76,6 @@ static const uint32_t gen9_gpgpu_kernel[][4] = {
>> { 0x07800031, 0x20000a40, 0x06000e00, 0x82000010 },
>> };
>> -static uint32_t
>> -batch_used(struct intel_batchbuffer *batch)
>> -{
>> - return batch->ptr - batch->buffer;
>> -}
>> -
>
> Hi,
>
> As I mentioned on the previous review, I think the batch_* functions
> shouldn't go into the gpu_fill.c file and should instead go with the
> other intel_batchbuffer methods. I don't think moving them to gpu_fill.c
> as an interim solution works, because we'll have to do another patch to
> move everything to intel_batchbuffer.c, including the other copies of
> these functions in the rendercopy files. IMO it makes more sense to move
> the code directly to its final destination.
> If Lukasz is going to look at that then please sync with him so that his
> patch goes before this one (either as part of this series or standalone).
>
> Thanks,
> Daniele
>
I've realized I was a bit unclear here, considering that you already
said in the cover letter that your plan is to move the batch_*
functions as a second step. What I wanted to express above is that
moving the batch_* functions as the first step is cleaner and results
in less work overall (things only get moved once). This isn't a
blocking suggestion.
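
Just for illustration, a minimal sketch of what I have in mind, assuming
the helpers keep the names and signatures they have in this patch (the
exact placement in the header is only a guess on my side):

    /* intel_batchbuffer.h, next to the other intel_batchbuffer helpers */
    uint32_t batch_used(struct intel_batchbuffer *batch);
    uint32_t batch_align(struct intel_batchbuffer *batch, uint32_t align);
    void *batch_alloc(struct intel_batchbuffer *batch, uint32_t size,
                      uint32_t align);
    uint32_t batch_offset(struct intel_batchbuffer *batch, void *ptr);
    uint32_t batch_copy(struct intel_batchbuffer *batch, const void *ptr,
                        uint32_t size, uint32_t align);

    /* intel_batchbuffer.c, definitions moved verbatim from the fill
     * libraries, e.g.: */
    uint32_t batch_align(struct intel_batchbuffer *batch, uint32_t align)
    {
        uint32_t offset = batch_used(batch);

        offset = ALIGN(offset, align);
        batch->ptr = batch->buffer + offset;
        return offset;
    }

gpu_fill.c (and later the rendercopy files) would then just include
intel_batchbuffer.h instead of carrying their own copies of these
helpers.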
Daniele
>> -static uint32_t
>> -batch_align(struct intel_batchbuffer *batch, uint32_t align)
>> -{
>> - uint32_t offset = batch_used(batch);
>> - offset = ALIGN(offset, align);
>> - batch->ptr = batch->buffer + offset;
>> - return offset;
>> -}
>> -
>> -static void *
>> -batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t
>> align)
>> -{
>> - uint32_t offset = batch_align(batch, align);
>> - batch->ptr += size;
>> - return memset(batch->buffer + offset, 0, size);
>> -}
>> -
>> -static uint32_t
>> -batch_offset(struct intel_batchbuffer *batch, void *ptr)
>> -{
>> - return (uint8_t *)ptr - batch->buffer;
>> -}
>> -
>> -static uint32_t
>> -batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t
>> size,
>> - uint32_t align)
>> -{
>> - return batch_offset(batch, memcpy(batch_alloc(batch, size,
>> align), ptr, size));
>> -}
>> -
>> -static void
>> -gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
>> -{
>> - int ret;
>> -
>> - ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
>> - if (ret == 0)
>> - ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
>> - NULL, 0, 0, 0);
>> - igt_assert(ret == 0);
>> -}
>> -
>> -static uint32_t
>> -gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch, uint8_t
>> color)
>> -{
>> - uint8_t *curbe_buffer;
>> - uint32_t offset;
>> -
>> - curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
>> - offset = batch_offset(batch, curbe_buffer);
>> - *curbe_buffer = color;
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen7_fill_surface_state(struct intel_batchbuffer *batch,
>> - struct igt_buf *buf,
>> - uint32_t format,
>> - int is_dst)
>> -{
>> - struct gen7_surface_state *ss;
>> - uint32_t write_domain, read_domain, offset;
>> - int ret;
>> -
>> - if (is_dst) {
>> - write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
>> - } else {
>> - write_domain = 0;
>> - read_domain = I915_GEM_DOMAIN_SAMPLER;
>> - }
>> -
>> - ss = batch_alloc(batch, sizeof(*ss), 64);
>> - offset = batch_offset(batch, ss);
>> -
>> - ss->ss0.surface_type = GEN7_SURFACE_2D;
>> - ss->ss0.surface_format = format;
>> - ss->ss0.render_cache_read_write = 1;
>> -
>> - if (buf->tiling == I915_TILING_X)
>> - ss->ss0.tiled_mode = 2;
>> - else if (buf->tiling == I915_TILING_Y)
>> - ss->ss0.tiled_mode = 3;
>> -
>> - ss->ss1.base_addr = buf->bo->offset;
>> - ret = drm_intel_bo_emit_reloc(batch->bo,
>> - batch_offset(batch, ss) + 4,
>> - buf->bo, 0,
>> - read_domain, write_domain);
>> - igt_assert(ret == 0);
>> -
>> - ss->ss2.height = igt_buf_height(buf) - 1;
>> - ss->ss2.width = igt_buf_width(buf) - 1;
>> -
>> - ss->ss3.pitch = buf->stride - 1;
>> -
>> - ss->ss7.shader_chanel_select_r = 4;
>> - ss->ss7.shader_chanel_select_g = 5;
>> - ss->ss7.shader_chanel_select_b = 6;
>> - ss->ss7.shader_chanel_select_a = 7;
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen8_fill_surface_state(struct intel_batchbuffer *batch,
>> - struct igt_buf *buf,
>> - uint32_t format,
>> - int is_dst)
>> -{
>> - struct gen8_surface_state *ss;
>> - uint32_t write_domain, read_domain, offset;
>> - int ret;
>> -
>> - if (is_dst) {
>> - write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
>> - } else {
>> - write_domain = 0;
>> - read_domain = I915_GEM_DOMAIN_SAMPLER;
>> - }
>> -
>> - ss = batch_alloc(batch, sizeof(*ss), 64);
>> - offset = batch_offset(batch, ss);
>> -
>> - ss->ss0.surface_type = GEN8_SURFACE_2D;
>> - ss->ss0.surface_format = format;
>> - ss->ss0.render_cache_read_write = 1;
>> - ss->ss0.vertical_alignment = 1; /* align 4 */
>> - ss->ss0.horizontal_alignment = 1; /* align 4 */
>> -
>> - if (buf->tiling == I915_TILING_X)
>> - ss->ss0.tiled_mode = 2;
>> - else if (buf->tiling == I915_TILING_Y)
>> - ss->ss0.tiled_mode = 3;
>> -
>> - ss->ss8.base_addr = buf->bo->offset;
>> -
>> - ret = drm_intel_bo_emit_reloc(batch->bo,
>> - batch_offset(batch, ss) + 8 * 4,
>> - buf->bo, 0,
>> - read_domain, write_domain);
>> - igt_assert_eq(ret, 0);
>> -
>> - ss->ss2.height = igt_buf_height(buf) - 1;
>> - ss->ss2.width = igt_buf_width(buf) - 1;
>> - ss->ss3.pitch = buf->stride - 1;
>> -
>> - ss->ss7.shader_chanel_select_r = 4;
>> - ss->ss7.shader_chanel_select_g = 5;
>> - ss->ss7.shader_chanel_select_b = 6;
>> - ss->ss7.shader_chanel_select_a = 7;
>> -
>> - return offset;
>> -
>> -}
>> -
>> -static uint32_t
>> -gen7_fill_binding_table(struct intel_batchbuffer *batch,
>> - struct igt_buf *dst)
>> -{
>> - uint32_t *binding_table, offset;
>> -
>> - binding_table = batch_alloc(batch, 32, 64);
>> - offset = batch_offset(batch, binding_table);
>> -
>> - binding_table[0] = gen7_fill_surface_state(batch, dst,
>> GEN7_SURFACEFORMAT_R8_UNORM, 1);
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen8_fill_binding_table(struct intel_batchbuffer *batch,
>> - struct igt_buf *dst)
>> -{
>> - uint32_t *binding_table, offset;
>> -
>> - binding_table = batch_alloc(batch, 32, 64);
>> - offset = batch_offset(batch, binding_table);
>> -
>> - binding_table[0] = gen8_fill_surface_state(batch, dst,
>> GEN8_SURFACEFORMAT_R8_UNORM, 1);
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen7_fill_gpgpu_kernel(struct intel_batchbuffer *batch,
>> - const uint32_t kernel[][4],
>> - size_t size)
>> -{
>> - uint32_t offset;
>> -
>> - offset = batch_copy(batch, kernel, size, 64);
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen7_fill_interface_descriptor(struct intel_batchbuffer *batch,
>> struct igt_buf *dst,
>> - const uint32_t kernel[][4], size_t size)
>> -{
>> - struct gen7_interface_descriptor_data *idd;
>> - uint32_t offset;
>> - uint32_t binding_table_offset, kernel_offset;
>> -
>> - binding_table_offset = gen7_fill_binding_table(batch, dst);
>> - kernel_offset = gen7_fill_gpgpu_kernel(batch, kernel, size);
>> -
>> - idd = batch_alloc(batch, sizeof(*idd), 64);
>> - offset = batch_offset(batch, idd);
>> -
>> - idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
>> -
>> - idd->desc1.single_program_flow = 1;
>> - idd->desc1.floating_point_mode = GEN7_FLOATING_POINT_IEEE_754;
>> -
>> - idd->desc2.sampler_count = 0; /* 0 samplers used */
>> - idd->desc2.sampler_state_pointer = 0;
>> -
>> - idd->desc3.binding_table_entry_count = 0;
>> - idd->desc3.binding_table_pointer = (binding_table_offset >> 5);
>> -
>> - idd->desc4.constant_urb_entry_read_offset = 0;
>> - idd->desc4.constant_urb_entry_read_length = 1; /* grf 1 */
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen8_fill_interface_descriptor(struct intel_batchbuffer *batch,
>> struct igt_buf *dst,
>> - const uint32_t kernel[][4], size_t size)
>> -{
>> - struct gen8_interface_descriptor_data *idd;
>> - uint32_t offset;
>> - uint32_t binding_table_offset, kernel_offset;
>> -
>> - binding_table_offset = gen8_fill_binding_table(batch, dst);
>> - kernel_offset = gen7_fill_gpgpu_kernel(batch, kernel, size);
>> -
>> - idd = batch_alloc(batch, sizeof(*idd), 64);
>> - offset = batch_offset(batch, idd);
>> -
>> - idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
>> -
>> - idd->desc2.single_program_flow = 1;
>> - idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
>> -
>> - idd->desc3.sampler_count = 0; /* 0 samplers used */
>> - idd->desc3.sampler_state_pointer = 0;
>> -
>> - idd->desc4.binding_table_entry_count = 0;
>> - idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
>> -
>> - idd->desc5.constant_urb_entry_read_offset = 0;
>> - idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
>> -
>> - return offset;
>> -}
>> -
>> -static void
>> -gen7_emit_state_base_address(struct intel_batchbuffer *batch)
>> -{
>> - OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
>> -
>> - /* general */
>> - OUT_BATCH(0);
>> -
>> - /* surface */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
>> BASE_ADDRESS_MODIFY);
>> -
>> - /* dynamic */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
>> BASE_ADDRESS_MODIFY);
>> -
>> - /* indirect */
>> - OUT_BATCH(0);
>> -
>> - /* instruction */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
>> BASE_ADDRESS_MODIFY);
>> -
>> - /* general/dynamic/indirect/instruction access Bound */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> - OUT_BATCH(0);
>> - OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> -}
>> -
>> -static void
>> -gen8_emit_state_base_address(struct intel_batchbuffer *batch)
>> -{
>> - OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));
>> -
>> - /* general */
>> - OUT_BATCH(0 | (0x78 << 4) | (0 << 1) | BASE_ADDRESS_MODIFY);
>> - OUT_BATCH(0);
>> -
>> - /* stateless data port */
>> - OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> -
>> - /* surface */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0,
>> BASE_ADDRESS_MODIFY);
>> -
>> - /* dynamic */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER |
>> I915_GEM_DOMAIN_INSTRUCTION,
>> - 0, BASE_ADDRESS_MODIFY);
>> -
>> - /* indirect */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0 );
>> -
>> - /* instruction */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
>> BASE_ADDRESS_MODIFY);
>> -
>> - /* general state buffer size */
>> - OUT_BATCH(0xfffff000 | 1);
>> - /* dynamic state buffer size */
>> - OUT_BATCH(1 << 12 | 1);
>> - /* indirect object buffer size */
>> - OUT_BATCH(0xfffff000 | 1);
>> - /* intruction buffer size, must set modify enable bit, otherwise
>> it may result in GPU hang */
>> - OUT_BATCH(1 << 12 | 1);
>> -}
>> -
>> -static void
>> -gen9_emit_state_base_address(struct intel_batchbuffer *batch)
>> -{
>> - OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (19 - 2));
>> -
>> - /* general */
>> - OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> - OUT_BATCH(0);
>> -
>> - /* stateless data port */
>> - OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> -
>> - /* surface */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0,
>> BASE_ADDRESS_MODIFY);
>> -
>> - /* dynamic */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER |
>> I915_GEM_DOMAIN_INSTRUCTION,
>> - 0, BASE_ADDRESS_MODIFY);
>> -
>> - /* indirect */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -
>> - /* instruction */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
>> BASE_ADDRESS_MODIFY);
>> -
>> - /* general state buffer size */
>> - OUT_BATCH(0xfffff000 | 1);
>> - /* dynamic state buffer size */
>> - OUT_BATCH(1 << 12 | 1);
>> - /* indirect object buffer size */
>> - OUT_BATCH(0xfffff000 | 1);
>> - /* intruction buffer size, must set modify enable bit, otherwise
>> it may result in GPU hang */
>> - OUT_BATCH(1 << 12 | 1);
>> -
>> - /* Bindless surface state base address */
>> - OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> - OUT_BATCH(0);
>> - OUT_BATCH(0xfffff000);
>> -}
>> -
>> -static void
>> -gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
>> -{
>> - OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
>> -
>> - /* scratch buffer */
>> - OUT_BATCH(0);
>> -
>> - /* number of threads & urb entries */
>> - OUT_BATCH(1 << 16 | /* max num of threads */
>> - 0 << 8 | /* num of URB entry */
>> - 1 << 2); /* GPGPU mode */
>> -
>> - OUT_BATCH(0);
>> -
>> - /* urb entry size & curbe size */
>> - OUT_BATCH(0 << 16 | /* URB entry size in 256 bits unit */
>> - 1); /* CURBE entry size in 256 bits unit */
>> -
>> - /* scoreboard */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -}
>> -
>> -static void
>> -gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
>> -{
>> - OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
>> -
>> - /* scratch buffer */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -
>> - /* number of threads & urb entries */
>> - OUT_BATCH(1 << 16 | 1 << 8);
>> -
>> - OUT_BATCH(0);
>> -
>> - /* urb entry size & curbe size */
>> - OUT_BATCH(0 << 16 | 1);
>> -
>> - /* scoreboard */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -}
>> -
>> -static void
>> -gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t
>> curbe_buffer)
>> -{
>> - OUT_BATCH(GEN7_MEDIA_CURBE_LOAD | (4 - 2));
>> - OUT_BATCH(0);
>> - /* curbe total data length */
>> - OUT_BATCH(64);
>> - /* curbe data start address, is relative to the dynamics base
>> address */
>> - OUT_BATCH(curbe_buffer);
>> -}
>> -
>> -static void
>> -gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch,
>> uint32_t interface_descriptor)
>> -{
>> - OUT_BATCH(GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
>> - OUT_BATCH(0);
>> - /* interface descriptor data length */
>> - OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
>> - /* interface descriptor address, is relative to the dynamics base
>> address */
>> - OUT_BATCH(interface_descriptor);
>> -}
>> -
>> -static void
>> -gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch,
>> uint32_t interface_descriptor)
>> -{
>> - OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
>> - OUT_BATCH(0);
>> - /* interface descriptor data length */
>> - OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
>> - /* interface descriptor address, is relative to the dynamics base
>> address */
>> - OUT_BATCH(interface_descriptor);
>> -}
>> -
>> -static void
>> -gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
>> - unsigned x, unsigned y,
>> - unsigned width, unsigned height)
>> -{
>> - uint32_t x_dim, y_dim, tmp, right_mask;
>> -
>> - /*
>> - * Simply do SIMD16 based dispatch, so every thread uses
>> - * SIMD16 channels.
>> - *
>> - * Define our own thread group size, e.g 16x1 for every group, then
>> - * will have 1 thread each group in SIMD16 dispatch. So thread
>> - * width/height/depth are all 1.
>> - *
>> - * Then thread group X = width / 16 (aligned to 16)
>> - * thread group Y = height;
>> - */
>> - x_dim = (width + 15) / 16;
>> - y_dim = height;
>> -
>> - tmp = width & 15;
>> - if (tmp == 0)
>> - right_mask = (1 << 16) - 1;
>> - else
>> - right_mask = (1 << tmp) - 1;
>> -
>> - OUT_BATCH(GEN7_GPGPU_WALKER | 9);
>> -
>> - /* interface descriptor offset */
>> - OUT_BATCH(0);
>> -
>> - /* SIMD size, thread w/h/d */
>> - OUT_BATCH(1 << 30 | /* SIMD16 */
>> - 0 << 16 | /* depth:1 */
>> - 0 << 8 | /* height:1 */
>> - 0); /* width:1 */
>> -
>> - /* thread group X */
>> - OUT_BATCH(0);
>> - OUT_BATCH(x_dim);
>> -
>> - /* thread group Y */
>> - OUT_BATCH(0);
>> - OUT_BATCH(y_dim);
>> -
>> - /* thread group Z */
>> - OUT_BATCH(0);
>> - OUT_BATCH(1);
>> -
>> - /* right mask */
>> - OUT_BATCH(right_mask);
>> -
>> - /* bottom mask, height 1, always 0xffffffff */
>> - OUT_BATCH(0xffffffff);
>> -}
>> -
>> -static void
>> -gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
>> - unsigned x, unsigned y,
>> - unsigned width, unsigned height)
>> -{
>> - uint32_t x_dim, y_dim, tmp, right_mask;
>> -
>> - /*
>> - * Simply do SIMD16 based dispatch, so every thread uses
>> - * SIMD16 channels.
>> - *
>> - * Define our own thread group size, e.g 16x1 for every group, then
>> - * will have 1 thread each group in SIMD16 dispatch. So thread
>> - * width/height/depth are all 1.
>> - *
>> - * Then thread group X = width / 16 (aligned to 16)
>> - * thread group Y = height;
>> - */
>> - x_dim = (width + 15) / 16;
>> - y_dim = height;
>> -
>> - tmp = width & 15;
>> - if (tmp == 0)
>> - right_mask = (1 << 16) - 1;
>> - else
>> - right_mask = (1 << tmp) - 1;
>> -
>> - OUT_BATCH(GEN7_GPGPU_WALKER | 13);
>> -
>> - OUT_BATCH(0); /* kernel offset */
>> - OUT_BATCH(0); /* indirect data length */
>> - OUT_BATCH(0); /* indirect data offset */
>> -
>> - /* SIMD size, thread w/h/d */
>> - OUT_BATCH(1 << 30 | /* SIMD16 */
>> - 0 << 16 | /* depth:1 */
>> - 0 << 8 | /* height:1 */
>> - 0); /* width:1 */
>> -
>> - /* thread group X */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> - OUT_BATCH(x_dim);
>> -
>> - /* thread group Y */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> - OUT_BATCH(y_dim);
>> -
>> - /* thread group Z */
>> - OUT_BATCH(0);
>> - OUT_BATCH(1);
>> -
>> - /* right mask */
>> - OUT_BATCH(right_mask);
>> -
>> - /* bottom mask, height 1, always 0xffffffff */
>> - OUT_BATCH(0xffffffff);
>> -}
>> -
>> /*
>> * This sets up the gpgpu pipeline,
>> *
>> diff --git a/lib/gpu_fill.c b/lib/gpu_fill.c
>> new file mode 100644
>> index 00000000..b6da1cdc
>> --- /dev/null
>> +++ b/lib/gpu_fill.c
>> @@ -0,0 +1,729 @@
>> +#include <intel_bufmgr.h>
>> +#include <i915_drm.h>
>> +
>> +#include "intel_reg.h"
>> +#include "drmtest.h"
>> +#include "gpu_fill.h"
>> +#include <assert.h>
>> +#include "gen7_media.h"
>> +#include "gen8_media.h"
>> +
>> +
>> +uint32_t
>> +batch_used(struct intel_batchbuffer *batch)
>> +{
>> + return batch->ptr - batch->buffer;
>> +}
>> +
>> +uint32_t
>> +batch_align(struct intel_batchbuffer *batch, uint32_t align)
>> +{
>> + uint32_t offset = batch_used(batch);
>> + offset = ALIGN(offset, align);
>> + batch->ptr = batch->buffer + offset;
>> + return offset;
>> +}
>> +
>> +void *
>> +batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t
>> align)
>> +{
>> + uint32_t offset = batch_align(batch, align);
>> + batch->ptr += size;
>> + return memset(batch->buffer + offset, 0, size);
>> +}
>> +
>> +uint32_t
>> +batch_offset(struct intel_batchbuffer *batch, void *ptr)
>> +{
>> + return (uint8_t *)ptr - batch->buffer;
>> +}
>> +
>> +uint32_t
>> +batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t
>> size, uint32_t align)
>> +{
>> + return batch_offset(batch, memcpy(batch_alloc(batch, size,
>> align), ptr, size));
>> +}
>> +
>> +void
>> +gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
>> +{
>> + int ret;
>> +
>> + ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
>> + if (ret == 0)
>> + ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
>> + NULL, 0, 0, 0);
>> + igt_assert(ret == 0);
>> +}
>> +
>> +uint32_t
>> +gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
>> + uint8_t color)
>> +{
>> + uint8_t *curbe_buffer;
>> + uint32_t offset;
>> +
>> + curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
>> + offset = batch_offset(batch, curbe_buffer);
>> + *curbe_buffer = color;
>> +
>> + return offset;
>> +}
>> +
>> +uint32_t
>> +gen7_fill_surface_state(struct intel_batchbuffer *batch,
>> + struct igt_buf *buf,
>> + uint32_t format,
>> + int is_dst)
>> +{
>> + struct gen7_surface_state *ss;
>> + uint32_t write_domain, read_domain, offset;
>> + int ret;
>> +
>> + if (is_dst) {
>> + write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
>> + } else {
>> + write_domain = 0;
>> + read_domain = I915_GEM_DOMAIN_SAMPLER;
>> + }
>> +
>> + ss = batch_alloc(batch, sizeof(*ss), 64);
>> + offset = batch_offset(batch, ss);
>> +
>> + ss->ss0.surface_type = GEN7_SURFACE_2D;
>> + ss->ss0.surface_format = format;
>> + ss->ss0.render_cache_read_write = 1;
>> +
>> + if (buf->tiling == I915_TILING_X)
>> + ss->ss0.tiled_mode = 2;
>> + else if (buf->tiling == I915_TILING_Y)
>> + ss->ss0.tiled_mode = 3;
>> +
>> + ss->ss1.base_addr = buf->bo->offset;
>> + ret = drm_intel_bo_emit_reloc(batch->bo,
>> + batch_offset(batch, ss) + 4,
>> + buf->bo, 0,
>> + read_domain, write_domain);
>> + igt_assert(ret == 0);
>> +
>> + ss->ss2.height = igt_buf_height(buf) - 1;
>> + ss->ss2.width = igt_buf_width(buf) - 1;
>> +
>> + ss->ss3.pitch = buf->stride - 1;
>> +
>> + ss->ss7.shader_chanel_select_r = 4;
>> + ss->ss7.shader_chanel_select_g = 5;
>> + ss->ss7.shader_chanel_select_b = 6;
>> + ss->ss7.shader_chanel_select_a = 7;
>> +
>> + return offset;
>> +}
>> +
>> +uint32_t
>> +gen7_fill_binding_table(struct intel_batchbuffer *batch,
>> + struct igt_buf *dst)
>> +{
>> + uint32_t *binding_table, offset;
>> +
>> + binding_table = batch_alloc(batch, 32, 64);
>> + offset = batch_offset(batch, binding_table);
>> +
>> + binding_table[0] = gen7_fill_surface_state(batch, dst,
>> GEN7_SURFACEFORMAT_R8_UNORM, 1);
>> +
>> + return offset;
>> +}
>> +
>> +uint32_t
>> +gen7_fill_kernel(struct intel_batchbuffer *batch,
>> + const uint32_t kernel[][4],
>> + size_t size)
>> +{
>> + uint32_t offset;
>> +
>> + offset = batch_copy(batch, kernel, size, 64);
>> +
>> + return offset;
>> +}
>> +
>> +uint32_t
>> +gen7_fill_interface_descriptor(struct intel_batchbuffer *batch,
>> struct igt_buf *dst,
>> + const uint32_t kernel[][4], size_t size)
>> +{
>> + struct gen7_interface_descriptor_data *idd;
>> + uint32_t offset;
>> + uint32_t binding_table_offset, kernel_offset;
>> +
>> + binding_table_offset = gen7_fill_binding_table(batch, dst);
>> + kernel_offset = gen7_fill_kernel(batch, kernel, size);
>> +
>> + idd = batch_alloc(batch, sizeof(*idd), 64);
>> + offset = batch_offset(batch, idd);
>> +
>> + idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
>> +
>> + idd->desc1.single_program_flow = 1;
>> + idd->desc1.floating_point_mode = GEN7_FLOATING_POINT_IEEE_754;
>> +
>> + idd->desc2.sampler_count = 0; /* 0 samplers used */
>> + idd->desc2.sampler_state_pointer = 0;
>> +
>> + idd->desc3.binding_table_entry_count = 0;
>> + idd->desc3.binding_table_pointer = (binding_table_offset >> 5);
>> +
>> + idd->desc4.constant_urb_entry_read_offset = 0;
>> + idd->desc4.constant_urb_entry_read_length = 1; /* grf 1 */
>> +
>> + return offset;
>> +}
>> +
>> +void
>> +gen7_emit_state_base_address(struct intel_batchbuffer *batch)
>> +{
>> + OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
>> +
>> + /* general */
>> + OUT_BATCH(0);
>> +
>> + /* surface */
>> + OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
>> BASE_ADDRESS_MODIFY);
>> +
>> + /* dynamic */
>> + OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
>> BASE_ADDRESS_MODIFY);
>> +
>> + /* indirect */
>> + OUT_BATCH(0);
>> +
>> + /* instruction */
>> + OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
>> BASE_ADDRESS_MODIFY);
>> +
>> + /* general/dynamic/indirect/instruction access Bound */
>> + OUT_BATCH(0);
>> + OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> + OUT_BATCH(0);
>> + OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> +}
>> +
>> +void
>> +gen7_emit_vfe_state(struct intel_batchbuffer *batch)
>> +{
>> + OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
>> +
>> + /* scratch buffer */
>> + OUT_BATCH(0);
>> +
>> + /* number of threads & urb entries */
>> + OUT_BATCH(1 << 16 |
>> + 2 << 8);
>> +
>> + OUT_BATCH(0);
>> +
>> + /* urb entry size & curbe size */
>> + OUT_BATCH(2 << 16 | /* in 256 bits unit */
>> + 2); /* in 256 bits unit */
>> +
>> + /* scoreboard */
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> +}
>> +
>> +void
>> +gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
>> +{
>> + OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
>> +
>> + /* scratch buffer */
>> + OUT_BATCH(0);
>> +
>> + /* number of threads & urb entries */
>> + OUT_BATCH(1 << 16 | /* max num of threads */
>> + 0 << 8 | /* num of URB entry */
>> + 1 << 2); /* GPGPU mode */
>> +
>> + OUT_BATCH(0);
>> +
>> + /* urb entry size & curbe size */
>> + OUT_BATCH(0 << 16 | /* URB entry size in 256 bits unit */
>> + 1); /* CURBE entry size in 256 bits unit */
>> +
>> + /* scoreboard */
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> +}
>> +
>> +void
>> +gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t
>> curbe_buffer)
>> +{
>> + OUT_BATCH(GEN7_MEDIA_CURBE_LOAD | (4 - 2));
>> + OUT_BATCH(0);
>> + /* curbe total data length */
>> + OUT_BATCH(64);
>> + /* curbe data start address, is relative to the dynamics base
>> address */
>> + OUT_BATCH(curbe_buffer);
>> +}
>> +
>> +void
>> +gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch,
>> uint32_t interface_descriptor)
>> +{
>> + OUT_BATCH(GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
>> + OUT_BATCH(0);
>> + /* interface descriptor data length */
>> + OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
>> + /* interface descriptor address, is relative to the dynamics base
>> address */
>> + OUT_BATCH(interface_descriptor);
>> +}
>> +
>> +void
>> +gen7_emit_media_objects(struct intel_batchbuffer *batch,
>> + unsigned x, unsigned y,
>> + unsigned width, unsigned height)
>> +{
>> + int i, j;
>> +
>> + for (i = 0; i < width / 16; i++) {
>> + for (j = 0; j < height / 16; j++) {
>> + OUT_BATCH(GEN7_MEDIA_OBJECT | (8 - 2));
>> +
>> + /* interface descriptor offset */
>> + OUT_BATCH(0);
>> +
>> + /* without indirect data */
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> +
>> + /* scoreboard */
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> +
>> + /* inline data (xoffset, yoffset) */
>> + OUT_BATCH(x + i * 16);
>> + OUT_BATCH(y + j * 16);
>> + }
>> + }
>> +}
>> +
>> +void
>> +gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
>> + unsigned x, unsigned y,
>> + unsigned width, unsigned height)
>> +{
>> + uint32_t x_dim, y_dim, tmp, right_mask;
>> +
>> + /*
>> + * Simply do SIMD16 based dispatch, so every thread uses
>> + * SIMD16 channels.
>> + *
>> + * Define our own thread group size, e.g 16x1 for every group, then
>> + * will have 1 thread each group in SIMD16 dispatch. So thread
>> + * width/height/depth are all 1.
>> + *
>> + * Then thread group X = width / 16 (aligned to 16)
>> + * thread group Y = height;
>> + */
>> + x_dim = (width + 15) / 16;
>> + y_dim = height;
>> +
>> + tmp = width & 15;
>> + if (tmp == 0)
>> + right_mask = (1 << 16) - 1;
>> + else
>> + right_mask = (1 << tmp) - 1;
>> +
>> + OUT_BATCH(GEN7_GPGPU_WALKER | 9);
>> +
>> + /* interface descriptor offset */
>> + OUT_BATCH(0);
>> +
>> + /* SIMD size, thread w/h/d */
>> + OUT_BATCH(1 << 30 | /* SIMD16 */
>> + 0 << 16 | /* depth:1 */
>> + 0 << 8 | /* height:1 */
>> + 0); /* width:1 */
>> +
>> + /* thread group X */
>> + OUT_BATCH(0);
>> + OUT_BATCH(x_dim);
>> +
>> + /* thread group Y */
>> + OUT_BATCH(0);
>> + OUT_BATCH(y_dim);
>> +
>> + /* thread group Z */
>> + OUT_BATCH(0);
>> + OUT_BATCH(1);
>> +
>> + /* right mask */
>> + OUT_BATCH(right_mask);
>> +
>> + /* bottom mask, height 1, always 0xffffffff */
>> + OUT_BATCH(0xffffffff);
>> +}
>> +
>> +uint32_t
>> +gen8_fill_surface_state(struct intel_batchbuffer *batch,
>> + struct igt_buf *buf,
>> + uint32_t format,
>> + int is_dst)
>> +{
>> + struct gen8_surface_state *ss;
>> + uint32_t write_domain, read_domain, offset;
>> + int ret;
>> +
>> + if (is_dst) {
>> + write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
>> + } else {
>> + write_domain = 0;
>> + read_domain = I915_GEM_DOMAIN_SAMPLER;
>> + }
>> +
>> + ss = batch_alloc(batch, sizeof(*ss), 64);
>> + offset = batch_offset(batch, ss);
>> +
>> + ss->ss0.surface_type = GEN8_SURFACE_2D;
>> + ss->ss0.surface_format = format;
>> + ss->ss0.render_cache_read_write = 1;
>> + ss->ss0.vertical_alignment = 1; /* align 4 */
>> + ss->ss0.horizontal_alignment = 1; /* align 4 */
>> +
>> + if (buf->tiling == I915_TILING_X)
>> + ss->ss0.tiled_mode = 2;
>> + else if (buf->tiling == I915_TILING_Y)
>> + ss->ss0.tiled_mode = 3;
>> +
>> + ss->ss8.base_addr = buf->bo->offset;
>> +
>> + ret = drm_intel_bo_emit_reloc(batch->bo,
>> + batch_offset(batch, ss) + 8 * 4,
>> + buf->bo, 0,
>> + read_domain, write_domain);
>> + igt_assert(ret == 0);
>> +
>> + ss->ss2.height = igt_buf_height(buf) - 1;
>> + ss->ss2.width = igt_buf_width(buf) - 1;
>> + ss->ss3.pitch = buf->stride - 1;
>> +
>> + ss->ss7.shader_chanel_select_r = 4;
>> + ss->ss7.shader_chanel_select_g = 5;
>> + ss->ss7.shader_chanel_select_b = 6;
>> + ss->ss7.shader_chanel_select_a = 7;
>> +
>> + return offset;
>> +}
>> +
>> +uint32_t
>> +gen8_fill_binding_table(struct intel_batchbuffer *batch,
>> + struct igt_buf *dst)
>> +{
>> + uint32_t *binding_table, offset;
>> +
>> + binding_table = batch_alloc(batch, 32, 64);
>> + offset = batch_offset(batch, binding_table);
>> +
>> + binding_table[0] = gen8_fill_surface_state(batch, dst,
>> GEN8_SURFACEFORMAT_R8_UNORM, 1);
>> +
>> + return offset;
>> +}
>> +
>> +uint32_t
>> +gen8_fill_interface_descriptor(struct intel_batchbuffer *batch,
>> struct igt_buf *dst, const uint32_t kernel[][4], size_t size)
>> +{
>> + struct gen8_interface_descriptor_data *idd;
>> + uint32_t offset;
>> + uint32_t binding_table_offset, kernel_offset;
>> +
>> + binding_table_offset = gen8_fill_binding_table(batch, dst);
>> + kernel_offset = gen7_fill_kernel(batch, kernel, sizeof(kernel));
>> +
>> + idd = batch_alloc(batch, sizeof(*idd), 64);
>> + offset = batch_offset(batch, idd);
>> +
>> + idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
>> +
>> + idd->desc2.single_program_flow = 1;
>> + idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
>> +
>> + idd->desc3.sampler_count = 0; /* 0 samplers used */
>> + idd->desc3.sampler_state_pointer = 0;
>> +
>> + idd->desc4.binding_table_entry_count = 0;
>> + idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
>> +
>> + idd->desc5.constant_urb_entry_read_offset = 0;
>> + idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
>> +
>> + return offset;
>> +}
>> +
>> +void
>> +gen8_emit_state_base_address(struct intel_batchbuffer *batch)
>> +{
>> + OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));
>> +
>> + /* general */
>> + OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> + OUT_BATCH(0);
>> +
>> + /* stateless data port */
>> + OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> +
>> + /* surface */
>> + OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0,
>> BASE_ADDRESS_MODIFY);
>> +
>> + /* dynamic */
>> + OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER |
>> I915_GEM_DOMAIN_INSTRUCTION,
>> + 0, BASE_ADDRESS_MODIFY);
>> +
>> + /* indirect */
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> +
>> + /* instruction */
>> + OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
>> BASE_ADDRESS_MODIFY);
>> +
>> + /* general state buffer size */
>> + OUT_BATCH(0xfffff000 | 1);
>> + /* dynamic state buffer size */
>> + OUT_BATCH(1 << 12 | 1);
>> + /* indirect object buffer size */
>> + OUT_BATCH(0xfffff000 | 1);
>> + /* intruction buffer size, must set modify enable bit, otherwise
>> it may result in GPU hang */
>> + OUT_BATCH(1 << 12 | 1);
>> +}
>> +
>> +void
>> +gen8_emit_vfe_state(struct intel_batchbuffer *batch)
>> +{
>> + OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
>> +
>> + /* scratch buffer */
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> +
>> + /* number of threads & urb entries */
>> + OUT_BATCH(1 << 16 |
>> + 2 << 8);
>> +
>> + OUT_BATCH(0);
>> +
>> + /* urb entry size & curbe size */
>> + OUT_BATCH(2 << 16 |
>> + 2);
>> +
>> + /* scoreboard */
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> +}
>> +
>> +void
>> +gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
>> +{
>> + OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
>> +
>> + /* scratch buffer */
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> +
>> + /* number of threads & urb entries */
>> + OUT_BATCH(1 << 16 | 1 << 8);
>> +
>> + OUT_BATCH(0);
>> +
>> + /* urb entry size & curbe size */
>> + OUT_BATCH(0 << 16 | 1);
>> +
>> + /* scoreboard */
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> +}
>> +
>> +void
>> +gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t
>> curbe_buffer)
>> +{
>> + OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
>> + OUT_BATCH(0);
>> + /* curbe total data length */
>> + OUT_BATCH(64);
>> + /* curbe data start address, is relative to the dynamics base
>> address */
>> + OUT_BATCH(curbe_buffer);
>> +}
>> +
>> +void
>> +gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch,
>> uint32_t interface_descriptor)
>> +{
>> + OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
>> + OUT_BATCH(0);
>> + /* interface descriptor data length */
>> + OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
>> + /* interface descriptor address, is relative to the dynamics base
>> address */
>> + OUT_BATCH(interface_descriptor);
>> +}
>> +
>> +void
>> +gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
>> +{
>> + OUT_BATCH(GEN8_MEDIA_STATE_FLUSH | (2 - 2));
>> + OUT_BATCH(0);
>> +}
>> +
>> +void
>> +gen8_emit_media_objects(struct intel_batchbuffer *batch,
>> + unsigned x, unsigned y,
>> + unsigned width, unsigned height)
>> +{
>> + int i, j;
>> +
>> + for (i = 0; i < width / 16; i++) {
>> + for (j = 0; j < height / 16; j++) {
>> + OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
>> +
>> + /* interface descriptor offset */
>> + OUT_BATCH(0);
>> +
>> + /* without indirect data */
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> +
>> + /* scoreboard */
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> +
>> + /* inline data (xoffset, yoffset) */
>> + OUT_BATCH(x + i * 16);
>> + OUT_BATCH(y + j * 16);
>> + gen8_emit_media_state_flush(batch);
>> + }
>> + }
>> +}
>> +
>> +void
>> +gen8lp_emit_media_objects(struct intel_batchbuffer *batch,
>> + unsigned x, unsigned y,
>> + unsigned width, unsigned height)
>> +{
>> + int i, j;
>> +
>> + for (i = 0; i < width / 16; i++) {
>> + for (j = 0; j < height / 16; j++) {
>> + OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
>> +
>> + /* interface descriptor offset */
>> + OUT_BATCH(0);
>> +
>> + /* without indirect data */
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> +
>> + /* scoreboard */
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> +
>> + /* inline data (xoffset, yoffset) */
>> + OUT_BATCH(x + i * 16);
>> + OUT_BATCH(y + j * 16);
>> + }
>> + }
>> +}
>> +
>> +void
>> +gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
>> + unsigned x, unsigned y,
>> + unsigned width, unsigned height)
>> +{
>> + uint32_t x_dim, y_dim, tmp, right_mask;
>> +
>> + /*
>> + * Simply do SIMD16 based dispatch, so every thread uses
>> + * SIMD16 channels.
>> + *
>> + * Define our own thread group size, e.g 16x1 for every group, then
>> + * will have 1 thread each group in SIMD16 dispatch. So thread
>> + * width/height/depth are all 1.
>> + *
>> + * Then thread group X = width / 16 (aligned to 16)
>> + * thread group Y = height;
>> + */
>> + x_dim = (width + 15) / 16;
>> + y_dim = height;
>> +
>> + tmp = width & 15;
>> + if (tmp == 0)
>> + right_mask = (1 << 16) - 1;
>> + else
>> + right_mask = (1 << tmp) - 1;
>> +
>> + OUT_BATCH(GEN7_GPGPU_WALKER | 13);
>> +
>> + OUT_BATCH(0); /* kernel offset */
>> + OUT_BATCH(0); /* indirect data length */
>> + OUT_BATCH(0); /* indirect data offset */
>> +
>> + /* SIMD size, thread w/h/d */
>> + OUT_BATCH(1 << 30 | /* SIMD16 */
>> + 0 << 16 | /* depth:1 */
>> + 0 << 8 | /* height:1 */
>> + 0); /* width:1 */
>> +
>> + /* thread group X */
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> + OUT_BATCH(x_dim);
>> +
>> + /* thread group Y */
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> + OUT_BATCH(y_dim);
>> +
>> + /* thread group Z */
>> + OUT_BATCH(0);
>> + OUT_BATCH(1);
>> +
>> + /* right mask */
>> + OUT_BATCH(right_mask);
>> +
>> + /* bottom mask, height 1, always 0xffffffff */
>> + OUT_BATCH(0xffffffff);
>> +}
>> +
>> +void
>> +gen9_emit_state_base_address(struct intel_batchbuffer *batch)
>> +{
>> + OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (19 - 2));
>> +
>> + /* general */
>> + OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> + OUT_BATCH(0);
>> +
>> + /* stateless data port */
>> + OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> +
>> + /* surface */
>> + OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0,
>> BASE_ADDRESS_MODIFY);
>> +
>> + /* dynamic */
>> + OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER |
>> I915_GEM_DOMAIN_INSTRUCTION,
>> + 0, BASE_ADDRESS_MODIFY);
>> +
>> + /* indirect */
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> +
>> + /* instruction */
>> + OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
>> BASE_ADDRESS_MODIFY);
>> +
>> + /* general state buffer size */
>> + OUT_BATCH(0xfffff000 | 1);
>> + /* dynamic state buffer size */
>> + OUT_BATCH(1 << 12 | 1);
>> + /* indirect object buffer size */
>> + OUT_BATCH(0xfffff000 | 1);
>> + /* intruction buffer size, must set modify enable bit, otherwise
>> it may result in GPU hang */
>> + OUT_BATCH(1 << 12 | 1);
>> +
>> + /* Bindless surface state base address */
>> + OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> + OUT_BATCH(0);
>> + OUT_BATCH(0xfffff000);
>> +}
>> diff --git a/lib/gpu_fill.h b/lib/gpu_fill.h
>> new file mode 100644
>> index 00000000..a271ce6e
>> --- /dev/null
>> +++ b/lib/gpu_fill.h
>> @@ -0,0 +1,141 @@
>> +/*
>> + * Copyright © 2018 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person
>> obtaining a
>> + * copy of this software and associated documentation files (the
>> "Software"),
>> + * to deal in the Software without restriction, including without
>> limitation
>> + * the rights to use, copy, modify, merge, publish, distribute,
>> sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including
>> the next
>> + * paragraph) shall be included in all copies or substantial portions
>> of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
>> SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
>> OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#ifndef GPU_FILL_H
>> +#define GPU_FILL_H
>> +
>> +uint32_t
>> +batch_used(struct intel_batchbuffer *batch);
>> +
>> +uint32_t
>> +batch_align(struct intel_batchbuffer *batch, uint32_t align);
>> +
>> +void *
>> +batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t
>> align);
>> +
>> +uint32_t
>> +batch_offset(struct intel_batchbuffer *batch, void *ptr);
>> +
>> +uint32_t
>> +batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t
>> size, uint32_t align);
>> +
>> +void
>> +gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end);
>> +
>> +uint32_t
>> +gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
>> + uint8_t color);
>> +
>> +uint32_t
>> +gen7_fill_surface_state(struct intel_batchbuffer *batch,
>> + struct igt_buf *buf,
>> + uint32_t format,
>> + int is_dst);
>> +
>> +uint32_t
>> +gen7_fill_binding_table(struct intel_batchbuffer *batch,
>> + struct igt_buf *dst);
>> +uint32_t
>> +gen7_fill_kernel(struct intel_batchbuffer *batch,
>> + const uint32_t kernel[][4],
>> + size_t size);
>> +
>> +uint32_t
>> +gen7_fill_interface_descriptor(struct intel_batchbuffer *batch,
>> struct igt_buf *dst,
>> + const uint32_t kernel[][4], size_t size);
>> +
>> +void
>> +gen7_emit_state_base_address(struct intel_batchbuffer *batch);
>> +
>> +void
>> +gen7_emit_vfe_state(struct intel_batchbuffer *batch);
>> +
>> +void
>> +gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
>> +
>> +void
>> +gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t
>> curbe_buffer);
>> +
>> +void
>> +gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch,
>> uint32_t interface_descriptor);
>> +
>> +void
>> +gen7_emit_media_objects(struct intel_batchbuffer *batch,
>> + unsigned x, unsigned y,
>> + unsigned width, unsigned height);
>> +
>> +uint32_t
>> +gen8_fill_surface_state(struct intel_batchbuffer *batch,
>> + struct igt_buf *buf,
>> + uint32_t format,
>> + int is_dst);
>> +
>> +uint32_t
>> +gen8_fill_binding_table(struct intel_batchbuffer *batch,
>> + struct igt_buf *dst);
>> +
>> +uint32_t
>> +gen8_fill_interface_descriptor(struct intel_batchbuffer *batch,
>> struct igt_buf *dst, const uint32_t kernel[][4], size_t size);
>> +
>> +void
>> +gen8_emit_state_base_address(struct intel_batchbuffer *batch);
>> +
>> +void
>> +gen8_emit_vfe_state(struct intel_batchbuffer *batch);
>> +
>> +void
>> +gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
>> +
>> +void
>> +gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t
>> curbe_buffer);
>> +
>> +void
>> +gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch,
>> uint32_t interface_descriptor);
>> +
>> +void
>> +gen8_emit_media_state_flush(struct intel_batchbuffer *batch);
>> +
>> +void
>> +gen8_emit_media_objects(struct intel_batchbuffer *batch,
>> + unsigned x, unsigned y,
>> + unsigned width, unsigned height);
>> +
>> +void
>> +gen8lp_emit_media_objects(struct intel_batchbuffer *batch,
>> + unsigned x, unsigned y,
>> + unsigned width, unsigned height);
>> +
>> +void
>> +gen9_emit_state_base_address(struct intel_batchbuffer *batch);
>> +
>> +void
>> +gen7_emit_gpgpu_walk(struct intel_batchbuffer *batch,
>> + unsigned x, unsigned y,
>> + unsigned width, unsigned height);
>> +void
>> +gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
>> + unsigned x, unsigned y,
>> + unsigned width, unsigned height);
>> +
>> +#endif /* GPU_FILL_H */
>> diff --git a/lib/media_fill_gen7.c b/lib/media_fill_gen7.c
>> index 6fb44798..c97555a6 100644
>> --- a/lib/media_fill_gen7.c
>> +++ b/lib/media_fill_gen7.c
>> @@ -5,7 +5,7 @@
>> #include "gen7_media.h"
>> #include "intel_reg.h"
>> #include "drmtest.h"
>> -
>> +#include "gpu_fill.h"
>> #include <assert.h>
>> static const uint32_t media_kernel[][4] = {
>> @@ -22,275 +22,6 @@ static const uint32_t media_kernel[][4] = {
>> { 0x07800031, 0x20001ca8, 0x00000e00, 0x82000010 },
>> };
>> -static uint32_t
>> -batch_used(struct intel_batchbuffer *batch)
>> -{
>> - return batch->ptr - batch->buffer;
>> -}
>> -
>> -static uint32_t
>> -batch_align(struct intel_batchbuffer *batch, uint32_t align)
>> -{
>> - uint32_t offset = batch_used(batch);
>> - offset = ALIGN(offset, align);
>> - batch->ptr = batch->buffer + offset;
>> - return offset;
>> -}
>> -
>> -static void *
>> -batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t
>> align)
>> -{
>> - uint32_t offset = batch_align(batch, align);
>> - batch->ptr += size;
>> - return memset(batch->buffer + offset, 0, size);
>> -}
>> -
>> -static uint32_t
>> -batch_offset(struct intel_batchbuffer *batch, void *ptr)
>> -{
>> - return (uint8_t *)ptr - batch->buffer;
>> -}
>> -
>> -static uint32_t
>> -batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t
>> size, uint32_t align)
>> -{
>> - return batch_offset(batch, memcpy(batch_alloc(batch, size,
>> align), ptr, size));
>> -}
>> -
>> -static void
>> -gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
>> -{
>> - int ret;
>> -
>> - ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
>> - if (ret == 0)
>> - ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
>> - NULL, 0, 0, 0);
>> - igt_assert(ret == 0);
>> -}
>> -
>> -static uint32_t
>> -gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
>> - uint8_t color)
>> -{
>> - uint8_t *curbe_buffer;
>> - uint32_t offset;
>> -
>> - curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
>> - offset = batch_offset(batch, curbe_buffer);
>> - *curbe_buffer = color;
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen7_fill_surface_state(struct intel_batchbuffer *batch,
>> - struct igt_buf *buf,
>> - uint32_t format,
>> - int is_dst)
>> -{
>> - struct gen7_surface_state *ss;
>> - uint32_t write_domain, read_domain, offset;
>> - int ret;
>> -
>> - if (is_dst) {
>> - write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
>> - } else {
>> - write_domain = 0;
>> - read_domain = I915_GEM_DOMAIN_SAMPLER;
>> - }
>> -
>> - ss = batch_alloc(batch, sizeof(*ss), 64);
>> - offset = batch_offset(batch, ss);
>> -
>> - ss->ss0.surface_type = GEN7_SURFACE_2D;
>> - ss->ss0.surface_format = format;
>> - ss->ss0.render_cache_read_write = 1;
>> -
>> - if (buf->tiling == I915_TILING_X)
>> - ss->ss0.tiled_mode = 2;
>> - else if (buf->tiling == I915_TILING_Y)
>> - ss->ss0.tiled_mode = 3;
>> -
>> - ss->ss1.base_addr = buf->bo->offset;
>> - ret = drm_intel_bo_emit_reloc(batch->bo,
>> - batch_offset(batch, ss) + 4,
>> - buf->bo, 0,
>> - read_domain, write_domain);
>> - igt_assert(ret == 0);
>> -
>> - ss->ss2.height = igt_buf_height(buf) - 1;
>> - ss->ss2.width = igt_buf_width(buf) - 1;
>> -
>> - ss->ss3.pitch = buf->stride - 1;
>> -
>> - ss->ss7.shader_chanel_select_r = 4;
>> - ss->ss7.shader_chanel_select_g = 5;
>> - ss->ss7.shader_chanel_select_b = 6;
>> - ss->ss7.shader_chanel_select_a = 7;
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen7_fill_binding_table(struct intel_batchbuffer *batch,
>> - struct igt_buf *dst)
>> -{
>> - uint32_t *binding_table, offset;
>> -
>> - binding_table = batch_alloc(batch, 32, 64);
>> - offset = batch_offset(batch, binding_table);
>> -
>> - binding_table[0] = gen7_fill_surface_state(batch, dst,
>> GEN7_SURFACEFORMAT_R8_UNORM, 1);
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen7_fill_media_kernel(struct intel_batchbuffer *batch,
>> - const uint32_t kernel[][4],
>> - size_t size)
>> -{
>> - uint32_t offset;
>> -
>> - offset = batch_copy(batch, kernel, size, 64);
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen7_fill_interface_descriptor(struct intel_batchbuffer *batch,
>> struct igt_buf *dst,
>> - const uint32_t kernel[][4], size_t size)
>> -{
>> - struct gen7_interface_descriptor_data *idd;
>> - uint32_t offset;
>> - uint32_t binding_table_offset, kernel_offset;
>> -
>> - binding_table_offset = gen7_fill_binding_table(batch, dst);
>> - kernel_offset = gen7_fill_media_kernel(batch, kernel, size);
>> -
>> - idd = batch_alloc(batch, sizeof(*idd), 64);
>> - offset = batch_offset(batch, idd);
>> -
>> - idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
>> -
>> - idd->desc1.single_program_flow = 1;
>> - idd->desc1.floating_point_mode = GEN7_FLOATING_POINT_IEEE_754;
>> -
>> - idd->desc2.sampler_count = 0; /* 0 samplers used */
>> - idd->desc2.sampler_state_pointer = 0;
>> -
>> - idd->desc3.binding_table_entry_count = 0;
>> - idd->desc3.binding_table_pointer = (binding_table_offset >> 5);
>> -
>> - idd->desc4.constant_urb_entry_read_offset = 0;
>> - idd->desc4.constant_urb_entry_read_length = 1; /* grf 1 */
>> -
>> - return offset;
>> -}
>> -
>> -static void
>> -gen7_emit_state_base_address(struct intel_batchbuffer *batch)
>> -{
>> - OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
>> -
>> - /* general */
>> - OUT_BATCH(0);
>> -
>> - /* surface */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
>> BASE_ADDRESS_MODIFY);
>> -
>> - /* dynamic */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
>> BASE_ADDRESS_MODIFY);
>> -
>> - /* indirect */
>> - OUT_BATCH(0);
>> -
>> - /* instruction */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
>> BASE_ADDRESS_MODIFY);
>> -
>> - /* general/dynamic/indirect/instruction access Bound */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> - OUT_BATCH(0);
>> - OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> -}
>> -
>> -static void
>> -gen7_emit_vfe_state(struct intel_batchbuffer *batch)
>> -{
>> - OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
>> -
>> - /* scratch buffer */
>> - OUT_BATCH(0);
>> -
>> - /* number of threads & urb entries */
>> - OUT_BATCH(1 << 16 |
>> - 2 << 8);
>> -
>> - OUT_BATCH(0);
>> -
>> - /* urb entry size & curbe size */
>> - OUT_BATCH(2 << 16 | /* in 256 bits unit */
>> - 2); /* in 256 bits unit */
>> -
>> - /* scoreboard */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -}
>> -
>> -static void
>> -gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t
>> curbe_buffer)
>> -{
>> - OUT_BATCH(GEN7_MEDIA_CURBE_LOAD | (4 - 2));
>> - OUT_BATCH(0);
>> - /* curbe total data length */
>> - OUT_BATCH(64);
>> - /* curbe data start address, is relative to the dynamics base
>> address */
>> - OUT_BATCH(curbe_buffer);
>> -}
>> -
>> -static void
>> -gen7_emit_interface_descriptor_load(struct intel_batchbuffer *batch,
>> uint32_t interface_descriptor)
>> -{
>> - OUT_BATCH(GEN7_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
>> - OUT_BATCH(0);
>> - /* interface descriptor data length */
>> - OUT_BATCH(sizeof(struct gen7_interface_descriptor_data));
>> - /* interface descriptor address, is relative to the dynamics base
>> address */
>> - OUT_BATCH(interface_descriptor);
>> -}
>> -
>> -static void
>> -gen7_emit_media_objects(struct intel_batchbuffer *batch,
>> - unsigned x, unsigned y,
>> - unsigned width, unsigned height)
>> -{
>> - int i, j;
>> -
>> - for (i = 0; i < width / 16; i++) {
>> - for (j = 0; j < height / 16; j++) {
>> - OUT_BATCH(GEN7_MEDIA_OBJECT | (8 - 2));
>> -
>> - /* interface descriptor offset */
>> - OUT_BATCH(0);
>> -
>> - /* without indirect data */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -
>> - /* scoreboard */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -
>> - /* inline data (xoffset, yoffset) */
>> - OUT_BATCH(x + i * 16);
>> - OUT_BATCH(y + j * 16);
>> - }
>> - }
>> -}
>> -
>> /*
>> * This sets up the media pipeline,
>> *
>> diff --git a/lib/media_fill_gen8.c b/lib/media_fill_gen8.c
>> index 4a8fe5a2..88c7dbdd 100644
>> --- a/lib/media_fill_gen8.c
>> +++ b/lib/media_fill_gen8.c
>> @@ -5,7 +5,7 @@
>> #include "gen8_media.h"
>> #include "intel_reg.h"
>> #include "drmtest.h"
>> -
>> +#include "gpu_fill.h"
>> #include <assert.h>
>> @@ -23,294 +23,6 @@ static const uint32_t media_kernel[][4] = {
>> { 0x07800031, 0x20000a40, 0x0e000e00, 0x82000010 },
>> };
>> -static uint32_t
>> -batch_used(struct intel_batchbuffer *batch)
>> -{
>> - return batch->ptr - batch->buffer;
>> -}
>> -
>> -static uint32_t
>> -batch_align(struct intel_batchbuffer *batch, uint32_t align)
>> -{
>> - uint32_t offset = batch_used(batch);
>> - offset = ALIGN(offset, align);
>> - batch->ptr = batch->buffer + offset;
>> - return offset;
>> -}
>> -
>> -static void *
>> -batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t
>> align)
>> -{
>> - uint32_t offset = batch_align(batch, align);
>> - batch->ptr += size;
>> - return memset(batch->buffer + offset, 0, size);
>> -}
>> -
>> -static uint32_t
>> -batch_offset(struct intel_batchbuffer *batch, void *ptr)
>> -{
>> - return (uint8_t *)ptr - batch->buffer;
>> -}
>> -
>> -static uint32_t
>> -batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t
>> size, uint32_t align)
>> -{
>> - return batch_offset(batch, memcpy(batch_alloc(batch, size,
>> align), ptr, size));
>> -}
>> -
>> -static void
>> -gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
>> -{
>> - int ret;
>> -
>> - ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
>> - if (ret == 0)
>> - ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
>> - NULL, 0, 0, 0);
>> - igt_assert(ret == 0);
>> -}
>> -
>> -static uint32_t
>> -gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
>> - uint8_t color)
>> -{
>> - uint8_t *curbe_buffer;
>> - uint32_t offset;
>> -
>> - curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
>> - offset = batch_offset(batch, curbe_buffer);
>> - *curbe_buffer = color;
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen8_fill_surface_state(struct intel_batchbuffer *batch,
>> - struct igt_buf *buf,
>> - uint32_t format,
>> - int is_dst)
>> -{
>> - struct gen8_surface_state *ss;
>> - uint32_t write_domain, read_domain, offset;
>> - int ret;
>> -
>> - if (is_dst) {
>> - write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
>> - } else {
>> - write_domain = 0;
>> - read_domain = I915_GEM_DOMAIN_SAMPLER;
>> - }
>> -
>> - ss = batch_alloc(batch, sizeof(*ss), 64);
>> - offset = batch_offset(batch, ss);
>> -
>> - ss->ss0.surface_type = GEN8_SURFACE_2D;
>> - ss->ss0.surface_format = format;
>> - ss->ss0.render_cache_read_write = 1;
>> - ss->ss0.vertical_alignment = 1; /* align 4 */
>> - ss->ss0.horizontal_alignment = 1; /* align 4 */
>> -
>> - if (buf->tiling == I915_TILING_X)
>> - ss->ss0.tiled_mode = 2;
>> - else if (buf->tiling == I915_TILING_Y)
>> - ss->ss0.tiled_mode = 3;
>> -
>> - ss->ss8.base_addr = buf->bo->offset;
>> -
>> - ret = drm_intel_bo_emit_reloc(batch->bo,
>> - batch_offset(batch, ss) + 8 * 4,
>> - buf->bo, 0,
>> - read_domain, write_domain);
>> - igt_assert(ret == 0);
>> -
>> - ss->ss2.height = igt_buf_height(buf) - 1;
>> - ss->ss2.width = igt_buf_width(buf) - 1;
>> - ss->ss3.pitch = buf->stride - 1;
>> -
>> - ss->ss7.shader_chanel_select_r = 4;
>> - ss->ss7.shader_chanel_select_g = 5;
>> - ss->ss7.shader_chanel_select_b = 6;
>> - ss->ss7.shader_chanel_select_a = 7;
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen8_fill_binding_table(struct intel_batchbuffer *batch,
>> - struct igt_buf *dst)
>> -{
>> - uint32_t *binding_table, offset;
>> -
>> - binding_table = batch_alloc(batch, 32, 64);
>> - offset = batch_offset(batch, binding_table);
>> -
>> - binding_table[0] = gen8_fill_surface_state(batch, dst,
>> GEN8_SURFACEFORMAT_R8_UNORM, 1);
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen8_fill_media_kernel(struct intel_batchbuffer *batch,
>> - const uint32_t kernel[][4],
>> - size_t size)
>> -{
>> - uint32_t offset;
>> -
>> - offset = batch_copy(batch, kernel, size, 64);
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen8_fill_interface_descriptor(struct intel_batchbuffer *batch,
>> struct igt_buf *dst)
>> -{
>> - struct gen8_interface_descriptor_data *idd;
>> - uint32_t offset;
>> - uint32_t binding_table_offset, kernel_offset;
>> -
>> - binding_table_offset = gen8_fill_binding_table(batch, dst);
>> - kernel_offset = gen8_fill_media_kernel(batch, media_kernel,
>> sizeof(media_kernel));
>> -
>> - idd = batch_alloc(batch, sizeof(*idd), 64);
>> - offset = batch_offset(batch, idd);
>> -
>> - idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
>> -
>> - idd->desc2.single_program_flow = 1;
>> - idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
>> -
>> - idd->desc3.sampler_count = 0; /* 0 samplers used */
>> - idd->desc3.sampler_state_pointer = 0;
>> -
>> - idd->desc4.binding_table_entry_count = 0;
>> - idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
>> -
>> - idd->desc5.constant_urb_entry_read_offset = 0;
>> - idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
>> -
>> - return offset;
>> -}
>> -
>> -static void
>> -gen8_emit_state_base_address(struct intel_batchbuffer *batch)
>> -{
>> - OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));
>> -
>> - /* general */
>> - OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> - OUT_BATCH(0);
>> -
>> - /* stateless data port */
>> - OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> -
>> - /* surface */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
>> -
>> - /* dynamic */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
>> - 0, BASE_ADDRESS_MODIFY);
>> -
>> - /* indirect */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -
>> - /* instruction */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
>> -
>> - /* general state buffer size */
>> - OUT_BATCH(0xfffff000 | 1);
>> - /* dynamic state buffer size */
>> - OUT_BATCH(1 << 12 | 1);
>> - /* indirect object buffer size */
>> - OUT_BATCH(0xfffff000 | 1);
>> - /* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
>> - OUT_BATCH(1 << 12 | 1);
>> -}
>> -
>> -static void
>> -gen8_emit_vfe_state(struct intel_batchbuffer *batch)
>> -{
>> - OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
>> -
>> - /* scratch buffer */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -
>> - /* number of threads & urb entries */
>> - OUT_BATCH(1 << 16 |
>> - 2 << 8);
>> -
>> - OUT_BATCH(0);
>> -
>> - /* urb entry size & curbe size */
>> - OUT_BATCH(2 << 16 |
>> - 2);
>> -
>> - /* scoreboard */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -}
>> -
>> -static void
>> -gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
>> -{
>> - OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
>> - OUT_BATCH(0);
>> - /* curbe total data length */
>> - OUT_BATCH(64);
>> - /* curbe data start address, is relative to the dynamics base address */
>> - OUT_BATCH(curbe_buffer);
>> -}
>> -
>> -static void
>> -gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
>> -{
>> - OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
>> - OUT_BATCH(0);
>> - /* interface descriptor data length */
>> - OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
>> - /* interface descriptor address, is relative to the dynamics base address */
>> - OUT_BATCH(interface_descriptor);
>> -}
>> -
>> -static void
>> -gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
>> -{
>> - OUT_BATCH(GEN8_MEDIA_STATE_FLUSH | (2 - 2));
>> - OUT_BATCH(0);
>> -}
>> -
>> -static void
>> -gen8_emit_media_objects(struct intel_batchbuffer *batch,
>> - unsigned x, unsigned y,
>> - unsigned width, unsigned height)
>> -{
>> - int i, j;
>> -
>> - for (i = 0; i < width / 16; i++) {
>> - for (j = 0; j < height / 16; j++) {
>> - OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
>> -
>> - /* interface descriptor offset */
>> - OUT_BATCH(0);
>> -
>> - /* without indirect data */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -
>> - /* scoreboard */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -
>> - /* inline data (xoffset, yoffset) */
>> - OUT_BATCH(x + i * 16);
>> - OUT_BATCH(y + j * 16);
>> - gen8_emit_media_state_flush(batch);
>> - }
>> - }
>> -}
>> -
>> /*
>> * This sets up the media pipeline,
>> *
>> @@ -348,8 +60,9 @@ gen8_media_fillfunc(struct intel_batchbuffer *batch,
>> /* setup states */
>> batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
>> - curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
>> - interface_descriptor = gen8_fill_interface_descriptor(batch, dst);
>> + curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
>> + interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
>> + media_kernel, sizeof(media_kernel));
>> igt_assert(batch->ptr < &batch->buffer[4095]);
>> /* media pipeline */
>> @@ -370,6 +83,6 @@ gen8_media_fillfunc(struct intel_batchbuffer *batch,
>> batch_end = batch_align(batch, 8);
>> igt_assert(batch_end < BATCH_STATE_SPLIT);
>> - gen8_render_flush(batch, batch_end);
>> + gen7_render_flush(batch, batch_end);
>> intel_batchbuffer_reset(batch);
>> }
>> diff --git a/lib/media_fill_gen8lp.c b/lib/media_fill_gen8lp.c
>> index 1f8a4adc..b30d96a3 100644
>> --- a/lib/media_fill_gen8lp.c
>> +++ b/lib/media_fill_gen8lp.c
>> @@ -5,7 +5,7 @@
>> #include "gen8_media.h"
>> #include "intel_reg.h"
>> #include "drmtest.h"
>> -
>> +#include "gpu_fill.h"
>> #include <assert.h>
>> @@ -23,286 +23,6 @@ static const uint32_t media_kernel[][4] = {
>> { 0x07800031, 0x20000a40, 0x0e000e00, 0x82000010 },
>> };
>> -static uint32_t
>> -batch_used(struct intel_batchbuffer *batch)
>> -{
>> - return batch->ptr - batch->buffer;
>> -}
>> -
>> -static uint32_t
>> -batch_align(struct intel_batchbuffer *batch, uint32_t align)
>> -{
>> - uint32_t offset = batch_used(batch);
>> - offset = ALIGN(offset, align);
>> - batch->ptr = batch->buffer + offset;
>> - return offset;
>> -}
>> -
>> -static void *
>> -batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
>> -{
>> - uint32_t offset = batch_align(batch, align);
>> - batch->ptr += size;
>> - return memset(batch->buffer + offset, 0, size);
>> -}
>> -
>> -static uint32_t
>> -batch_offset(struct intel_batchbuffer *batch, void *ptr)
>> -{
>> - return (uint8_t *)ptr - batch->buffer;
>> -}
>> -
>> -static uint32_t
>> -batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
>> -{
>> - return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
>> -}
>> -
>> -static void
>> -gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
>> -{
>> - int ret;
>> -
>> - ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
>> - if (ret == 0)
>> - ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
>> - NULL, 0, 0, 0);
>> - igt_assert(ret == 0);
>> -}
>> -
>> -static uint32_t
>> -gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
>> - uint8_t color)
>> -{
>> - uint8_t *curbe_buffer;
>> - uint32_t offset;
>> -
>> - curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
>> - offset = batch_offset(batch, curbe_buffer);
>> - *curbe_buffer = color;
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen8_fill_surface_state(struct intel_batchbuffer *batch,
>> - struct igt_buf *buf,
>> - uint32_t format,
>> - int is_dst)
>> -{
>> - struct gen8_surface_state *ss;
>> - uint32_t write_domain, read_domain, offset;
>> - int ret;
>> -
>> - if (is_dst) {
>> - write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
>> - } else {
>> - write_domain = 0;
>> - read_domain = I915_GEM_DOMAIN_SAMPLER;
>> - }
>> -
>> - ss = batch_alloc(batch, sizeof(*ss), 64);
>> - offset = batch_offset(batch, ss);
>> -
>> - ss->ss0.surface_type = GEN8_SURFACE_2D;
>> - ss->ss0.surface_format = format;
>> - ss->ss0.render_cache_read_write = 1;
>> - ss->ss0.vertical_alignment = 1; /* align 4 */
>> - ss->ss0.horizontal_alignment = 1; /* align 4 */
>> -
>> - if (buf->tiling == I915_TILING_X)
>> - ss->ss0.tiled_mode = 2;
>> - else if (buf->tiling == I915_TILING_Y)
>> - ss->ss0.tiled_mode = 3;
>> -
>> - ss->ss8.base_addr = buf->bo->offset;
>> -
>> - ret = drm_intel_bo_emit_reloc(batch->bo,
>> - batch_offset(batch, ss) + 8 * 4,
>> - buf->bo, 0,
>> - read_domain, write_domain);
>> - igt_assert(ret == 0);
>> -
>> - ss->ss2.height = igt_buf_height(buf) - 1;
>> - ss->ss2.width = igt_buf_width(buf) - 1;
>> - ss->ss3.pitch = buf->stride - 1;
>> -
>> - ss->ss7.shader_chanel_select_r = 4;
>> - ss->ss7.shader_chanel_select_g = 5;
>> - ss->ss7.shader_chanel_select_b = 6;
>> - ss->ss7.shader_chanel_select_a = 7;
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen8_fill_binding_table(struct intel_batchbuffer *batch,
>> - struct igt_buf *dst)
>> -{
>> - uint32_t *binding_table, offset;
>> -
>> - binding_table = batch_alloc(batch, 32, 64);
>> - offset = batch_offset(batch, binding_table);
>> -
>> - binding_table[0] = gen8_fill_surface_state(batch, dst, GEN8_SURFACEFORMAT_R8_UNORM, 1);
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen8_fill_media_kernel(struct intel_batchbuffer *batch,
>> - const uint32_t kernel[][4],
>> - size_t size)
>> -{
>> - uint32_t offset;
>> -
>> - offset = batch_copy(batch, kernel, size, 64);
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst)
>> -{
>> - struct gen8_interface_descriptor_data *idd;
>> - uint32_t offset;
>> - uint32_t binding_table_offset, kernel_offset;
>> -
>> - binding_table_offset = gen8_fill_binding_table(batch, dst);
>> - kernel_offset = gen8_fill_media_kernel(batch, media_kernel, sizeof(media_kernel));
>> -
>> - idd = batch_alloc(batch, sizeof(*idd), 64);
>> - offset = batch_offset(batch, idd);
>> -
>> - idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
>> -
>> - idd->desc2.single_program_flow = 1;
>> - idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
>> -
>> - idd->desc3.sampler_count = 0; /* 0 samplers used */
>> - idd->desc3.sampler_state_pointer = 0;
>> -
>> - idd->desc4.binding_table_entry_count = 0;
>> - idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
>> -
>> - idd->desc5.constant_urb_entry_read_offset = 0;
>> - idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
>> -
>> - return offset;
>> -}
>> -
>> -static void
>> -gen8_emit_state_base_address(struct intel_batchbuffer *batch)
>> -{
>> - OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (16 - 2));
>> -
>> - /* general */
>> - OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> - OUT_BATCH(0);
>> -
>> - /* stateless data port */
>> - OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> -
>> - /* surface */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
>> -
>> - /* dynamic */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
>> - 0, BASE_ADDRESS_MODIFY);
>> -
>> - /* indirect */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -
>> - /* instruction */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
>> -
>> - /* general state buffer size */
>> - OUT_BATCH(0xfffff000 | 1);
>> - /* dynamic state buffer size */
>> - OUT_BATCH(1 << 12 | 1);
>> - /* indirect object buffer size */
>> - OUT_BATCH(0xfffff000 | 1);
>> - /* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
>> - OUT_BATCH(1 << 12 | 1);
>> -}
>> -
>> -static void
>> -gen8_emit_vfe_state(struct intel_batchbuffer *batch)
>> -{
>> - OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
>> -
>> - /* scratch buffer */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -
>> - /* number of threads & urb entries */
>> - OUT_BATCH(1 << 16 |
>> - 2 << 8);
>> -
>> - OUT_BATCH(0);
>> -
>> - /* urb entry size & curbe size */
>> - OUT_BATCH(2 << 16 |
>> - 2);
>> -
>> - /* scoreboard */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -}
>> -
>> -static void
>> -gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
>> -{
>> - OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
>> - OUT_BATCH(0);
>> - /* curbe total data length */
>> - OUT_BATCH(64);
>> - /* curbe data start address, is relative to the dynamics base address */
>> - OUT_BATCH(curbe_buffer);
>> -}
>> -
>> -static void
>> -gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
>> -{
>> - OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
>> - OUT_BATCH(0);
>> - /* interface descriptor data length */
>> - OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
>> - /* interface descriptor address, is relative to the dynamics base address */
>> - OUT_BATCH(interface_descriptor);
>> -}
>> -
>> -static void
>> -gen8lp_emit_media_objects(struct intel_batchbuffer *batch,
>> - unsigned x, unsigned y,
>> - unsigned width, unsigned height)
>> -{
>> - int i, j;
>> -
>> - for (i = 0; i < width / 16; i++) {
>> - for (j = 0; j < height / 16; j++) {
>> - OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
>> -
>> - /* interface descriptor offset */
>> - OUT_BATCH(0);
>> -
>> - /* without indirect data */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -
>> - /* scoreboard */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -
>> - /* inline data (xoffset, yoffset) */
>> - OUT_BATCH(x + i * 16);
>> - OUT_BATCH(y + j * 16);
>> - }
>> - }
>> -}
>> -
>> /*
>> * This sets up the media pipeline,
>> *
>> @@ -340,8 +60,9 @@ gen8lp_media_fillfunc(struct intel_batchbuffer *batch,
>> /* setup states */
>> batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
>> - curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
>> - interface_descriptor = gen8_fill_interface_descriptor(batch, dst);
>> + curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
>> + interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
>> + media_kernel, sizeof(media_kernel));
>> igt_assert(batch->ptr < &batch->buffer[4095]);
>> /* media pipeline */
>> @@ -362,6 +83,6 @@ gen8lp_media_fillfunc(struct intel_batchbuffer *batch,
>> batch_end = batch_align(batch, 8);
>> igt_assert(batch_end < BATCH_STATE_SPLIT);
>> - gen8_render_flush(batch, batch_end);
>> + gen7_render_flush(batch, batch_end);
>> intel_batchbuffer_reset(batch);
>> }
>> diff --git a/lib/media_fill_gen9.c b/lib/media_fill_gen9.c
>> index 3fd21819..e5d94487 100644
>> --- a/lib/media_fill_gen9.c
>> +++ b/lib/media_fill_gen9.c
>> @@ -4,11 +4,9 @@
>> #include "media_fill.h"
>> #include "gen8_media.h"
>> #include "intel_reg.h"
>> -
>> +#include "gpu_fill.h"
>> #include <assert.h>
>> -#define ALIGN(x, y) (((x) + (y)-1) & ~((y)-1))
>> -
>> static const uint32_t media_kernel[][4] = {
>> { 0x00400001, 0x20202288, 0x00000020, 0x00000000 },
>> { 0x00600001, 0x20800208, 0x008d0000, 0x00000000 },
>> @@ -23,299 +21,6 @@ static const uint32_t media_kernel[][4] = {
>> { 0x07800031, 0x20000a40, 0x0e000e00, 0x82000010 },
>> };
>> -static uint32_t
>> -batch_used(struct intel_batchbuffer *batch)
>> -{
>> - return batch->ptr - batch->buffer;
>> -}
>> -
>> -static uint32_t
>> -batch_align(struct intel_batchbuffer *batch, uint32_t align)
>> -{
>> - uint32_t offset = batch_used(batch);
>> - offset = ALIGN(offset, align);
>> - batch->ptr = batch->buffer + offset;
>> - return offset;
>> -}
>> -
>> -static void *
>> -batch_alloc(struct intel_batchbuffer *batch, uint32_t size, uint32_t align)
>> -{
>> - uint32_t offset = batch_align(batch, align);
>> - batch->ptr += size;
>> - return memset(batch->buffer + offset, 0, size);
>> -}
>> -
>> -static uint32_t
>> -batch_offset(struct intel_batchbuffer *batch, void *ptr)
>> -{
>> - return (uint8_t *)ptr - batch->buffer;
>> -}
>> -
>> -static uint32_t
>> -batch_copy(struct intel_batchbuffer *batch, const void *ptr, uint32_t size, uint32_t align)
>> -{
>> - return batch_offset(batch, memcpy(batch_alloc(batch, size, align), ptr, size));
>> -}
>> -
>> -static void
>> -gen8_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
>> -{
>> - int ret;
>> -
>> - ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
>> - if (ret == 0)
>> - ret = drm_intel_bo_mrb_exec(batch->bo, batch_end,
>> - NULL, 0, 0, 0);
>> - assert(ret == 0);
>> -}
>> -
>> -static uint32_t
>> -gen8_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
>> - uint8_t color)
>> -{
>> - uint8_t *curbe_buffer;
>> - uint32_t offset;
>> -
>> - curbe_buffer = batch_alloc(batch, sizeof(uint32_t) * 8, 64);
>> - offset = batch_offset(batch, curbe_buffer);
>> - *curbe_buffer = color;
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen8_fill_surface_state(struct intel_batchbuffer *batch,
>> - struct igt_buf *buf,
>> - uint32_t format,
>> - int is_dst)
>> -{
>> - struct gen8_surface_state *ss;
>> - uint32_t write_domain, read_domain, offset;
>> - int ret;
>> -
>> - if (is_dst) {
>> - write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
>> - } else {
>> - write_domain = 0;
>> - read_domain = I915_GEM_DOMAIN_SAMPLER;
>> - }
>> -
>> - ss = batch_alloc(batch, sizeof(*ss), 64);
>> - offset = batch_offset(batch, ss);
>> -
>> - ss->ss0.surface_type = GEN8_SURFACE_2D;
>> - ss->ss0.surface_format = format;
>> - ss->ss0.render_cache_read_write = 1;
>> - ss->ss0.vertical_alignment = 1; /* align 4 */
>> - ss->ss0.horizontal_alignment = 1; /* align 4 */
>> -
>> - if (buf->tiling == I915_TILING_X)
>> - ss->ss0.tiled_mode = 2;
>> - else if (buf->tiling == I915_TILING_Y)
>> - ss->ss0.tiled_mode = 3;
>> -
>> - ss->ss8.base_addr = buf->bo->offset;
>> -
>> - ret = drm_intel_bo_emit_reloc(batch->bo,
>> - batch_offset(batch, ss) + 8 * 4,
>> - buf->bo, 0,
>> - read_domain, write_domain);
>> - assert(ret == 0);
>> -
>> - ss->ss2.height = igt_buf_height(buf) - 1;
>> - ss->ss2.width = igt_buf_width(buf) - 1;
>> - ss->ss3.pitch = buf->stride - 1;
>> -
>> - ss->ss7.shader_chanel_select_r = 4;
>> - ss->ss7.shader_chanel_select_g = 5;
>> - ss->ss7.shader_chanel_select_b = 6;
>> - ss->ss7.shader_chanel_select_a = 7;
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen8_fill_binding_table(struct intel_batchbuffer *batch,
>> - struct igt_buf *dst)
>> -{
>> - uint32_t *binding_table, offset;
>> -
>> - binding_table = batch_alloc(batch, 32, 64);
>> - offset = batch_offset(batch, binding_table);
>> -
>> - binding_table[0] = gen8_fill_surface_state(batch, dst, GEN8_SURFACEFORMAT_R8_UNORM, 1);
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen8_fill_media_kernel(struct intel_batchbuffer *batch,
>> - const uint32_t kernel[][4],
>> - size_t size)
>> -{
>> - uint32_t offset;
>> -
>> - offset = batch_copy(batch, kernel, size, 64);
>> -
>> - return offset;
>> -}
>> -
>> -static uint32_t
>> -gen8_fill_interface_descriptor(struct intel_batchbuffer *batch, struct igt_buf *dst)
>> -{
>> - struct gen8_interface_descriptor_data *idd;
>> - uint32_t offset;
>> - uint32_t binding_table_offset, kernel_offset;
>> -
>> - binding_table_offset = gen8_fill_binding_table(batch, dst);
>> - kernel_offset = gen8_fill_media_kernel(batch, media_kernel, sizeof(media_kernel));
>> -
>> - idd = batch_alloc(batch, sizeof(*idd), 64);
>> - offset = batch_offset(batch, idd);
>> -
>> - idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
>> -
>> - idd->desc2.single_program_flow = 1;
>> - idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
>> -
>> - idd->desc3.sampler_count = 0; /* 0 samplers used */
>> - idd->desc3.sampler_state_pointer = 0;
>> -
>> - idd->desc4.binding_table_entry_count = 0;
>> - idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
>> -
>> - idd->desc5.constant_urb_entry_read_offset = 0;
>> - idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
>> -
>> - return offset;
>> -}
>> -
>> -static void
>> -gen9_emit_state_base_address(struct intel_batchbuffer *batch)
>> -{
>> - OUT_BATCH(GEN8_STATE_BASE_ADDRESS | (19 - 2));
>> -
>> - /* general */
>> - OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> - OUT_BATCH(0);
>> -
>> - /* stateless data port */
>> - OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> -
>> - /* surface */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_SAMPLER, 0, BASE_ADDRESS_MODIFY);
>> -
>> - /* dynamic */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_INSTRUCTION,
>> - 0, BASE_ADDRESS_MODIFY);
>> -
>> - /* indirect */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -
>> - /* instruction */
>> - OUT_RELOC(batch->bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
>> -
>> - /* general state buffer size */
>> - OUT_BATCH(0xfffff000 | 1);
>> - /* dynamic state buffer size */
>> - OUT_BATCH(1 << 12 | 1);
>> - /* indirect object buffer size */
>> - OUT_BATCH(0xfffff000 | 1);
>> - /* intruction buffer size, must set modify enable bit, otherwise it may result in GPU hang */
>> - OUT_BATCH(1 << 12 | 1);
>> -
>> - /* Bindless surface state base address */
>> - OUT_BATCH(0 | BASE_ADDRESS_MODIFY);
>> - OUT_BATCH(0);
>> - OUT_BATCH(0xfffff000);
>> -}
>> -
>> -static void
>> -gen8_emit_vfe_state(struct intel_batchbuffer *batch)
>> -{
>> - OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
>> -
>> - /* scratch buffer */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -
>> - /* number of threads & urb entries */
>> - OUT_BATCH(1 << 16 |
>> - 2 << 8);
>> -
>> - OUT_BATCH(0);
>> -
>> - /* urb entry size & curbe size */
>> - OUT_BATCH(2 << 16 |
>> - 2);
>> -
>> - /* scoreboard */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -}
>> -
>> -static void
>> -gen8_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer)
>> -{
>> - OUT_BATCH(GEN8_MEDIA_CURBE_LOAD | (4 - 2));
>> - OUT_BATCH(0);
>> - /* curbe total data length */
>> - OUT_BATCH(64);
>> - /* curbe data start address, is relative to the dynamics base address */
>> - OUT_BATCH(curbe_buffer);
>> -}
>> -
>> -static void
>> -gen8_emit_interface_descriptor_load(struct intel_batchbuffer *batch, uint32_t interface_descriptor)
>> -{
>> - OUT_BATCH(GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD | (4 - 2));
>> - OUT_BATCH(0);
>> - /* interface descriptor data length */
>> - OUT_BATCH(sizeof(struct gen8_interface_descriptor_data));
>> - /* interface descriptor address, is relative to the dynamics base address */
>> - OUT_BATCH(interface_descriptor);
>> -}
>> -
>> -static void
>> -gen8_emit_media_state_flush(struct intel_batchbuffer *batch)
>> -{
>> - OUT_BATCH(GEN8_MEDIA_STATE_FLUSH | (2 - 2));
>> - OUT_BATCH(0);
>> -}
>> -
>> -static void
>> -gen8_emit_media_objects(struct intel_batchbuffer *batch,
>> - unsigned x, unsigned y,
>> - unsigned width, unsigned height)
>> -{
>> - int i, j;
>> -
>> - for (i = 0; i < width / 16; i++) {
>> - for (j = 0; j < height / 16; j++) {
>> - OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
>> -
>> - /* interface descriptor offset */
>> - OUT_BATCH(0);
>> -
>> - /* without indirect data */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -
>> - /* scoreboard */
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> -
>> - /* inline data (xoffset, yoffset) */
>> - OUT_BATCH(x + i * 16);
>> - OUT_BATCH(y + j * 16);
>> - gen8_emit_media_state_flush(batch);
>> - }
>> - }
>> -}
>> -
>> /*
>> * This sets up the media pipeline,
>> *
>> @@ -353,8 +58,9 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
>> /* setup states */
>> batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
>> - curbe_buffer = gen8_fill_curbe_buffer_data(batch, color);
>> - interface_descriptor = gen8_fill_interface_descriptor(batch, dst);
>> + curbe_buffer = gen7_fill_curbe_buffer_data(batch, color);
>> + interface_descriptor = gen8_fill_interface_descriptor(batch, dst,
>> + media_kernel, sizeof(media_kernel));
>> assert(batch->ptr < &batch->buffer[4095]);
>> /* media pipeline */
>> @@ -387,6 +93,6 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
>> batch_end = batch_align(batch, 8);
>> assert(batch_end < BATCH_STATE_SPLIT);
>> - gen8_render_flush(batch, batch_end);
>> + gen7_render_flush(batch, batch_end);
>> intel_batchbuffer_reset(batch);
>> }
>> diff --git a/lib/meson.build b/lib/meson.build
>> index b3b8b14a..38c3f107 100644
>> --- a/lib/meson.build
>> +++ b/lib/meson.build
>> @@ -24,6 +24,7 @@ lib_sources = [
>> 'intel_os.c',
>> 'intel_mmio.c',
>> 'ioctl_wrappers.c',
>> + 'gpu_fill.c',
>> 'media_fill_gen7.c',
>> 'media_fill_gen8.c',
>> 'media_fill_gen8lp.c',
>>
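Just to make the end state explicit: judging purely from the call sites
left in the hunks above, the helpers shared through gpu_fill.h should end
up with prototypes roughly like the sketch below. This is inferred from
the callers quoted here, not copied from the new header, so names and
details may differ in the actual patch.

/*
 * Sketch of the shared gpu_fill.h interface, as implied by the gen8/
 * gen8lp/gen9 call sites above (assumed, not taken verbatim from the
 * patch). struct intel_batchbuffer and struct igt_buf are the existing
 * IGT types.
 */
#include <stdint.h>
#include <stddef.h>
#include "intel_batchbuffer.h"

uint32_t gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
                                     uint8_t color);
uint32_t gen8_fill_interface_descriptor(struct intel_batchbuffer *batch,
                                        struct igt_buf *dst,
                                        const uint32_t kernel[][4],
                                        size_t size);
uint32_t batch_align(struct intel_batchbuffer *batch, uint32_t align);
void gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end);

With something like that in place, each media_fill_genX.c seems to keep
only its kernel blob and the top-level fillfunc, which is consistent with
the remaining context in these hunks.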