[igt-dev] [PATCH 02/10] lib/intel_batchbuffer: Add blitter copy using XY_SRC_COPY_BLT
Vanshidhar Konda
vanshidhar.r.konda at intel.com
Wed Dec 18 17:39:30 UTC 2019
On Wed, Dec 18, 2019 at 05:13:28PM +0200, Ville Syrjälä wrote:
>On Tue, Dec 17, 2019 at 09:59:17PM -0800, Vanshidhar Konda wrote:
>> Add a method that uses the XY_SRC_COPY_BLT instruction for copying
>> buffers using the blitter engine.
>>
>> Signed-off-by: Vanshidhar Konda <vanshidhar.r.konda at intel.com>
>> ---
>> lib/intel_batchbuffer.c | 183 ++++++++++++++++++++++++++++++++++++++++
>> lib/intel_batchbuffer.h | 21 +++++
>> 2 files changed, 204 insertions(+)
>>
>> diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
>> index 51aae4dc..1352aa95 100644
>> --- a/lib/intel_batchbuffer.c
>> +++ b/lib/intel_batchbuffer.c
>> @@ -46,6 +46,12 @@
>>
>> #include <i915_drm.h>
>>
>> +#define MI_FLUSH_DW (0x26 << 23)
>> +
>> +#define BCS_SWCTRL 0x22200
>> +#define BCS_SRC_Y (1 << 0)
>> +#define BCS_DST_Y (1 << 1)
>> +
>> /**
>> * SECTION:intel_batchbuffer
>> * @short_description: Batchbuffer and blitter support
>> @@ -661,6 +667,183 @@ static void exec_blit(int fd,
>> gem_execbuf(fd, &exec);
>> }
>>
>> +static uint32_t src_copy_dword0(uint32_t src_tiling, uint32_t dst_tiling,
>> + uint32_t bpp, uint32_t device_gen)
>> +{
>> + uint32_t dword0 = 0;
>> +
>> + dword0 |= XY_SRC_COPY_BLT_CMD;
>> + if (bpp == 32)
>> + dword0 |= XY_SRC_COPY_BLT_WRITE_RGB |
>> + XY_SRC_COPY_BLT_WRITE_ALPHA;
>> +
>> + if (device_gen >= 4 && src_tiling)
>> + dword0 |= XY_SRC_COPY_BLT_SRC_TILED;
>> + if (device_gen >= 4 && dst_tiling)
>> + dword0 |= XY_SRC_COPY_BLT_DST_TILED;
>> +
>> + return dword0;
>> +}
>> +
>> +static uint32_t src_copy_dword1(uint32_t dst_pitch, uint32_t bpp)
>> +{
>> + uint32_t dword1 = 0;
>> +
>> + switch (bpp) {
>> + case 8:
>> + break;
>> + case 16:
>> + dword1 |= (1 << 24); /* Only support 565 color */
>> + break;
>> + case 32:
>> + dword1 |= (3 << 24);
>> + break;
>> + default:
>> + igt_assert(0);
>> + }
>> +
>> + dword1 |= 0xcc << 16;
>> + dword1 |= dst_pitch;
>> +
>> + return dword1;
>> +}
>> +/**
>> + * igt_blitter_src_copy__raw:
>> + * @fd: file descriptor of the i915 driver
>> + * @src_handle: GEM handle of the source buffer
>> + * @src_delta: offset into the source GEM bo, in bytes
>> + * @src_stride: Stride (in bytes) of the source buffer
>> + * @src_tiling: Tiling mode of the source buffer
>> + * @src_x: X coordinate of the source region to copy
>> + * @src_y: Y coordinate of the source region to copy
>> + * @width: Width of the region to copy
>> + * @height: Height of the region to copy
>> + * @bpp: source and destination bits per pixel
>> + * @dst_handle: GEM handle of the destination buffer
>> + * @dst_delta: offset into the destination GEM bo, in bytes
>> + * @dst_stride: Stride (in bytes) of the destination buffer
>> + * @dst_tiling: Tiling mode of the destination buffer
>> + * @dst_x: X coordinate of destination
>> + * @dst_y: Y coordinate of destination
>> + *
>> + */
>> +void igt_blitter_src_copy__raw(int fd,
>> + /* src */
>> + uint32_t src_handle,
>> + unsigned int src_delta,
>> + unsigned int src_stride,
>> + unsigned int src_tiling,
>> + unsigned int src_x, unsigned src_y,
>
>Inconsistent unsigned int vs. unsigned
I'll fix this in the next version.
>
>> +
>> + /* size */
>> + unsigned int width, unsigned int height,
>> +
>> + /* bpp */
>> + int bpp,
>> +
>> + /* dst */
>> + uint32_t dst_handle,
>> + unsigned dst_delta,
>> + unsigned int dst_stride,
>> + unsigned int dst_tiling,
>> + unsigned int dst_x, unsigned dst_y)
>> +{
>> + uint32_t batch[32];
>> + struct drm_i915_gem_exec_object2 objs[3];
>> + struct drm_i915_gem_relocation_entry relocs[2];
>> + uint32_t batch_handle;
>> + uint32_t src_pitch, dst_pitch;
>> + uint32_t dst_reloc_offset, src_reloc_offset;
>> + int i = 0;
>> + uint32_t gen = intel_gen(intel_get_drm_devid(fd));
>> + const bool has_64b_reloc = gen >= 8;
>> +
>> + memset(batch, 0, sizeof(batch));
>> +
>> + igt_assert((src_tiling == I915_TILING_NONE) ||
>> + (src_tiling == I915_TILING_X) ||
>> + (src_tiling == I915_TILING_Y));
>> + igt_assert((dst_tiling == I915_TILING_NONE) ||
>> + (dst_tiling == I915_TILING_X) ||
>> + (dst_tiling == I915_TILING_Y));
>> +
>> + src_pitch = fast_copy_pitch(src_stride, src_tiling);
>> + dst_pitch = fast_copy_pitch(dst_stride, dst_tiling);
>
>I believe those do the wrong thing for pre-gen4 tiling.
The implementation that I've added is based on the latest one in
gem_blits.c. The logic I added is consistent with all the
implementations in IGT that are setting tiling for XY_SRC_COPY_BLT.
Not being aware of the history of the hardware I wanted to keep it
inline with what I found.
Would adding an assert to use this only on >= gen 4 be the right
approach?
>
>I also wonder how many implementations of this we already have in igt.
>I suspect the answer is more than two. Maybe there's a way to reduce the
>duplication a bit?
>
There are at least 13 implementations that I found. From what I can tell
there are two different forms - one using gem object based batch buffer
and another using libdrm batchbuffer and macros associated with that.
Merging them should be another patch series I think.
Vanshi
>> +
>> + CHECK_RANGE(src_x); CHECK_RANGE(src_y);
>> + CHECK_RANGE(dst_x); CHECK_RANGE(dst_y);
>> + CHECK_RANGE(width); CHECK_RANGE(height);
>> + CHECK_RANGE(src_x + width); CHECK_RANGE(src_y + height);
>> + CHECK_RANGE(dst_x + width); CHECK_RANGE(dst_y + height);
>> + CHECK_RANGE(src_pitch); CHECK_RANGE(dst_pitch);
>> +
>> + if ((src_tiling | dst_tiling) >= I915_TILING_Y) {
>> + unsigned int mask;
>> +
>> + batch[i++] = MI_LOAD_REGISTER_IMM;
>> + batch[i++] = BCS_SWCTRL;
>> +
>> + mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
>> + if (src_tiling == I915_TILING_Y)
>> + mask |= BCS_SRC_Y;
>> + if (dst_tiling == I915_TILING_Y)
>> + mask |= BCS_DST_Y;
>> + batch[i++] = mask;
>> + }
>> +
>> + batch[i] = src_copy_dword0(src_tiling, dst_tiling, bpp, gen);
>> + batch[i++] |= 6 + 2 * has_64b_reloc;
>> + batch[i++] = src_copy_dword1(dst_pitch, bpp);
>> + batch[i++] = (dst_y << 16) | dst_x; /* dst x1,y1 */
>> + batch[i++] = ((dst_y + height) << 16) | (dst_x + width); /* dst x2,y2 */
>> + dst_reloc_offset = i;
>> + batch[i++] = dst_delta; /* dst address lower bits */
>> + batch[i++] = 0; /* dst address upper bits */
>> + batch[i++] = (src_y << 16) | src_x; /* src x1,y1 */
>> + batch[i++] = src_pitch;
>> + src_reloc_offset = i;
>> + batch[i++] = src_delta; /* src address lower bits */
>> + batch[i++] = 0; /* src address upper bits */
>> +
>> + if ((src_tiling | dst_tiling) >= I915_TILING_Y) {
>> + igt_assert(gen >= 6);
>> + batch[i++] = MI_FLUSH_DW | 2;
>> + batch[i++] = 0;
>> + batch[i++] = 0;
>> + batch[i++] = 0;
>> +
>> + batch[i++] = MI_LOAD_REGISTER_IMM;
>> + batch[i++] = BCS_SWCTRL;
>> + batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
>> + }
>> +
>> + batch[i++] = MI_BATCH_BUFFER_END;
>> + batch[i++] = MI_NOOP;
>> +
>> + igt_assert(i <= ARRAY_SIZE(batch));
>> +
>> + batch_handle = gem_create(fd, 4096);
>> + gem_write(fd, batch_handle, 0, batch, sizeof(batch));
>> +
>> + fill_relocation(&relocs[0], dst_handle, dst_delta, dst_reloc_offset,
>> + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
>> + fill_relocation(&relocs[1], src_handle, src_delta, src_reloc_offset,
>> + I915_GEM_DOMAIN_RENDER, 0);
>> +
>> + fill_object(&objs[0], dst_handle, NULL, 0);
>> + fill_object(&objs[1], src_handle, NULL, 0);
>> + fill_object(&objs[2], batch_handle, relocs, 2);
>> +
>> + if (dst_tiling)
>> + objs[0].flags |= EXEC_OBJECT_NEEDS_FENCE;
>> + if (src_tiling)
>> + objs[1].flags |= EXEC_OBJECT_NEEDS_FENCE;
>> +
>> + exec_blit(fd, objs, 3, ARRAY_SIZE(batch));
>> +
>> + gem_close(fd, batch_handle);
>> +}
>> +
>> /**
>> * igt_blitter_fast_copy__raw:
>> * @fd: file descriptor of the i915 driver
>> diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
>> index 37e3affe..4820cebb 100644
>> --- a/lib/intel_batchbuffer.h
>> +++ b/lib/intel_batchbuffer.h
>> @@ -252,6 +252,27 @@ struct igt_buf {
>> unsigned igt_buf_width(const struct igt_buf *buf);
>> unsigned igt_buf_height(const struct igt_buf *buf);
>>
>> +void igt_blitter_src_copy__raw(int fd,
>> + /* src */
>> + uint32_t src_handle,
>> + unsigned int src_delta,
>> + unsigned int src_stride,
>> + unsigned int src_tiling,
>> + unsigned int src_x, unsigned src_y,
>> +
>> + /* size */
>> + unsigned int width, unsigned int height,
>> +
>> + /* bpp */
>> + int bpp,
>> +
>> + /* dst */
>> + uint32_t dst_handle,
>> + unsigned int dst_delta,
>> + unsigned int dst_stride,
>> + unsigned int dst_tiling,
>> + unsigned int dst_x, unsigned dst_y);
>> +
>> void igt_blitter_fast_copy(struct intel_batchbuffer *batch,
>> const struct igt_buf *src, unsigned src_delta,
>> unsigned src_x, unsigned src_y,
>> --
>> 2.24.0
>>
>> _______________________________________________
>> igt-dev mailing list
>> igt-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/igt-dev
>
>--
>Ville Syrjälä
>Intel
More information about the igt-dev
mailing list