[igt-dev] [PATCH i-g-t v3 19/22] tests/prime_nv_pcopy.c: Remove libdrm dependency

Zbigniew Kempczyński zbigniew.kempczynski at intel.com
Mon Sep 28 06:11:57 UTC 2020


+Daniel

Daniel: for nv, should we go toward a hybrid solution? I mean: keep
libdrm for the nouveau side and remove it only on the intel side?
From my perspective the libdrm removal for intel is only needed to
cover lmem regions when playing with tiling / buffer sharing.
The problem I see now is that I don't have the appropriate hw
combination to test and review such changes. What is your suggestion?


On Fri, Sep 18, 2020 at 12:58:53PM +0200, Dominik Grzegorzek wrote:
> Use intel_bb / intel_buf to remove libdrm dependency.
> 
> Signed-off-by: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
> Cc: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  tests/prime_nv_pcopy.c | 103 ++++++++++++++++++++---------------------
>  1 file changed, 50 insertions(+), 53 deletions(-)
> 
> diff --git a/tests/prime_nv_pcopy.c b/tests/prime_nv_pcopy.c
> index bbcf7680..15d79975 100644
> --- a/tests/prime_nv_pcopy.c
> +++ b/tests/prime_nv_pcopy.c
> @@ -25,15 +25,13 @@
>  #include <sys/ioctl.h>
>  #include <errno.h>
>  
> -#include "intel_bufmgr.h"
>  #include "nouveau.h"
>  
>  static int intel_fd = -1, nouveau_fd = -1;
> -static drm_intel_bufmgr *bufmgr;
> +static struct buf_ops *bops;
>  static struct nouveau_device *ndev;
>  static struct nouveau_client *nclient;
> -static uint32_t devid;
> -static struct intel_batchbuffer *batch;
> +static struct intel_bb *ibb;
>  static struct nouveau_object *nchannel, *pcopy;
>  static struct nouveau_bufctx *nbufctx;
>  static struct nouveau_pushbuf *npush;
> @@ -161,16 +159,15 @@ BEGIN_NVXX(struct nouveau_pushbuf *push, int subc, int mthd, int size)
>  }
>  
>  static void
> -noop_intel(drm_intel_bo *bo)
> +noop_intel(struct intel_buf *buf)
>  {
> -	BEGIN_BATCH(3, 1);
> -	OUT_BATCH(MI_NOOP);
> -	OUT_BATCH(MI_BATCH_BUFFER_END);
> -	OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER,
> -			I915_GEM_DOMAIN_RENDER, 0);
> -	ADVANCE_BATCH();
> -
> -	intel_batchbuffer_flush(batch);
> +	intel_bb_out(ibb, MI_NOOP);
> +	intel_bb_out(ibb, MI_BATCH_BUFFER_END);
> +	intel_bb_emit_reloc(ibb, buf->handle, I915_GEM_DOMAIN_RENDER,
> +			    I915_GEM_DOMAIN_RENDER, 0, buf->addr.offset);
> +
> +	intel_bb_flush_blit(ibb);
> +	intel_bb_sync(ibb);

We don't need sync here.
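
I.e. just drop the intel_bb_sync(); the flush alone is enough here:

static void
noop_intel(struct intel_buf *buf)
{
	intel_bb_out(ibb, MI_NOOP);
	intel_bb_out(ibb, MI_BATCH_BUFFER_END);
	intel_bb_emit_reloc(ibb, buf->handle, I915_GEM_DOMAIN_RENDER,
			    I915_GEM_DOMAIN_RENDER, 0, buf->addr.offset);

	intel_bb_flush_blit(ibb);
}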

>  }
>  
>  static void find_and_open_devices(void)
> @@ -542,24 +539,25 @@ static void test1_micro(void)
>  {
>  	struct nouveau_bo *bo_intel = NULL, *bo_nvidia = NULL, *bo_linear = NULL;
>  	rect intel, nvidia, linear;
> -	uint32_t tiling = I915_TILING_Y;
>  
>  	uint32_t src_x = 0, src_y = 0;
>  	uint32_t dst_x = 0, dst_y = 0;
>  	uint32_t x, y, w = 256, h = 64;
> +	uint8_t *test_intel_buf_ptr;
>  
> -	drm_intel_bo *test_intel_bo;
> +	struct intel_buf *test_intel_buf;
>  	int prime_fd;
>  
> -	test_intel_bo = drm_intel_bo_alloc(bufmgr, "test bo", w * h, 4096);
> -	igt_assert(test_intel_bo);
> -	drm_intel_bo_set_tiling(test_intel_bo, &tiling, w);
> -	igt_assert(tiling == I915_TILING_Y);
> -	igt_assert(drm_intel_gem_bo_map_gtt(test_intel_bo) == 0);
> +	test_intel_buf = intel_buf_create(bops, w, h, 8, 4096,
> +					  I915_TILING_Y, I915_COMPRESSION_NONE);

Use 0 instead of 4096 for the alignment.
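
I.e.:

test_intel_buf = intel_buf_create(bops, w, h, 8, 0,
				  I915_TILING_Y, I915_COMPRESSION_NONE);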

> +
> +	test_intel_buf_ptr = gem_mmap__gtt(intel_fd, test_intel_buf->handle,
> +					    test_intel_buf->surface[0].size,
> +					    PROT_READ | PROT_WRITE);
>  
> -	drm_intel_bo_gem_export_to_prime(test_intel_bo, &prime_fd);
> +	prime_fd = prime_handle_to_fd(intel_fd, test_intel_buf->handle);

Use prime_handle_to_fd_for_mmap() here.
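
I.e.:

prime_fd = prime_handle_to_fd_for_mmap(intel_fd, test_intel_buf->handle);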

>  	igt_assert_lte(0, prime_fd);
> -	noop_intel(test_intel_bo);
> +	noop_intel(test_intel_buf);
>  
>  	nv_bo_alloc(&bo_intel, &intel, w, h, tile_intel_y, prime_fd, 0);
>  	nv_bo_alloc(&bo_nvidia, &nvidia, w, h, 0x10, -1, NOUVEAU_BO_VRAM);
> @@ -584,15 +582,16 @@ static void test1_micro(void)
>  	if (pcopy)
>  		perform_copy(bo_intel, &intel, dst_x, dst_y, bo_nvidia, &nvidia, src_x, src_y, w, h);
>  	else
> -		swtile_y(test_intel_bo->virtual, bo_linear->map, w, h);
> +		swtile_y(test_intel_buf_ptr, bo_linear->map, w, h);

I'm really confused here. For !pcopy, bo_nvidia is not used. In that
case we just use the previously filled bo_linear map as the source,
software-copy it to test_intel_buf_ptr and then check the same mapped
buffer, so I see no real prime usage. Unfortunately I have no such hw
to play with. And as we're using a GTT mapping we're slightly limited
on gens without a mappable GTT. To tile/detile on newer gens you could
use intel_buf_to_linear() / linear_to_intel_buf(), which wrap the
appropriate mapping, or just do the software tiling. If Daniel says we
have to go with the hybrid solution, we should remove the GTT mapping.
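
A rough sketch (untested, I don't have the hw; assuming the buffer
fits in a malloc'ed linear copy):

uint32_t *linear = malloc(test_intel_buf->surface[0].size);

/* detile the Y-tiled buffer into the linear copy */
intel_buf_to_linear(bops, test_intel_buf, linear);
/* ... check / modify the linear data ... */
/* tile it back into the buffer */
linear_to_intel_buf(bops, test_intel_buf, linear);
free(linear);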

--
Zbigniew

>  
> -	noop_intel(test_intel_bo);
> -	check1_micro(test_intel_bo->virtual, intel.pitch, intel.h, dst_x, dst_y, w, h);
> +	noop_intel(test_intel_buf);
> +	check1_micro(test_intel_buf_ptr, intel.pitch, intel.h, dst_x, dst_y, w, h);
>  
>  	nouveau_bo_ref(NULL, &bo_linear);
>  	nouveau_bo_ref(NULL, &bo_nvidia);
>  	nouveau_bo_ref(NULL, &bo_intel);
> -	drm_intel_bo_unreference(test_intel_bo);
> +	gem_munmap(test_intel_buf_ptr, test_intel_buf->surface[0].size);
> +	intel_buf_destroy(test_intel_buf);
>  }
>  
>  /* test 2, see if we can copy from linear to intel X format safely
> @@ -685,43 +684,44 @@ static void test3_base(int tile_src, int tile_dst)
>  	uint32_t dst_x = 2 * cpp, dst_y = 26;
>  	uint32_t w = 298 * cpp, h = 298;
>  
> -	drm_intel_bo *test_intel_bo;
> +	struct intel_buf *test_intel_buf;
>  	int prime_fd;
>  
> -	test_intel_bo = drm_intel_bo_alloc(bufmgr, "test bo", 2048 * cpp * 768, 4096);
> -	igt_assert(test_intel_bo);
> +	test_intel_buf = intel_buf_create(bops, 2048, 768, cpp * 8, 4096,
> +					  I915_TILING_NONE,
> +					  I915_COMPRESSION_NONE);
>  
> -	drm_intel_bo_gem_export_to_prime(test_intel_bo, &prime_fd);
> +	prime_fd = prime_handle_to_fd(intel_fd, test_intel_buf->handle);
>  	igt_assert_lte(0, prime_fd);
>  
>  	nv_bo_alloc(&bo_intel, &intel, 2048 * cpp, 768, tile_dst, prime_fd, 0);
>  	nv_bo_alloc(&bo_nvidia, &nvidia, 300 * cpp, 300, tile_src, -1, NOUVEAU_BO_VRAM);
>  	nv_bo_alloc(&bo_linear, &linear, 2048 * cpp, 768, 0, -1, NOUVEAU_BO_GART);
>  
> -	noop_intel(test_intel_bo);
> +	noop_intel(test_intel_buf);
>  	memset(bo_linear->map, 0x80, bo_linear->size);
>  	perform_copy(bo_intel, &intel, 0, 0, bo_linear, &linear, 0, 0, linear.pitch, linear.h);
> -	noop_intel(test_intel_bo);
> +	noop_intel(test_intel_buf);
>  
>  	memset(bo_linear->map, 0x04, bo_linear->size);
>  	perform_copy(bo_nvidia, &nvidia, 0, 0, bo_linear, &linear, 0, 0, nvidia.pitch, nvidia.h);
>  
>  	/* Perform the actual sub rectangle copy */
> -	noop_intel(test_intel_bo);
> +	noop_intel(test_intel_buf);
>  	perform_copy(bo_intel, &intel, dst_x, dst_y, bo_nvidia, &nvidia, src_x, src_y, w, h);
> -	noop_intel(test_intel_bo);
> +	noop_intel(test_intel_buf);
>  
>  	memset(bo_linear->map, 0x3, bo_linear->size);
> -	noop_intel(test_intel_bo);
> +	noop_intel(test_intel_buf);
>  	perform_copy(bo_linear, &linear, 0, 0, bo_intel, &intel, 0, 0, intel.pitch, intel.h);
> -	noop_intel(test_intel_bo);
> +	noop_intel(test_intel_buf);
>  
>  	check3(bo_linear->map, linear.pitch, linear.h, dst_x, dst_y, w, h);
>  
>  	nouveau_bo_ref(NULL, &bo_linear);
>  	nouveau_bo_ref(NULL, &bo_nvidia);
>  	nouveau_bo_ref(NULL, &bo_intel);
> -	drm_intel_bo_unreference(test_intel_bo);
> +	intel_buf_destroy(test_intel_buf);
>  }
>  
>  static void test3_1(void)
> @@ -767,7 +767,7 @@ static void test3_5(void)
>  /* Test only new style semaphores, old ones are AWFUL */
>  static void test_semaphore(void)
>  {
> -	drm_intel_bo *test_intel_bo = NULL;
> +	struct intel_buf *test_intel_buf = NULL;
>  	struct nouveau_bo *sema_bo = NULL;
>  	int prime_fd;
>  	uint32_t *sema;
> @@ -776,16 +776,17 @@ static void test_semaphore(void)
>  	igt_skip_on(ndev->chipset < 0x84);
>  
>  	/* Should probably be kept in sysmem */
> -	test_intel_bo = drm_intel_bo_alloc(bufmgr, "semaphore bo", 4096, 4096);
> -	igt_assert(test_intel_bo);
> +	test_intel_buf = intel_buf_create(bops, 32, 32, 32, 4096,
> +					  I915_TILING_NONE,
> +					  I915_COMPRESSION_NONE);
>  
> -	drm_intel_bo_gem_export_to_prime(test_intel_bo, &prime_fd);
> +	prime_fd = prime_handle_to_fd(intel_fd, test_intel_buf->handle);
>  	igt_assert_lte(0, prime_fd);
>  	igt_assert(nouveau_bo_prime_handle_ref(ndev, prime_fd, &sema_bo) == 0);
>  	close(prime_fd);
>  
> -	igt_assert(drm_intel_gem_bo_map_gtt(test_intel_bo) == 0);
> -	sema = test_intel_bo->virtual;
> +	sema = gem_mmap__gtt(intel_fd, test_intel_buf->handle,
> +			     test_intel_buf->surface[0].size, PROT_WRITE);
>  	sema++;
>  	*sema = 0;
>  
> @@ -845,7 +846,8 @@ static void test_semaphore(void)
>  	igt_assert(*sema == 9);
>  
>  	nouveau_bo_ref(NULL, &sema_bo);
> -	drm_intel_bo_unreference(test_intel_bo);
> +	gem_munmap(sema, test_intel_buf->surface[0].size);
> +	intel_buf_destroy(test_intel_buf);
>  }
>  
>  igt_main
> @@ -857,18 +859,13 @@ igt_main
>  		igt_require(intel_fd != -1);
>  
>  		/* set up intel bufmgr */
> -		bufmgr = drm_intel_bufmgr_gem_init(intel_fd, 4096);
> -		igt_assert(bufmgr);
> -		/* Do not enable reuse, we share (almost) all buffers. */
> -		//drm_intel_bufmgr_gem_enable_reuse(bufmgr);
> +		bops = buf_ops_create(intel_fd);
>  
>  		/* set up nouveau bufmgr */
>  		init_nouveau();
>  
>  		/* set up an intel batch buffer */
> -		devid = intel_get_drm_devid(intel_fd);
> -		batch = intel_batchbuffer_alloc(bufmgr, devid);
> -		igt_assert(batch);
> +		ibb = intel_bb_create(intel_fd, 4096);
>  	}
>  
>  #define xtest(x, args...) \
> @@ -893,11 +890,11 @@ igt_main
>  		nouveau_pushbuf_del(&npush);
>  		nouveau_object_del(&nchannel);
>  
> -		intel_batchbuffer_free(batch);
> +		intel_bb_destroy(ibb);
>  
>  		nouveau_client_del(&nclient);
>  		nouveau_device_del(&ndev);
> -		drm_intel_bufmgr_destroy(bufmgr);
> +		buf_ops_destroy(bops);
>  
>  		close(intel_fd);
>  		close(nouveau_fd);
> -- 
> 2.20.1
> 

