[Mesa-dev] [PATCH] radeonsi: don't reallocate on DMABUF export if local BOs are disabled

Thu Mar 15 20:24:28 UTC 2018

From: Marek Olšák <marek.olsak at amd.com>

---
 src/amd/common/ac_gpu_info.c              | 2 ++
 src/amd/common/ac_gpu_info.h              | 1 +
 src/gallium/drivers/radeon/r600_texture.c | 4 +++-
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 7 +++----
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 73b5da0..73fc362 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -306,20 +306,22 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
 		uvd.available_rings ? uvd_version : 0;
 	info->vce_fw_version =
 		vce.available_rings ? vce_version : 0;
 	info->uvd_enc_supported =
 		uvd_enc.available_rings ? true : false;
 	info->has_userptr = true;
 	info->has_syncobj = has_syncobj(fd);
 	info->has_syncobj_wait_for_submit = info->has_syncobj && info->drm_minor >= 20;
 	info->has_fence_to_handle = info->has_syncobj && info->drm_minor >= 21;
 	info->has_ctx_priority = info->drm_minor >= 22;
+	/* TODO: Enable this once the kernel handles it efficiently. */
+	/*info->has_local_buffers = info->drm_minor >= 20;*/
 	info->num_render_backends = amdinfo->rb_pipes;
 	info->clock_crystal_freq = amdinfo->gpu_counter_freq;
 	if (!info->clock_crystal_freq) {
 		fprintf(stderr, "amdgpu: clock crystal frequency is 0, timestamps will be wrong\n");
 		info->clock_crystal_freq = 1;
 	}
 	info->tcc_cache_line_size = 64; /* TC L2 line size on GCN */
 	if (info->chip_class == GFX9) {
 		info->num_tile_pipes = 1 << G_0098F8_NUM_PIPES(amdinfo->gb_addr_cfg);
 		info->pipe_interleave_bytes =
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index 34d91be..3f08b57 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -82,20 +82,21 @@ struct radeon_info {
 
 	/* Kernel info. */
 	uint32_t                    drm_major; /* version */
 	uint32_t                    drm_minor;
 	uint32_t                    drm_patchlevel;
 	bool                        has_userptr;
 	bool                        has_syncobj;
 	bool                        has_syncobj_wait_for_submit;
 	bool                        has_fence_to_handle;
 	bool                        has_ctx_priority;
+	bool                        has_local_buffers;
 
 	/* Shader cores. */
 	uint32_t                    r600_max_quad_pipes; /* wave size / 16 */
 	uint32_t                    max_shader_clock;
 	uint32_t                    num_good_compute_units;
 	uint32_t                    max_se; /* shader engines */
 	uint32_t                    max_sh_per_se; /* shader arrays per shader engine */
 
 	/* Render backends (color + depth blocks). */
 	uint32_t                    r300_num_gb_pipes;
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 1c4e9b7..30d06de 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -690,20 +690,21 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
 		/* This is not supported now, but it might be required for OpenCL
 		 * interop in the future.
 		 */
 		if (resource->nr_samples > 1 || rtex->is_depth)
 			return false;
 
 		/* Move a suballocated texture into a non-suballocated allocation. */
 		if (sscreen->ws->buffer_is_suballocated(res->buf) ||
 		    rtex->surface.tile_swizzle ||
 		    (rtex->resource.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
+		     sscreen->info.has_local_buffers &&
 		     whandle->type != DRM_API_HANDLE_TYPE_KMS)) {
 			assert(!res->b.is_shared);
 			r600_reallocate_texture_inplace(rctx, rtex,
 							PIPE_BIND_SHARED, false);
 			flush = true;
 			assert(res->b.b.bind & PIPE_BIND_SHARED);
 			assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
 			assert(!(res->flags & RADEON_FLAG_NO_INTERPROCESS_SHARING));
 			assert(rtex->surface.tile_swizzle == 0);
 		}
@@ -751,21 +752,22 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
 			offset = rtex->surface.u.legacy.level[0].offset;
 			stride = rtex->surface.u.legacy.level[0].nblk_x *
 				 rtex->surface.bpe;
 			slice_size = (uint64_t)rtex->surface.u.legacy.level[0].slice_size_dw * 4;
 		}
 	} else {
 		/* Buffer exports are for the OpenCL interop. */
 		/* Move a suballocated buffer into a non-suballocated allocation. */
 		if (sscreen->ws->buffer_is_suballocated(res->buf) ||
 		    /* A DMABUF export always fails if the BO is local. */
-		    rtex->resource.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING) {
+		    rtex->resource.flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
+		    sscreen->info.has_local_buffers) {
 			assert(!res->b.is_shared);
 
 			/* Allocate a new buffer with PIPE_BIND_SHARED. */
 			struct pipe_resource templ = res->b.b;
 			templ.bind |= PIPE_BIND_SHARED;
 
 			struct pipe_resource *newb =
 				screen->resource_create(screen, &templ);
 			if (!newb)
 				return false;
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index 12d497d..7740b46 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -416,24 +416,23 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
     * DRM 3.6.0 has good BO move throttling, so we can allow VRAM-only
     * placements even with a low amount of stolen VRAM.
     */
    if (!ws->info.has_dedicated_vram && ws->info.drm_minor < 6)
       request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
 
    if (flags & RADEON_FLAG_NO_CPU_ACCESS)
       request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
    if (flags & RADEON_FLAG_GTT_WC)
       request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
-   /* TODO: Enable this once the kernel handles it efficiently. */
-   /*if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
-       ws->info.drm_minor >= 20)
-      request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;*/
+   if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
+       ws->info.has_local_buffers)
+      request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
 
    r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
    if (r) {
       fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
       fprintf(stderr, "amdgpu:    size      : %"PRIu64" bytes\n", size);
       fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);
       fprintf(stderr, "amdgpu:    domains   : %u\n", initial_domain);
       goto error_bo_alloc;
    }
 
-- 
2.7.4