[PATCH] drm/amdgpu: use sdma to optimize debugged non-visible device memory access

Jonathan Kim jonathan.kim at amd.com
Thu Dec 2 21:43:04 UTC 2021


To support better memory access performance on non-Large BAR devices, use
SDMA copies instead of MM access.

SDMA access is restricted to PAGE_SIZE'd access to account for the PTRACED
process memory r/w operation use case.  Any other access size will use
MMIO.

Failure to do an SDMA copy will result in a fallback to MM access.

Note: This is an attempt to readdress the patch request
'drm/amdgpu: extend ttm memory access to do sdma copies'
with the addition of restrictions and fallbacks.

Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 97 +++++++++++++++++++++++++
 1 file changed, 97 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 23fc57506a20..1cb984252f58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1741,6 +1741,91 @@ static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
 	}
 }
 
+/**
+ * amdgpu_ttm_access_memory_page_sdma - Read/write page of memory that backs a buffer object.
+ *
+ * @bo:  The buffer object to read/write
+ * @offset:  Offset into buffer object
+ * @buf:  Secondary buffer to write/read from
+ * @write:  true if writing
+ *
+ * This is used to access a page of VRAM that backs a buffer object via SDMA
+ * access for debugging purposes.  Returns 0 on success, negative errno on failure.
+ */
+static int amdgpu_ttm_access_memory_page_sdma(struct ttm_buffer_object *bo,
+					unsigned long offset, void *buf,
+					int write)
+{
+	struct amdgpu_bo *dst_bo, *abo = ttm_to_amdgpu_bo(bo);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+	struct ttm_operation_ctx ctx = {.interruptible = true};
+	struct amdgpu_copy_mem src, dst;
+	struct drm_gem_object *gobj;
+	struct dma_fence *fence;
+	struct page *dst_page;
+	struct ttm_tt *dst_ttm;
+	int ret;
+
+	/* Create an SG BO to dma map the target buffer for direct copy. */
+	ret = amdgpu_gem_object_create(adev, PAGE_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_CPU,
+				0, ttm_bo_type_sg, NULL, &gobj);
+	if (ret)
+		return ret;
+
+	dst_bo = gem_to_amdgpu_bo(gobj);
+	dst_ttm = dst_bo->tbo.ttm;
+	dst_ttm->sg = kmalloc(sizeof(*dst_ttm->sg), GFP_KERNEL);
+	if (unlikely(!dst_ttm->sg)) {
+		ret = -ENOMEM;
+		goto free_bo;
+	}
+
+	dst_page = virt_to_page(buf);
+	ret = sg_alloc_table_from_pages(dst_ttm->sg, &dst_page, 1, 0,
+					PAGE_SIZE, GFP_KERNEL);
+	if (unlikely(ret))
+		goto free_sg;
+
+	ret = dma_map_sgtable(adev->dev, dst_ttm->sg, DMA_BIDIRECTIONAL, 0);
+	if (unlikely(ret))
+		goto release_sg;
+
+	drm_prime_sg_to_dma_addr_array(dst_ttm->sg, dst_ttm->dma_address, 1);
+
+	amdgpu_bo_placement_from_domain(dst_bo, AMDGPU_GEM_DOMAIN_GTT);
+	ret = ttm_bo_validate(&dst_bo->tbo, &dst_bo->placement, &ctx);
+	if (ret)
+		goto unmap_sg;
+
+	/* Both copy descriptors need bo/mem/offset filled in before the copy. */
+	src.bo = bo;
+	src.mem = bo->resource;
+	src.offset = offset;
+	dst.bo = &dst_bo->tbo;
+	dst.mem = dst.bo->resource;
+	dst.offset = 0;
+	/* Do the direct copy and wait for fence response. */
+	ret = amdgpu_ttm_copy_mem_to_mem(adev, write ? &dst : &src, write ? &src : &dst,
+					PAGE_SIZE, amdgpu_bo_encrypted(abo),
+					bo->base.resv, &fence);
+	if (!ret && fence) {
+		if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
+			ret = -ETIMEDOUT;
+
+		dma_fence_put(fence);
+	}
+
+unmap_sg:
+	dma_unmap_sgtable(adev->dev, dst_ttm->sg, DMA_BIDIRECTIONAL, 0);
+release_sg:
+	sg_free_table(dst_ttm->sg);
+free_sg:
+	kfree(dst_ttm->sg);
+	dst_ttm->sg = NULL;
+free_bo:
+	/* Drop the creation reference instead of freeing directly. */
+	drm_gem_object_put(gobj);
+	return ret;
+}
+
 /**
  * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
  *
@@ -1765,7 +1850,19 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
 	if (bo->resource->mem_type != TTM_PL_VRAM)
 		return -EIO;
 
+	/*
+	 * Attempt SDMA access over non-visible VRAM first.
+	 * On failure, fall back to MMIO access.
+	 *
+	 * Restrict this to PAGE_SIZE access for PTRACED memory operations.
+	 * Any other access size should use MM access.
+	 */
 	amdgpu_res_first(bo->resource, offset, len, &cursor);
+	if (adev->gmc.visible_vram_size < cursor.start + len && len == PAGE_SIZE &&
+			!amdgpu_in_reset(adev) &&
+				!amdgpu_ttm_access_memory_page_sdma(bo, offset, buf, write))
+		return len;
+
 	while (cursor.remaining) {
 		size_t count, size = cursor.size;
 		loff_t pos = cursor.start;
-- 
2.25.1



More information about the amd-gfx mailing list