<div class="moz-cite-prefix">Am 08.09.2017 um 00:17 schrieb Samuel
Li:<br>
</div>
<blockquote type="cite"
cite="mid:d454ab74-2254-49c2-7dab-063e4f6cc770@amd.com">
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<div class="moz-cite-prefix">>>+ if (write) <br>
>>+ amdgpu_asic_invalidate_hdp(adev); <br>
> You can drop the HDP invalidation here.
amdgpu_gem_prime_pin() will pin the BO into system memory. <br>
<br>
My understanding here is between CPU and GPU access, the
invalidate/flush is still needed, since amdgpu_gem_prime_pin()
does not do that.<br>
</div>
</blockquote>

That understanding is incorrect. The HDP is a small read/write cache between the CPU and VRAM.

So an HDP invalidate/flush is always needed when you want to access VRAM (local memory), but since amdgpu_gem_prime_pin() pins the memory into system memory, it is not necessary here.
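
Just to illustrate the hypothetical VRAM case (sketch only, untested, with made-up function names; as noted below, flush when the CPU access starts, invalidate when it ends):

/* sketch: begin/end CPU access for a BO that actually stays in VRAM */
static int vram_begin_cpu_access(struct dma_buf *dma_buf,
				 enum dma_data_direction direction)
{
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);

	/* flush the HDP when the CPU access starts */
	amdgpu_asic_flush_hdp(adev);
	return 0;
}

static int vram_end_cpu_access(struct dma_buf *dma_buf,
			       enum dma_data_direction direction)
{
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);

	/* invalidate the HDP read cache when the CPU access ends */
	amdgpu_asic_invalidate_hdp(adev);
	return 0;
}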

> For this one, I might need to move the invalidate() before pin(). Thoughts?
>
> > BTW: You mixed up flush and invalidate. If we need to implement CPU access to VRAM you need to flush when the CPU access starts and invalidate when it ends.
>
> As per the doc I forwarded to you (HDP coherency check), I think the code here is correct.

That might be correct; I need to double-check the HDP documentation once more.

Anyway, as long as we pin to system memory, the whole HDP handling here is unnecessary, so please remove it.
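
With the HDP handling gone, the two callbacks boil down to something like this (sketch only, untested):

static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
				       enum dma_data_direction direction)
{
	/* pinning already moves the BO to system memory */
	return amdgpu_gem_prime_pin(dma_buf->priv);
}

static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf,
				     enum dma_data_direction direction)
{
	amdgpu_gem_prime_unpin(dma_buf->priv);
	return 0;
}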

Regards,
Christian.
<blockquote type="cite"
cite="mid:d454ab74-2254-49c2-7dab-063e4f6cc770@amd.com">
<div class="moz-cite-prefix"> <br>
Regards,<br>
Sam<br>
<br>
<br>
<br>
<br>
On 2017-09-07 03:02 AM, Christian König wrote:<br>
</div>
> On 07.09.2017 at 00:07, Samuel Li wrote:
<blockquote type="cite">Signed-off-by: Samuel Li <a
class="moz-txt-link-rfc2396E" href="mailto:Samuel.Li@amd.com"
moz-do-not-send="true"><Samuel.Li@amd.com></a> <br>
--- <br>
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 + <br>
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +- <br>
drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 95
++++++++++++++++++++++++++++++- <br>
3 files changed, 99 insertions(+), 2 deletions(-) <br>
<br>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 6f262ce..3c43acf 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -395,11 +395,14 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
>>  struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
>>  					struct drm_gem_object *gobj,
>>  					int flags);
>> +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
>> +					       struct dma_buf *dma_buf);
>>  int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
>>  void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
>>  struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
>>  void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
>>  void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
>> +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
>>  int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
>>
>>  /* sub-allocation manager, it has to be protected by another lock.
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 2cdf844..9b63ac5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -835,7 +835,7 @@ static struct drm_driver kms_driver = {
>>  	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
>>  	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
>>  	.gem_prime_export = amdgpu_gem_prime_export,
>> -	.gem_prime_import = drm_gem_prime_import,
>> +	.gem_prime_import = amdgpu_gem_prime_import,
>>  	.gem_prime_pin = amdgpu_gem_prime_pin,
>>  	.gem_prime_unpin = amdgpu_gem_prime_unpin,
>>  	.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
>> @@ -843,6 +843,7 @@ static struct drm_driver kms_driver = {
>>  	.gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
>>  	.gem_prime_vmap = amdgpu_gem_prime_vmap,
>>  	.gem_prime_vunmap = amdgpu_gem_prime_vunmap,
>> +	.gem_prime_mmap = amdgpu_gem_prime_mmap,
>>
>>  	.name = DRIVER_NAME,
>>  	.desc = DRIVER_DESC,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> index 5b3f928..629a654 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
>> @@ -57,6 +57,42 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
>>  	ttm_bo_kunmap(&bo->dma_buf_vmap);
>>  }
>>
>> +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
>> +{
>> +	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
>> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>> +	unsigned asize = amdgpu_bo_size(bo);
>> +	int ret;
>> +
>> +	if (!obj->filp || !vma->vm_file)
>> +		return -ENODEV;
>> +
>> +	if (adev == NULL)
>> +		return -ENODEV;
>> +
>> +	/* Check for valid size. */
>> +	if (asize < vma->vm_end - vma->vm_start)
>> +		return -EINVAL;
>> +
>> +	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
>> +	    (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
>> +		return -EPERM;
>> +	}
>> +	vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;
>> +
>> +	/* prime mmap does not need to check access, so allow here */
>> +	ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file->private_data);
>> +	if (ret)
>> +		return ret;
>> +
>> +	ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev);
>> +	drm_vma_node_revoke(&obj->vma_node, vma->vm_file->private_data);
>> +	if (ret)
>> +		return ret;
>> +
>> +	return 0;
>> +}
>> +
>>  struct drm_gem_object *
>>  amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
>>  				 struct dma_buf_attachment *attach,
>> @@ -130,14 +166,71 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
>>  	return bo->tbo.resv;
>>  }
>>
>> +static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
>> +{
>> +	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
>> +	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
>> +	bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE);
>> +	int ret;
>> +
>> +	ret = amdgpu_gem_prime_pin(dma_buf->priv);
>> +	if (ret)
>> +		return ret;
>> +	/* invalidate the HDP read cache */
>> +	if (write)
>> +		amdgpu_asic_invalidate_hdp(adev);
>
> You can drop the HDP invalidation here. amdgpu_gem_prime_pin() will pin the BO into system memory.
>
<blockquote type="cite">+ <br>
+ return ret; <br>
+} <br>
+ <br>
+static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf,
enum dma_data_direction direction) <br>
+{ <br>
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
<br>
+ struct amdgpu_device *adev =
amdgpu_ttm_adev(bo->tbo.bdev); <br>
+ <br>
+ /* flush hdp write queue */ <br>
+ amdgpu_asic_flush_hdp(adev); <br>
</blockquote>
>
> Ditto, the HDP flush is completely unnecessary here.
>
> BTW: You mixed up flush and invalidate. If we need to implement CPU access to VRAM you need to flush when the CPU access starts and invalidate when it ends.
>
> Regards,
> Christian.
>
<blockquote type="cite">+
amdgpu_gem_prime_unpin(dma_buf->priv); <br>
+ return 0; <br>
+} <br>
+ <br>
+static struct dma_buf_ops amdgpu_dmabuf_ops; <br>
+ <br>
struct dma_buf *amdgpu_gem_prime_export(struct drm_device
*dev, <br>
struct drm_gem_object *gobj, <br>
int flags) <br>
{ <br>
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); <br>
+ struct dma_buf *dma_buf; <br>
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) <br>
return ERR_PTR(-EPERM); <br>
- return drm_gem_prime_export(dev, gobj, flags); <br>
+ dma_buf = drm_gem_prime_export(dev, gobj, flags); <br>
+ amdgpu_dmabuf_ops = *(dma_buf->ops); <br>
+ amdgpu_dmabuf_ops.begin_cpu_access =
amdgpu_gem_begin_cpu_access; <br>
+ amdgpu_dmabuf_ops.end_cpu_access =
amdgpu_gem_end_cpu_access; <br>
+ dma_buf->ops = &amdgpu_dmabuf_ops; <br>
+ <br>
+ return dma_buf; <br>
+} <br>
+ <br>
+struct drm_gem_object *amdgpu_gem_prime_import(struct
drm_device *dev, <br>
+ struct dma_buf *dma_buf) <br>
+{ <br>
+ struct drm_gem_object *obj; <br>
+ <br>
+ if (dma_buf->ops == &amdgpu_dmabuf_ops) { <br>
+ obj = dma_buf->priv; <br>
+ if (obj->dev == dev) { <br>
+ /* <br>
+ * Importing dmabuf exported from out own gem
increases <br>
+ * refcount on gem itself instead of f_count of
dmabuf. <br>
+ */ <br>
+ drm_gem_object_get(obj); <br>
+ return obj; <br>
+ } <br>
+ } <br>
+ <br>
+ return drm_gem_prime_import(dev, dma_buf); <br>
} <br>
</blockquote>