    <div class="moz-cite-prefix">>>+    if (write)
      <br>
      >>+        amdgpu_asic_invalidate_hdp(adev);
      <br>
      > You can drop the HDP invalidation here.
      amdgpu_gem_prime_pin() will pin the BO into system memory.
      <br>
      <br>
My understanding is that between CPU and GPU access the invalidate/flush is still needed, since amdgpu_gem_prime_pin() does not do that.
For this one, I might need to move the invalidate() before the pin(). Thoughts?
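
Roughly what I have in mind (untested sketch, reusing the helpers already in the patch):

static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
                                       enum dma_data_direction direction)
{
    struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
    struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
    bool write = (direction == DMA_BIDIRECTIONAL ||
                  direction == DMA_TO_DEVICE);

    /* invalidate the HDP read cache before the BO is pinned */
    if (write)
        amdgpu_asic_invalidate_hdp(adev);

    return amdgpu_gem_prime_pin(dma_buf->priv);
}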

> BTW: You mixed up flush and invalidate. If we need to implement CPU access to VRAM, you need to flush when the CPU access starts and invalidate when it ends.
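
If I am reading you correctly, the ordering you describe would look roughly like this (sketch only, same helpers as above):

static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
                                       enum dma_data_direction direction)
{
    struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
    struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
    int ret;

    ret = amdgpu_gem_prime_pin(dma_buf->priv);
    if (ret)
        return ret;

    /* flush the HDP write queue when the CPU access starts */
    amdgpu_asic_flush_hdp(adev);
    return 0;
}

static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf,
                                     enum dma_data_direction direction)
{
    struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
    struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);

    /* invalidate the HDP read cache when the CPU access ends */
    amdgpu_asic_invalidate_hdp(adev);
    amdgpu_gem_prime_unpin(dma_buf->priv);
    return 0;
}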

That said, as per the doc I forwarded to you (HDP coherency check), I think the code here is correct.

Regards,
Sam


On 2017-09-07 03:02 AM, Christian König wrote:
> On 07.09.2017 at 00:07, Samuel Li wrote:
    <blockquote type="cite">Signed-off-by: Samuel Li
      <a class="moz-txt-link-rfc2396E" href="mailto:Samuel.Li@amd.com"><Samuel.Li@amd.com></a>
      <br>
      ---
      <br>
        drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 +
      <br>
        drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  3 +-
      <br>
        drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 95
      ++++++++++++++++++++++++++++++-
      <br>
        3 files changed, 99 insertions(+), 2 deletions(-)
      <br>
      <br>
      diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
      b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
      <br>
      index 6f262ce..3c43acf 100644
      <br>
      --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
      <br>
      +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
      <br>
      @@ -395,11 +395,14 @@ amdgpu_gem_prime_import_sg_table(struct
      drm_device *dev,
      <br>
        struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
      <br>
                            struct drm_gem_object *gobj,
      <br>
                            int flags);
      <br>
      +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device
      *dev,
      <br>
      +                        struct dma_buf *dma_buf);
      <br>
        int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
      <br>
        void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
      <br>
        struct reservation_object *amdgpu_gem_prime_res_obj(struct
      drm_gem_object *);
      <br>
        void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
      <br>
        void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void
      *vaddr);
      <br>
      +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct
      vm_area_struct *vma);
      <br>
        int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
      <br>
          /* sub-allocation manager, it has to be protected by another
      lock.
      <br>
      diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
      b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
      <br>
      index 2cdf844..9b63ac5 100644
      <br>
      --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
      <br>
      +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
      <br>
      @@ -835,7 +835,7 @@ static struct drm_driver kms_driver = {
      <br>
            .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
      <br>
            .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
      <br>
            .gem_prime_export = amdgpu_gem_prime_export,
      <br>
      -    .gem_prime_import = drm_gem_prime_import,
      <br>
      +    .gem_prime_import = amdgpu_gem_prime_import,
      <br>
            .gem_prime_pin = amdgpu_gem_prime_pin,
      <br>
            .gem_prime_unpin = amdgpu_gem_prime_unpin,
      <br>
            .gem_prime_res_obj = amdgpu_gem_prime_res_obj,
      <br>
      @@ -843,6 +843,7 @@ static struct drm_driver kms_driver = {
      <br>
            .gem_prime_import_sg_table =
      amdgpu_gem_prime_import_sg_table,
      <br>
            .gem_prime_vmap = amdgpu_gem_prime_vmap,
      <br>
            .gem_prime_vunmap = amdgpu_gem_prime_vunmap,
      <br>
      +    .gem_prime_mmap = amdgpu_gem_prime_mmap,
      <br>
              .name = DRIVER_NAME,
      <br>
            .desc = DRIVER_DESC,
      <br>
      diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
      b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
      <br>
      index 5b3f928..629a654 100644
      <br>
      --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
      <br>
      +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
      <br>
      @@ -57,6 +57,42 @@ void amdgpu_gem_prime_vunmap(struct
      drm_gem_object *obj, void *vaddr)
      <br>
            ttm_bo_kunmap(&bo->dma_buf_vmap);
      <br>
        }
      <br>
        +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct
      vm_area_struct *vma)
      <br>
      +{
      <br>
      +    struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
      <br>
      +    struct amdgpu_device *adev =
      amdgpu_ttm_adev(bo->tbo.bdev);
      <br>
      +    unsigned asize = amdgpu_bo_size(bo);
      <br>
      +    int ret;
      <br>
      +
      <br>
      +    if (!obj->filp || !vma->vm_file)
      <br>
      +        return -ENODEV;
      <br>
      +
      <br>
      +    if (adev == NULL)
      <br>
      +        return -ENODEV;
      <br>
      +
      <br>
      +    /* Check for valid size. */
      <br>
      +    if (asize < vma->vm_end - vma->vm_start)
      <br>
      +        return -EINVAL;
      <br>
      +
      <br>
      +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
      <br>
      +        (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
      <br>
      +        return -EPERM;
      <br>
      +    }
      <br>
      +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >>
      PAGE_SHIFT;
      <br>
      +
      <br>
      +    /* prime mmap does not need to check access, so allow here */
      <br>
      +    ret = drm_vma_node_allow(&obj->vma_node,
      vma->vm_file->private_data);
      <br>
      +    if (ret)
      <br>
      +        return ret;
      <br>
      +
      <br>
      +    ret = ttm_bo_mmap(vma->vm_file, vma,
      &adev->mman.bdev);
      <br>
      +    drm_vma_node_revoke(&obj->vma_node,
      vma->vm_file->private_data);
      <br>
      +    if (ret)
      <br>
      +        return ret;
      <br>
      +
      <br>
      +    return 0;
      <br>
      +}
      <br>
      +
      <br>
        struct drm_gem_object *
      <br>
        amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
      <br>
                         struct dma_buf_attachment *attach,
      <br>
      @@ -130,14 +166,71 @@ struct reservation_object
      *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
      <br>
            return bo->tbo.resv;
      <br>
        }
      <br>
        +static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
      enum dma_data_direction direction)
      <br>
      +{
      <br>
      +    struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
      <br>
      +    struct amdgpu_device *adev =
      amdgpu_ttm_adev(bo->tbo.bdev);
      <br>
      +    bool write = (direction == DMA_BIDIRECTIONAL || direction ==
      DMA_TO_DEVICE);
      <br>
      +    int ret;
      <br>
      +
      <br>
      +    ret = amdgpu_gem_prime_pin(dma_buf->priv);
      <br>
      +    if (ret)
      <br>
      +        return ret;
      <br>
      +    /* invalidate the HDP read cache */
      <br>
      +    if (write)
      <br>
      +        amdgpu_asic_invalidate_hdp(adev);
      <br>
    </blockquote>
>
> You can drop the HDP invalidation here. amdgpu_gem_prime_pin() will pin the BO into system memory.
>
    <blockquote type="cite">+
      <br>
      +    return ret;
      <br>
      +}
      <br>
      +
      <br>
      +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf,
      enum dma_data_direction direction)
      <br>
      +{
      <br>
      +    struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
      <br>
      +    struct amdgpu_device *adev =
      amdgpu_ttm_adev(bo->tbo.bdev);
      <br>
      +
      <br>
      +    /* flush hdp write queue */
      <br>
      +    amdgpu_asic_flush_hdp(adev);
      <br>
    </blockquote>
>
> Ditto, the HDP flush is completely unnecessary here.
>
> BTW: You mixed up flush and invalidate. If we need to implement CPU access to VRAM, you need to flush when the CPU access starts and invalidate when it ends.
>
> Regards,
> Christian.
>
    <blockquote type="cite">+   
      amdgpu_gem_prime_unpin(dma_buf->priv);
      <br>
      +    return 0;
      <br>
      +}
      <br>
      +
      <br>
      +static struct dma_buf_ops amdgpu_dmabuf_ops;
      <br>
      +
      <br>
        struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
      <br>
                            struct drm_gem_object *gobj,
      <br>
                            int flags)
      <br>
        {
      <br>
            struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
      <br>
      +    struct dma_buf *dma_buf;
      <br>
              if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
      <br>
                return ERR_PTR(-EPERM);
      <br>
        -    return drm_gem_prime_export(dev, gobj, flags);
      <br>
      +    dma_buf = drm_gem_prime_export(dev, gobj, flags);
      <br>
      +    amdgpu_dmabuf_ops = *(dma_buf->ops);
      <br>
      +    amdgpu_dmabuf_ops.begin_cpu_access =
      amdgpu_gem_begin_cpu_access;
      <br>
      +    amdgpu_dmabuf_ops.end_cpu_access = amdgpu_gem_end_cpu_access;
      <br>
      +    dma_buf->ops = &amdgpu_dmabuf_ops;
      <br>
      +
      <br>
      +    return dma_buf;
      <br>
      +}
      <br>
      +
      <br>
      +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device
      *dev,
      <br>
      +                        struct dma_buf *dma_buf)
      <br>
      +{
      <br>
      +    struct drm_gem_object *obj;
      <br>
      +
      <br>
      +    if (dma_buf->ops == &amdgpu_dmabuf_ops) {
      <br>
      +        obj = dma_buf->priv;
      <br>
      +        if (obj->dev == dev) {
      <br>
      +            /*
      <br>
      +             * Importing dmabuf exported from out own gem
      increases
      <br>
      +             * refcount on gem itself instead of f_count of
      dmabuf.
      <br>
      +             */
      <br>
      +            drm_gem_object_get(obj);
      <br>
      +            return obj;
      <br>
      +        }
      <br>
      +    }
      <br>
      +
      <br>
      +    return drm_gem_prime_import(dev, dma_buf);
      <br>
        }
      <br>
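
P.S. For reference, userspace would drive the new mmap and begin/end_cpu_access paths roughly like this (sketch only; the helper name cpu_write_bo and the simplified error handling are mine, not part of the patch):

#include <linux/dma-buf.h>   /* DMA_BUF_IOCTL_SYNC, struct dma_buf_sync */
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

/* dmabuf_fd: fd exported via DRM_IOCTL_PRIME_HANDLE_TO_FD, size: BO size */
static int cpu_write_bo(int dmabuf_fd, size_t size)
{
    struct dma_buf_sync sync = { 0 };
    void *ptr;

    /* goes through dma_buf->ops->mmap, i.e. amdgpu_gem_prime_mmap() */
    ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd, 0);
    if (ptr == MAP_FAILED)
        return -1;

    /* calls begin_cpu_access() */
    sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE;
    ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);

    memset(ptr, 0, size);   /* CPU writes to the BO */

    /* calls end_cpu_access() */
    sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE;
    ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync);

    munmap(ptr, size);
    return 0;
}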