<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body text="#000000" bgcolor="#FFFFFF">
    <div class="moz-cite-prefix">Am 08.09.2017 um 00:17 schrieb Samuel
      Li:<br>
    </div>
    <blockquote type="cite"
      cite="mid:d454ab74-2254-49c2-7dab-063e4f6cc770@amd.com">
      <div class="moz-cite-prefix">>>+    if (write) <br>
        >>+        amdgpu_asic_invalidate_hdp(adev); <br>
        > You can drop the HDP invalidation here.
        amdgpu_gem_prime_pin() will pin the BO into system memory. <br>
        <br>
        My understanding here is that between CPU and GPU access, the
        invalidate/flush is still needed, since amdgpu_gem_prime_pin()
        does not do that.<br>
      </div>
    </blockquote>
    <br>
    That understanding is incorrect. The HDP is a small read/write cache
    between the CPU and the VRAM.<br>
    <br>
    So an HDP invalidate/flush is always needed when you access VRAM
    (local memory). But since amdgpu_gem_prime_pin() pins the memory
    into system memory, it is not necessary here.<br>
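    <br>
    To illustrate (a hypothetical sketch only, not part of the patch; it
    assumes the helpers used below and that bo->tbo.mem.mem_type reflects
    the current placement), any HDP handling would only matter while the
    BO actually lives in VRAM:<br>
    <br>
        struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);<br>
        struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);<br>
    <br>
        /* hypothetical: the HDP only matters for a BO placed in VRAM */<br>
        if (bo->tbo.mem.mem_type == TTM_PL_VRAM)<br>
            amdgpu_asic_invalidate_hdp(adev);<br>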
    <br>
    <blockquote type="cite"
      cite="mid:d454ab74-2254-49c2-7dab-063e4f6cc770@amd.com">
      <div class="moz-cite-prefix"> For this one, I might need to move
        the invalidate() before pin(). Thoughts?<br>
        <br>
        > BTW: You mixed up flush and invalidate. If we need to
        implement CPU access to VRAM, you need to flush when the CPU
        access starts and invalidate when it ends. <br>
        As per the doc I forwarded to you (HDP coherency check), I think
        the code here is correct.<br>
      </div>
    </blockquote>
    <br>
    That might be correct. I need to double-check the HDP documentation
    once more.<br>
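    <br>
    Schematically, the ordering I described would be (illustrative only;
    which call goes on which side is exactly what the HDP doc needs to
    settle):<br>
    <br>
        /* begin_cpu_access: flush the HDP write queue first */<br>
        amdgpu_asic_flush_hdp(adev);<br>
        /* ... CPU accesses VRAM ... */<br>
        /* end_cpu_access: invalidate the HDP read cache when done */<br>
        amdgpu_asic_invalidate_hdp(adev);<br>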
    <br>
    Anyway, as long as we pin to system memory, the whole HDP handling
    here is unnecessary, so please remove it.<br>
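    <br>
    With the HDP calls dropped, the two callbacks shrink to plain
    pin/unpin, something like this sketch (reusing the helpers from your
    patch):<br>
    <br>
        static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,<br>
                                               enum dma_data_direction direction)<br>
        {<br>
            /* pinning places the BO in system memory, so no HDP handling */<br>
            return amdgpu_gem_prime_pin(dma_buf->priv);<br>
        }<br>
    <br>
        static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf,<br>
                                             enum dma_data_direction direction)<br>
        {<br>
            amdgpu_gem_prime_unpin(dma_buf->priv);<br>
            return 0;<br>
        }<br>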
    <br>
    Regards,<br>
    Christian.<br>
    <br>
    <blockquote type="cite"
      cite="mid:d454ab74-2254-49c2-7dab-063e4f6cc770@amd.com">
      <div class="moz-cite-prefix"> <br>
        Regards,<br>
        Sam<br>
        <br>
        On 2017-09-07 03:02 AM, Christian König wrote:<br>
      </div>
      On 07.09.2017 at 00:07, Samuel Li wrote: <br>
      <blockquote type="cite">Signed-off-by: Samuel Li <a
          class="moz-txt-link-rfc2396E" href="mailto:Samuel.Li@amd.com"
          moz-do-not-send="true"><Samuel.Li@amd.com></a> <br>
        --- <br>
          drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 + <br>
          drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c   |  3 +- <br>
          drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 95
        ++++++++++++++++++++++++++++++- <br>
          3 files changed, 99 insertions(+), 2 deletions(-) <br>
        <br>
        diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
        b/drivers/gpu/drm/amd/amdgpu/amdgpu.h <br>
        index 6f262ce..3c43acf 100644 <br>
        --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h <br>
        +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h <br>
        @@ -395,11 +395,14 @@ amdgpu_gem_prime_import_sg_table(struct
        drm_device *dev, <br>
          struct dma_buf *amdgpu_gem_prime_export(struct drm_device
        *dev, <br>
                              struct drm_gem_object *gobj, <br>
                              int flags); <br>
        +struct drm_gem_object *amdgpu_gem_prime_import(struct
        drm_device *dev, <br>
        +                        struct dma_buf *dma_buf); <br>
          int amdgpu_gem_prime_pin(struct drm_gem_object *obj); <br>
          void amdgpu_gem_prime_unpin(struct drm_gem_object *obj); <br>
          struct reservation_object *amdgpu_gem_prime_res_obj(struct
        drm_gem_object *); <br>
          void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); <br>
          void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void
        *vaddr); <br>
        +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct
        vm_area_struct *vma); <br>
          int amdgpu_gem_debugfs_init(struct amdgpu_device *adev); <br>
            /* sub-allocation manager, it has to be protected by another
        lock. <br>
        diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
        b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c <br>
        index 2cdf844..9b63ac5 100644 <br>
        --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c <br>
        +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c <br>
        @@ -835,7 +835,7 @@ static struct drm_driver kms_driver = { <br>
              .prime_handle_to_fd = drm_gem_prime_handle_to_fd, <br>
              .prime_fd_to_handle = drm_gem_prime_fd_to_handle, <br>
              .gem_prime_export = amdgpu_gem_prime_export, <br>
        -    .gem_prime_import = drm_gem_prime_import, <br>
        +    .gem_prime_import = amdgpu_gem_prime_import, <br>
              .gem_prime_pin = amdgpu_gem_prime_pin, <br>
              .gem_prime_unpin = amdgpu_gem_prime_unpin, <br>
              .gem_prime_res_obj = amdgpu_gem_prime_res_obj, <br>
        @@ -843,6 +843,7 @@ static struct drm_driver kms_driver = { <br>
              .gem_prime_import_sg_table =
        amdgpu_gem_prime_import_sg_table, <br>
              .gem_prime_vmap = amdgpu_gem_prime_vmap, <br>
              .gem_prime_vunmap = amdgpu_gem_prime_vunmap, <br>
        +    .gem_prime_mmap = amdgpu_gem_prime_mmap, <br>
                .name = DRIVER_NAME, <br>
              .desc = DRIVER_DESC, <br>
        diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
        b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c <br>
        index 5b3f928..629a654 100644 <br>
        --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c <br>
        +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c <br>
        @@ -57,6 +57,42 @@ void amdgpu_gem_prime_vunmap(struct
        drm_gem_object *obj, void *vaddr) <br>
              ttm_bo_kunmap(&bo->dma_buf_vmap); <br>
          } <br>
          +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct
        vm_area_struct *vma) <br>
        +{ <br>
        +    struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); <br>
        +    struct amdgpu_device *adev =
        amdgpu_ttm_adev(bo->tbo.bdev); <br>
        +    unsigned asize = amdgpu_bo_size(bo); <br>
        +    int ret; <br>
        + <br>
        +    if (!obj->filp || !vma->vm_file) <br>
        +        return -ENODEV; <br>
        + <br>
        +    if (adev == NULL) <br>
        +        return -ENODEV; <br>
        + <br>
        +    /* Check for valid size. */ <br>
        +    if (asize < vma->vm_end - vma->vm_start) <br>
        +        return -EINVAL; <br>
        + <br>
        +    if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) || <br>
        +        (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
        <br>
        +        return -EPERM; <br>
        +    } <br>
        +    vma->vm_pgoff = amdgpu_bo_mmap_offset(bo) >>
        PAGE_SHIFT; <br>
        + <br>
        +    /* prime mmap does not need to check access, so allow here
        */ <br>
        +    ret = drm_vma_node_allow(&obj->vma_node,
        vma->vm_file->private_data); <br>
        +    if (ret) <br>
        +        return ret; <br>
        + <br>
        +    ret = ttm_bo_mmap(vma->vm_file, vma,
        &adev->mman.bdev); <br>
        +    drm_vma_node_revoke(&obj->vma_node,
        vma->vm_file->private_data); <br>
        +    if (ret) <br>
        +        return ret; <br>
        + <br>
        +    return 0; <br>
        +} <br>
        + <br>
          struct drm_gem_object * <br>
          amdgpu_gem_prime_import_sg_table(struct drm_device *dev, <br>
                           struct dma_buf_attachment *attach, <br>
        @@ -130,14 +166,71 @@ struct reservation_object
        *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) <br>
              return bo->tbo.resv; <br>
          } <br>
          +static int amdgpu_gem_begin_cpu_access(struct dma_buf
        *dma_buf, enum dma_data_direction direction) <br>
        +{ <br>
        +    struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
        <br>
        +    struct amdgpu_device *adev =
        amdgpu_ttm_adev(bo->tbo.bdev); <br>
        +    bool write = (direction == DMA_BIDIRECTIONAL || direction
        == DMA_TO_DEVICE); <br>
        +    int ret; <br>
        + <br>
        +    ret = amdgpu_gem_prime_pin(dma_buf->priv); <br>
        +    if (ret) <br>
        +        return ret; <br>
        +    /* invalidate the HDP read cache */ <br>
        +    if (write) <br>
        +        amdgpu_asic_invalidate_hdp(adev); <br>
      </blockquote>
      <br>
      You can drop the HDP invalidation here. amdgpu_gem_prime_pin()
      will pin the BO into system memory. <br>
      <br>
      <blockquote type="cite">+ <br>
        +    return ret; <br>
        +} <br>
        + <br>
        +static int amdgpu_gem_end_cpu_access(struct dma_buf *dma_buf,
        enum dma_data_direction direction) <br>
        +{ <br>
        +    struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
        <br>
        +    struct amdgpu_device *adev =
        amdgpu_ttm_adev(bo->tbo.bdev); <br>
        + <br>
        +    /* flush hdp write queue */ <br>
        +    amdgpu_asic_flush_hdp(adev); <br>
      </blockquote>
      <br>
      Ditto, the HDP flush is completely unnecessary here. <br>
      <br>
      BTW: You mixed up flush and invalidate. If we need to implement
      CPU access to VRAM, you need to flush when the CPU access starts
      and invalidate when it ends. <br>
      <br>
      Regards, <br>
      Christian. <br>
      <br>
      <blockquote type="cite">+   
        amdgpu_gem_prime_unpin(dma_buf->priv); <br>
        +    return 0; <br>
        +} <br>
        + <br>
        +static struct dma_buf_ops amdgpu_dmabuf_ops; <br>
        + <br>
          struct dma_buf *amdgpu_gem_prime_export(struct drm_device
        *dev, <br>
                              struct drm_gem_object *gobj, <br>
                              int flags) <br>
          { <br>
              struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); <br>
        +    struct dma_buf *dma_buf; <br>
                if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) <br>
                  return ERR_PTR(-EPERM); <br>
          -    return drm_gem_prime_export(dev, gobj, flags); <br>
        +    dma_buf = drm_gem_prime_export(dev, gobj, flags); <br>
        +    amdgpu_dmabuf_ops = *(dma_buf->ops); <br>
        +    amdgpu_dmabuf_ops.begin_cpu_access =
        amdgpu_gem_begin_cpu_access; <br>
        +    amdgpu_dmabuf_ops.end_cpu_access =
        amdgpu_gem_end_cpu_access; <br>
        +    dma_buf->ops = &amdgpu_dmabuf_ops; <br>
        + <br>
        +    return dma_buf; <br>
        +} <br>
        + <br>
        +struct drm_gem_object *amdgpu_gem_prime_import(struct
        drm_device *dev, <br>
        +                        struct dma_buf *dma_buf) <br>
        +{ <br>
        +    struct drm_gem_object *obj; <br>
        + <br>
        +    if (dma_buf->ops == &amdgpu_dmabuf_ops) { <br>
        +        obj = dma_buf->priv; <br>
        +        if (obj->dev == dev) { <br>
        +            /* <br>
        +             * Importing a dmabuf exported from our own GEM
        increases the <br>
        +             * refcount on the GEM object itself instead of the
        f_count of <br>
        +             * the dmabuf. <br>
        +             */ <br>
        +            drm_gem_object_get(obj); <br>
        +            return obj; <br>
        +        } <br>
        +    } <br>
        + <br>
        +    return drm_gem_prime_import(dev, dma_buf); <br>
          } <br>
      </blockquote>
    </blockquote>
  </body>
</html>