<html>
  <head>
    <meta content="text/html; charset=utf-8" http-equiv="Content-Type">
  </head>
  <body text="#000000" bgcolor="#FFFFFF">
    <br>
    <br>
    <div class="moz-cite-prefix">On 2017年05月24日 17:15, Christian König
      wrote:<br>
    </div>
    <blockquote
      cite="mid:5efe2316-b0bb-799b-345e-120c040237ae@vodafone.de"
      type="cite">Am 18.05.2017 um 07:30 schrieb zhoucm1:
      <br>
      <blockquote type="cite">
        <br>
        <br>
        On 2017年05月17日 17:22, Christian König wrote:
        <br>
        <blockquote type="cite">[SNIP]
          <br>
          +    /* In the case of a mixed PT the PDE must point to it*/
          <br>
          +    if (p->adev->asic_type < CHIP_VEGA10 ||
          <br>
          +        nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
          <br>
          +        p->func != amdgpu_vm_do_set_ptes ||
          <br>
          +        !(flags & AMDGPU_PTE_VALID)) {
          <br>
          +
          <br>
          +        pt = (*bo)->tbo.mem.start << PAGE_SHIFT;
          <br>
          +        pt = amdgpu_gart_get_vm_pde(p->adev, pt);
          <br>
          +        flags = AMDGPU_PTE_VALID;
          <br>
        </blockquote>
        This case should be handled when updating levels, so return
        directly?
        <br>
      </blockquote>
      <br>
      The problem is that when we switch from huge page back to a normal
      mapping because the BO was evicted we also need to reset the PDE.
      <br>
      <br>
      <blockquote type="cite">
        <blockquote type="cite">+    } else {
          <br>
          +        pt = amdgpu_gart_get_vm_pde(p->adev, dst);
          <br>
          +        flags |= AMDGPU_PDE_PTE;
          <br>
          +    }
          <br>
            -    return entry->bo;
          <br>
          +    if (entry->addr == pt &&
          <br>
          +        entry->huge_page == !!(flags &
          AMDGPU_PDE_PTE))
          <br>
          +        return 0;
          <br>
          +
          <br>
          +    entry->addr = pt;
          <br>
          +    entry->huge_page = !!(flags & AMDGPU_PDE_PTE);
          <br>
          +
          <br>
          +    if (parent->bo->shadow) {
          <br>
          +        pd_addr =
          amdgpu_bo_gpu_offset(parent->bo->shadow);
          <br>
          +        pde = pd_addr + pt_idx * 8;
          <br>
          +        amdgpu_vm_do_set_ptes(p, pde, pt, 1, 0, flags);
          <br>
        </blockquote>
        From the spec "any pde in the chain can itself take on the
        format of a PTE and point directly to <font color="#ff0000">an
          aligned block of logical address space</font> by setting the P
        bit.",
        <br>
        So here should pass addr into PDE instead of pt.
        <br>
      </blockquote>
      <br>
      The pt variable was overwritten with the address a few lines
      above. The code was indeed hard to understand, so I've fixed it.
      <br>
    </blockquote>
    This isn't what I meant; if I understand the spec correctly, it
    should be amdgpu_vm_do_set_ptes(p, pde, <font color="#ff0000">addr</font>,
    1, 0, flags);<br>
    <br>
    <blockquote
      cite="mid:5efe2316-b0bb-799b-345e-120c040237ae@vodafone.de"
      type="cite">
      <br>
      <blockquote type="cite">
        <blockquote type="cite">+    }
          <br>
          +
          <br>
          +    pd_addr = amdgpu_bo_gpu_offset(parent->bo);
          <br>
          +    pde = pd_addr + pt_idx * 8;
          <br>
          +    amdgpu_vm_do_set_ptes(p, pde, pt, 1, 0, flags);
          <br>
        </blockquote>
        Should pass addr into PDE instead of pt as well.
        <br>
        <br>
        <br>
        <blockquote type="cite">+    return 0;
          <br>
            }
          <br>
              /**
          <br>
          @@ -1194,14 +1237,20 @@ static int
          amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
          <br>
                uint64_t addr, pe_start;
          <br>
                struct amdgpu_bo *pt;
          <br>
                unsigned nptes;
          <br>
          +    int r;
          <br>
                  /* walk over the address space and update the page
          tables */
          <br>
                for (addr = start; addr < end; addr += nptes) {
          <br>
          -        pt = amdgpu_vm_get_pt(params, addr);
          <br>
          -        if (!pt) {
          <br>
          -            pr_err("PT not found, aborting update_ptes\n");
          <br>
          -            return -EINVAL;
          <br>
          -        }
          <br>
          +
          <br>
          +        if ((addr & ~mask) == (end & ~mask))
          <br>
          +            nptes = end - addr;
          <br>
          +        else
          <br>
          +            nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr &
          mask);
          <br>
          +
          <br>
          +        r = amdgpu_vm_handle_huge_pages(params, addr, nptes,
          <br>
          +                        dst, flags, &pt);
          <br>
        </blockquote>
        If huge page happens, its sub PTEs don't need to update more,
        they cannot be indexed by page table when that PDE is PTE,
        right?
        <br>
      </blockquote>
      <br>
      Right, but I wasn't sure if we don't need them for something. So
      I've kept the update for now.
      <br>
      <br>
      Going to try dropping this today.
      <br>
      <br>
      <blockquote type="cite">Btw: Is this BigK which people often said?
        <br>
      </blockquote>
      <br>
      Yes and no. I can explain more internally if you like.
      <br>
    </blockquote>
    That would be welcome.<br>
    <br>
    Regards,<br>
    David Zhou<br>
    <blockquote
      cite="mid:5efe2316-b0bb-799b-345e-120c040237ae@vodafone.de"
      type="cite">
      <br>
      Regards,
      <br>
      Christian.
      <br>
      <br>
      <blockquote type="cite">
        <br>
        Regards,
        <br>
        David Zhou
        <br>
        <blockquote type="cite">+        if (r)
          <br>
          +            return r;
          <br>
                      if (params->shadow) {
          <br>
                        if (!pt->shadow)
          <br>
          @@ -1209,11 +1258,6 @@ static int amdgpu_vm_update_ptes(struct
          amdgpu_pte_update_params *params,
          <br>
                        pt = pt->shadow;
          <br>
                    }
          <br>
            -        if ((addr & ~mask) == (end & ~mask))
          <br>
          -            nptes = end - addr;
          <br>
          -        else
          <br>
          -            nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr &
          mask);
          <br>
          -
          <br>
                    pe_start = amdgpu_bo_gpu_offset(pt);
          <br>
                    pe_start += (addr & mask) * 8;
          <br>
            @@ -1353,6 +1397,9 @@ static int
          amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
          <br>
                /* padding, etc. */
          <br>
                ndw = 64;
          <br>
            +    /* one PDE write for each huge page */
          <br>
          +    ndw += ((nptes >> adev->vm_manager.block_size) +
          1) * 7;
          <br>
          +
          <br>
                if (src) {
          <br>
                    /* only copy commands needed */
          <br>
                    ndw += ncmds * 7;
          <br>
          @@ -1437,6 +1484,7 @@ static int
          amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
          <br>
              error_free:
          <br>
                amdgpu_job_free(job);
          <br>
          +    amdgpu_vm_invalidate_level(&vm->root);
          <br>
                return r;
          <br>
            }
          <br>
            diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
          b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
          <br>
          index afe9073..1c5e0ce 100644
          <br>
          --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
          <br>
          +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
          <br>
          @@ -68,6 +68,9 @@ struct amdgpu_bo_list_entry;
          <br>
            /* TILED for VEGA10, reserved for older ASICs  */
          <br>
            #define AMDGPU_PTE_PRT        (1ULL << 51)
          <br>
            +/* PDE is handled as PTE for VEGA10 */
          <br>
          +#define AMDGPU_PDE_PTE        (1ULL << 54)
          <br>
          +
          <br>
            /* VEGA10 only */
          <br>
            #define AMDGPU_PTE_MTYPE(a)    ((uint64_t)a << 57)
          <br>
            #define AMDGPU_PTE_MTYPE_MASK    AMDGPU_PTE_MTYPE(3ULL)
          <br>
          @@ -90,6 +93,7 @@ struct amdgpu_bo_list_entry;
          <br>
            struct amdgpu_vm_pt {
          <br>
                struct amdgpu_bo    *bo;
          <br>
                uint64_t        addr;
          <br>
          +    bool            huge_page;
          <br>
                  /* array of page tables, one for each directory entry
          */
          <br>
                struct amdgpu_vm_pt    *entries;
          <br>
        </blockquote>
        <br>
      </blockquote>
      <br>
    </blockquote>
    <br>
  </body>
</html>