[PATCH umr] Add initial AI VM decoding
Tom St Denis
tom.stdenis at amd.com
Thu Apr 6 18:19:00 UTC 2017
The more I think about it I wonder if
// AI+ allows 0=default (4KB) whereas VI requires it to be explictly
set to >=4
page_table_size = page_table_size ? page_table_size : 4;
Should be instead
page_table_size += 4;
Since in the later code
pde_idx = (pde_address >> (page_table_depth*9 + (12 +
page_table_size - 4)));
Would result in a 9-bit page size if say page_table_size was 1 which
should actually be 13 bits (8KB).
Tom
On 06/04/17 02:14 PM, Tom St Denis wrote:
> On 06/04/17 02:11 PM, Deucher, Alexander wrote:
>>> -----Original Message-----
>>> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf
>>> Of Tom St Denis
>>> Sent: Thursday, April 06, 2017 1:53 PM
>>> To: amd-gfx at lists.freedesktop.org
>>> Cc: StDenis, Tom
>>> Subject: [PATCH umr] Add initial AI VM decoding
>>>
>>> Tested with VMID0 decodings just fine. Haven't tried VMID1-15 yet.
>>>
>>> Signed-off-by: Tom St Denis <tom.stdenis at amd.com>
>>
>> Looks reasonable:
>> Acked-by: Alex Deucher <alexander.deucher at amd.com>
>
> Thanks. What I'm leery about is the page_table_size. On VI it's always
> 4KB (with our kernel) so the logic isn't vetted much in umr.
>
> Then to add to it it seems AI+ interpretation is a bit different with
> 0==4KB (instead of 4). I presume values greater than 0 are for
> multiples of 4KB (or multiples of 512 8-byte PDE/PTE entries) e.g. 1 =
> 8KB of next level entries, etc.
>
> I'll try adding a stall to libdrm's GFX test again and see if the VMID
>>= 1 decoding works in the meantime.
>
> Cheers,
> Tom
>
>
>
>>
>>> ---
>>> src/lib/read_vram.c | 180
>>> ++++++++++++++++++++++++++++++++++++++++++++++++++--
>>> 1 file changed, 176 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
>>> index e2087a252c10..4c74f4521857 100644
>>> --- a/src/lib/read_vram.c
>>> +++ b/src/lib/read_vram.c
>>> @@ -77,7 +77,6 @@ static int umr_read_sram(uint64_t address, uint32_t
>>> size, void *dst)
>>> return -1;
>>> }
>>>
>>> -
>>> static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid,
>>> uint64_t
>>> address, uint32_t size, void *dst)
>>> {
>>> uint64_t start_addr, page_table_start_addr, page_table_base_addr,
>>> @@ -144,7 +143,7 @@ static int umr_read_vram_vi(struct umr_asic *asic,
>>> uint32_t vmid, uint64_t addre
>>> if (page_table_depth == 1) {
>>> // decode addr into pte and pde selectors...
>>> pde_idx = (address >> (12 + 9 +
>>> page_table_size)) &
>>> ((1ULL << (40 - 12 - 9 - page_table_size)) - 1);
>>> - pte_idx = (address >> 12) & ((1ULL << (9 +
>>> page_table_size)) - 1);
>>> + pte_idx = (address >> (12 + page_table_size -
>>> 4)) &
>>> ((1ULL << (9 + page_table_size)) - 1);
>>>
>>> // read PDE entry
>>> umr_read_vram(asic, 0xFFFF, page_table_base_addr
>>> + pde_idx * 8, 8, &pde_entry);
>>> @@ -210,6 +209,172 @@ static int umr_read_vram_vi(struct umr_asic *asic,
>>> uint32_t vmid, uint64_t addre
>>> return 0;
>>> }
>>>
>>> +static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid,
>>> uint64_t
>>> address, uint32_t size, void *dst)
>>> +{
>>> + uint64_t start_addr, page_table_start_addr, page_table_base_addr,
>>> + page_table_size, pte_idx, pde_idx, pte_entry, pde_entry,
>>> + pde_address;
>>> + uint32_t chunk_size, tmp;
>>> + int page_table_depth, first;
>>> + struct {
>>> + uint64_t
>>> + frag_size,
>>> + pte_base_addr,
>>> + valid;
>>> + } pde_fields;
>>> + struct {
>>> + uint64_t
>>> + page_base_addr,
>>> + fragment,
>>> + system,
>>> + valid;
>>> + } pte_fields;
>>> + char buf[64];
>>> + unsigned char *pdst = dst;
>>> +
>>> + /*
>>> + * PTE format on VI:
>>> + * 63:40 reserved
>>> + * 39:12 4k physical page base address
>>> + * 11:7 fragment
>>> + * 6 write
>>> + * 5 read
>>> + * 4 exe
>>> + * 3 reserved
>>> + * 2 snooped
>>> + * 1 system
>>> + * 0 valid
>>> + *
>>> + * PDE format on VI:
>>> + * 63:59 block fragment size
>>> + * 58:40 reserved
>>> + * 39:1 physical base address of PTE
>>> + * bits 5:1 must be 0.
>>> + * 0 valid
>>> + */
>>> +
>>> + // read vm registers
>>> + sprintf(buf,
>>> "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_LO32", (int)vmid);
>>> + page_table_start_addr =
>>> (uint64_t)umr_read_reg_by_name(asic, buf) << 12;
>>> + sprintf(buf,
>>> "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_HI32", (int)vmid);
>>> + page_table_start_addr |=
>>> (uint64_t)umr_read_reg_by_name(asic, buf) << 44;
>>> +
>>> + sprintf(buf, "mmVM_CONTEXT%d_CNTL", (int)vmid);
>>> + tmp = umr_read_reg_by_name(asic, buf);
>>> + page_table_depth = umr_bitslice_reg_by_name(asic,
>>> buf,
>>> "PAGE_TABLE_DEPTH", tmp);
>>> + page_table_size = umr_bitslice_reg_by_name(asic,
>>> buf,
>>> "PAGE_TABLE_BLOCK_SIZE", tmp);
>>> +
>>> + sprintf(buf,
>>> "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_LO32", (int)vmid);
>>> + page_table_base_addr =
>>> (uint64_t)umr_read_reg_by_name(asic, buf) << 0;
>>> + sprintf(buf,
>>> "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_HI32", (int)vmid);
>>> + page_table_base_addr |=
>>> (uint64_t)umr_read_reg_by_name(asic, buf) << 32;
>>> +
>>> + DEBUG("VIRT_ADDR = %08llx\n", (unsigned long long)address);
>>> + DEBUG("PAGE_START_ADDR = %08llx\n", (unsigned long
>>> long)page_table_start_addr);
>>> + DEBUG("BASE_ADDR = 0x%08llx\n", (unsigned long
>>> long)page_table_base_addr);
>>> + DEBUG("BASE_SIZE = %lu\n", page_table_size);
>>> + DEBUG("PAGE_TABLE_DEPTH = %d\n", page_table_depth);
>>> +
>>> + address -= page_table_start_addr;
>>> +
>>> + // AI+ allows 0=default (4KB) whereas VI requires it to be
>>> explictly set
>>> to >=4
>>> + page_table_size = page_table_size ? page_table_size : 4;
>>> +
>>> + first = 1;
>>> + while (size) {
>>> + if (page_table_depth >= 1) {
>>> + // page_table_base_addr is not a PDE entry in this
>>> config so shift it out (it's a page address)
>>> + page_table_base_addr <<= 12;
>>> + pte_idx = (address >> (12 + page_table_size -
>>> 4)) &
>>> ((1ULL << (9 + page_table_size)) - 1);
>>> +
>>> + // AI+ supports more than 1 level of PDEs so we
>>> iterate for all of the depths
>>> + pde_address = address;
>>> + while (page_table_depth) {
>>> + // decode addr into pte and pde
>>> selectors...
>>> + pde_idx = (pde_address >>
>>> (page_table_depth*9 + (12 + page_table_size - 4)));
>>> +
>>> + // don't mask the first PDE idx
>>> + if (!first)
>>> + pde_idx &= (1ULL << 9) - 1;
>>> + first = 0;
>>> +
>>> + // read PDE entry
>>> + umr_read_vram(asic, 0xFFFF,
>>> page_table_base_addr + pde_idx * 8, 8, &pde_entry);
>>> +
>>> + // decode PDE values
>>> + pde_fields.frag_size = (pde_entry
>>> >> 59) &
>>> 0x1F;
>>> + pde_fields.pte_base_addr = pde_entry &
>>> 0xFFFFFFFFF000ULL;
>>> + pde_fields.valid = pde_entry & 1;
>>> + DEBUG("pde_idx=%llx, frag_size=%u,
>>> pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx,
>>> (unsigned)pde_fields.frag_size, (unsigned long
>>> long)pde_fields.pte_base_addr, (int)pde_fields.valid);
>>> +
>>> + // for the next round the address we're
>>> decoding is the phys address in the currently decoded PDE
>>> + --page_table_depth;
>>> + pde_address = pde_fields.pte_base_addr;
>>> + }
>>> +
>>> + // now read PTE entry for this page
>>> + umr_read_vram(asic, 0xFFFF,
>>> pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry);
>>> +
>>> + // decode PTE values
>>> + pte_fields.page_base_addr = pte_entry &
>>> 0xFFFFFFFFF000ULL;
>>> + pte_fields.fragment = (pte_entry >> 7) &
>>> 0x1F;
>>> + pte_fields.system = (pte_entry >> 1) & 1;
>>> + pte_fields.valid = pte_entry & 1;
>>> + DEBUG("pte_idx=%llx, page_base_addr=0x%llx,
>>> fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx,
>>> (unsigned long long)pte_fields.page_base_addr,
>>> (unsigned)pte_fields.fragment, (int)pte_fields.system,
>>> (int)pte_fields.valid);
>>> +
>>> + // compute starting address
>>> + start_addr = pte_fields.page_base_addr + (address
>>> & 0xFFF);
>>> + } else {
>>> + // in AI+ the BASE_ADDR is treated like a PDE
>>> entry...
>>> + // decode PDE values
>>> + pde_idx = 0; // unused
>>> + pde_fields.frag_size =
>>> (page_table_base_addr >>
>>> 59) & 0x1F;
>>> + pde_fields.pte_base_addr = page_table_base_addr
>>> & 0xFFFFFFFFF000ULL;
>>> + pde_fields.valid = page_table_base_addr
>>> & 1;
>>> + DEBUG("pde_idx=%llx, frag_size=%u,
>>> pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx,
>>> (unsigned)pde_fields.frag_size, (unsigned long
>>> long)pde_fields.pte_base_addr, (int)pde_fields.valid);
>>> +
>>> + // PTE addr = baseaddr[47:6] + (logical -
>>> start) >>
>>> fragsize)
>>> + pte_idx = (address >> (12 +
>>> pde_fields.frag_size));
>>> +
>>> + umr_read_vram(asic, 0xFFFF,
>>> pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry);
>>> +
>>> + // decode PTE values
>>> + pte_fields.page_base_addr = pte_entry &
>>> 0xFFFFFFFF000ULL;
>>> + pte_fields.fragment = (pte_entry >> 7) &
>>> 0x1F;
>>> + pte_fields.system = (pte_entry >> 1) & 1;
>>> + pte_fields.valid = pte_entry & 1;
>>> + DEBUG("pte_idx=%llx, page_base_addr=0x%llx,
>>> fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx,
>>> (unsigned long long)pte_fields.page_base_addr,
>>> (unsigned)pte_fields.fragment, (int)pte_fields.system,
>>> (int)pte_fields.valid);
>>> +
>>> + // compute starting address
>>> + start_addr = pte_fields.page_base_addr + (address
>>> & 0xFFF);
>>> + }
>>> +
>>> + // read upto 4K from it
>>> + // TODO: Support page sizes >4KB
>>> + if (((start_addr & 0xFFF) + size) & ~0xFFF) {
>>> + chunk_size = 0x1000 - (start_addr & 0xFFF);
>>> + } else {
>>> + chunk_size = size;
>>> + }
>>> + DEBUG("Computed address we will read from: %s:%llx
>>> (reading: %lu bytes)\n", pte_fields.system ? "sys" : "vram",
>>> (unsigned long
>>> long)start_addr, (unsigned long)chunk_size);
>>> + if (pte_fields.system) {
>>> + if (umr_read_sram(start_addr, chunk_size, pdst)
>>> < 0)
>>> {
>>> + fprintf(stderr, "[ERROR] Cannot read
>>> system
>>> ram, perhaps CONFIG_STRICT_DEVMEM is set in your kernel config?\n");
>>> + fprintf(stderr, "[ERROR] Alternatively
>>> download and install /dev/fmem\n");
>>> + return -1;
>>> + }
>>> + } else {
>>> + if (umr_read_vram(asic, 0xFFFF, start_addr,
>>> chunk_size, pdst) < 0) {
>>> + fprintf(stderr, "[ERROR] Cannot read from
>>> VRAM\n");
>>> + return -1;
>>> + }
>>> + }
>>> + pdst += chunk_size;
>>> + size -= chunk_size;
>>> + address += chunk_size;
>>> + }
>>> + return 0;
>>> +}
>>>
>>> int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t
>>> address,
>>> uint32_t size, void *dst)
>>> {
>>> @@ -234,8 +399,15 @@ int umr_read_vram(struct umr_asic *asic, uint32_t
>>> vmid, uint64_t address, uint32
>>> return 0;
>>> }
>>>
>>> - if (asic->family == FAMILY_VI)
>>> - return umr_read_vram_vi(asic, vmid, address, size, dst);
>>> + switch (asic->family) {
>>> + case FAMILY_VI:
>>> + return umr_read_vram_vi(asic, vmid, address, size,
>>> dst);
>>> + case FAMILY_AI:
>>> + return umr_read_vram_ai(asic, vmid, address, size,
>>> dst);
>>> + default:
>>> + fprintf(stderr, "[BUG] Unsupported ASIC family
>>> type
>>> for umr_read_vram()\n");
>>> + return -1;
>>> + }
>>>
>>> return 0;
>>> }
>>> --
>>> 2.12.0
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx
mailing list