[PATCH umr] Add initial AI VM decoding
Tom St Denis
tom.stdenis at amd.com
Thu Apr 6 17:52:35 UTC 2017
Tested with VMID0 decodings just fine. Haven't tried VMID1-15 yet.
Signed-off-by: Tom St Denis <tom.stdenis at amd.com>
---
src/lib/read_vram.c | 180 ++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 176 insertions(+), 4 deletions(-)
diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
index e2087a252c10..4c74f4521857 100644
--- a/src/lib/read_vram.c
+++ b/src/lib/read_vram.c
@@ -77,7 +77,6 @@ static int umr_read_sram(uint64_t address, uint32_t size, void *dst)
return -1;
}
-
static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32_t size, void *dst)
{
uint64_t start_addr, page_table_start_addr, page_table_base_addr,
@@ -144,7 +143,7 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
if (page_table_depth == 1) {
// decode addr into pte and pde selectors...
pde_idx = (address >> (12 + 9 + page_table_size)) & ((1ULL << (40 - 12 - 9 - page_table_size)) - 1);
- pte_idx = (address >> 12) & ((1ULL << (9 + page_table_size)) - 1);
+ pte_idx = (address >> (12 + page_table_size - 4)) & ((1ULL << (9 + page_table_size)) - 1);
// read PDE entry
umr_read_vram(asic, 0xFFFF, page_table_base_addr + pde_idx * 8, 8, &pde_entry);
@@ -210,6 +209,172 @@ static int umr_read_vram_vi(struct umr_asic *asic, uint32_t vmid, uint64_t addre
return 0;
}
+static int umr_read_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32_t size, void *dst)
+{
+ uint64_t start_addr, page_table_start_addr, page_table_base_addr,
+ page_table_size, pte_idx, pde_idx, pte_entry, pde_entry,
+ pde_address;
+ uint32_t chunk_size, tmp;
+ int page_table_depth, first;
+ struct {
+ uint64_t
+ frag_size,
+ pte_base_addr,
+ valid;
+ } pde_fields;
+ struct {
+ uint64_t
+ page_base_addr,
+ fragment,
+ system,
+ valid;
+ } pte_fields;
+ char buf[64];
+ unsigned char *pdst = dst;
+
+ /*
+ * PTE format on VI:
+ * 63:40 reserved
+ * 39:12 4k physical page base address
+ * 11:7 fragment
+ * 6 write
+ * 5 read
+ * 4 exe
+ * 3 reserved
+ * 2 snooped
+ * 1 system
+ * 0 valid
+ *
+ * PDE format on VI:
+ * 63:59 block fragment size
+ * 58:40 reserved
+ * 39:1 physical base address of PTE
+ * bits 5:1 must be 0.
+ * 0 valid
+ */
+
+ // read vm registers
+ sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_LO32", (int)vmid);
+ page_table_start_addr = (uint64_t)umr_read_reg_by_name(asic, buf) << 12;
+ sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_START_ADDR_HI32", (int)vmid);
+ page_table_start_addr |= (uint64_t)umr_read_reg_by_name(asic, buf) << 44;
+
+ sprintf(buf, "mmVM_CONTEXT%d_CNTL", (int)vmid);
+ tmp = umr_read_reg_by_name(asic, buf);
+ page_table_depth = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_DEPTH", tmp);
+ page_table_size = umr_bitslice_reg_by_name(asic, buf, "PAGE_TABLE_BLOCK_SIZE", tmp);
+
+ sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_LO32", (int)vmid);
+ page_table_base_addr = (uint64_t)umr_read_reg_by_name(asic, buf) << 0;
+ sprintf(buf, "mmVM_CONTEXT%d_PAGE_TABLE_BASE_ADDR_HI32", (int)vmid);
+ page_table_base_addr |= (uint64_t)umr_read_reg_by_name(asic, buf) << 32;
+
+ DEBUG("VIRT_ADDR = %08llx\n", (unsigned long long)address);
+ DEBUG("PAGE_START_ADDR = %08llx\n", (unsigned long long)page_table_start_addr);
+ DEBUG("BASE_ADDR = 0x%08llx\n", (unsigned long long)page_table_base_addr);
+ DEBUG("BASE_SIZE = %lu\n", page_table_size);
+ DEBUG("PAGE_TABLE_DEPTH = %d\n", page_table_depth);
+
+ address -= page_table_start_addr;
+
+ // AI+ allows 0=default (4KB) whereas VI requires it to be explictly set to >=4
+ page_table_size = page_table_size ? page_table_size : 4;
+
+ first = 1;
+ while (size) {
+ if (page_table_depth >= 1) {
+ // page_table_base_addr is not a PDE entry in this config so shift it out (it's a page address)
+ page_table_base_addr <<= 12;
+ pte_idx = (address >> (12 + page_table_size - 4)) & ((1ULL << (9 + page_table_size)) - 1);
+
+ // AI+ supports more than 1 level of PDEs so we iterate for all of the depths
+ pde_address = address;
+ while (page_table_depth) {
+ // decode addr into pte and pde selectors...
+ pde_idx = (pde_address >> (page_table_depth*9 + (12 + page_table_size - 4)));
+
+ // don't mask the first PDE idx
+ if (!first)
+ pde_idx &= (1ULL << 9) - 1;
+ first = 0;
+
+ // read PDE entry
+ umr_read_vram(asic, 0xFFFF, page_table_base_addr + pde_idx * 8, 8, &pde_entry);
+
+ // decode PDE values
+ pde_fields.frag_size = (pde_entry >> 59) & 0x1F;
+ pde_fields.pte_base_addr = pde_entry & 0xFFFFFFFFF000ULL;
+ pde_fields.valid = pde_entry & 1;
+ DEBUG("pde_idx=%llx, frag_size=%u, pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr, (int)pde_fields.valid);
+
+ // for the next round the address we're decoding is the phys address in the currently decoded PDE
+ --page_table_depth;
+ pde_address = pde_fields.pte_base_addr;
+ }
+
+ // now read PTE entry for this page
+ umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx*8, 8, &pte_entry);
+
+ // decode PTE values
+ pte_fields.page_base_addr = pte_entry & 0xFFFFFFFFF000ULL;
+ pte_fields.fragment = (pte_entry >> 7) & 0x1F;
+ pte_fields.system = (pte_entry >> 1) & 1;
+ pte_fields.valid = pte_entry & 1;
+ DEBUG("pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid);
+
+ // compute starting address
+ start_addr = pte_fields.page_base_addr + (address & 0xFFF);
+ } else {
+ // in AI+ the BASE_ADDR is treated like a PDE entry...
+ // decode PDE values
+ pde_idx = 0; // unused
+ pde_fields.frag_size = (page_table_base_addr >> 59) & 0x1F;
+ pde_fields.pte_base_addr = page_table_base_addr & 0xFFFFFFFFF000ULL;
+ pde_fields.valid = page_table_base_addr & 1;
+ DEBUG("pde_idx=%llx, frag_size=%u, pte_base_addr=0x%llx, valid=%d\n", (unsigned long long)pde_idx, (unsigned)pde_fields.frag_size, (unsigned long long)pde_fields.pte_base_addr, (int)pde_fields.valid);
+
+ // PTE addr = baseaddr[47:6] + (logical - start) >> fragsize)
+ pte_idx = (address >> (12 + pde_fields.frag_size));
+
+ umr_read_vram(asic, 0xFFFF, pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry);
+
+ // decode PTE values
+ pte_fields.page_base_addr = pte_entry & 0xFFFFFFFF000ULL;
+ pte_fields.fragment = (pte_entry >> 7) & 0x1F;
+ pte_fields.system = (pte_entry >> 1) & 1;
+ pte_fields.valid = pte_entry & 1;
+ DEBUG("pte_idx=%llx, page_base_addr=0x%llx, fragment=%u, system=%d, valid=%d\n", (unsigned long long)pte_idx, (unsigned long long)pte_fields.page_base_addr, (unsigned)pte_fields.fragment, (int)pte_fields.system, (int)pte_fields.valid);
+
+ // compute starting address
+ start_addr = pte_fields.page_base_addr + (address & 0xFFF);
+ }
+
+ // read upto 4K from it
+ // TODO: Support page sizes >4KB
+ if (((start_addr & 0xFFF) + size) & ~0xFFF) {
+ chunk_size = 0x1000 - (start_addr & 0xFFF);
+ } else {
+ chunk_size = size;
+ }
+ DEBUG("Computed address we will read from: %s:%llx (reading: %lu bytes)\n", pte_fields.system ? "sys" : "vram", (unsigned long long)start_addr, (unsigned long)chunk_size);
+ if (pte_fields.system) {
+ if (umr_read_sram(start_addr, chunk_size, pdst) < 0) {
+ fprintf(stderr, "[ERROR] Cannot read system ram, perhaps CONFIG_STRICT_DEVMEM is set in your kernel config?\n");
+ fprintf(stderr, "[ERROR] Alternatively download and install /dev/fmem\n");
+ return -1;
+ }
+ } else {
+ if (umr_read_vram(asic, 0xFFFF, start_addr, chunk_size, pdst) < 0) {
+ fprintf(stderr, "[ERROR] Cannot read from VRAM\n");
+ return -1;
+ }
+ }
+ pdst += chunk_size;
+ size -= chunk_size;
+ address += chunk_size;
+ }
+ return 0;
+}
int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32_t size, void *dst)
{
@@ -234,8 +399,15 @@ int umr_read_vram(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32
return 0;
}
- if (asic->family == FAMILY_VI)
- return umr_read_vram_vi(asic, vmid, address, size, dst);
+ switch (asic->family) {
+ case FAMILY_VI:
+ return umr_read_vram_vi(asic, vmid, address, size, dst);
+ case FAMILY_AI:
+ return umr_read_vram_ai(asic, vmid, address, size, dst);
+ default:
+ fprintf(stderr, "[BUG] Unsupported ASIC family type for umr_read_vram()\n");
+ return -1;
+ }
return 0;
}
--
2.12.0
More information about the amd-gfx
mailing list