[PATCH umr 2/3] Generalize decoding of PDEs and PTEs in AI+
Joseph Greathouse
Joseph.Greathouse at amd.com
Thu Jun 17 19:25:39 UTC 2021
Brings decoding of PDEs and PTEs for AI+ chips into their own
functions, so that we don't end up with subtly different decoding
bugs in the variety of places such decodings are done.
Also fixes a minor bug where we were pulling PTE.PRT from bit 61
instead of the proper bit 51.
Signed-off-by: Joseph Greathouse <Joseph.Greathouse at amd.com>
---
src/lib/read_vram.c | 187 ++++++++++++++++++++++++++------------------
1 file changed, 109 insertions(+), 78 deletions(-)
diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
index 049acd4..2998873 100644
--- a/src/lib/read_vram.c
+++ b/src/lib/read_vram.c
@@ -317,6 +317,104 @@ static uint64_t log2_vm_size(uint64_t page_table_start_addr, uint64_t page_table
return vm_bits;
}
+typedef struct {
+ uint64_t
+ frag_size,
+ pte_base_addr,
+ valid,
+ system,
+ coherent,
+ pte,
+ further;
+} pde_fields_ai_t;
+
+typedef struct {
+ uint64_t
+ valid,
+ system,
+ coherent,
+ tmz,
+ execute,
+ read,
+ write,
+ fragment,
+ page_base_addr,
+ prt,
+ pde,
+ further,
+ mtype;
+} pte_fields_ai_t;
+
+/*
+ * PDE format on AI:
+ * 63:59 block fragment size
+ * 58:55 reserved
+ * But if bit 56 is set, this is a PTE with 'further' set,
+ * which makes it act like a PDE.
+ * 54 pde-is-pte
+ * 53:48 reserved
+ * 47:6 physical base address of PTE
+ * 2 cache coherent/snoop
+ * 1 system
+ * 0 valid
+ */
+static pde_fields_ai_t decode_pde_entry_ai(uint64_t pde_entry)
+{
+ pde_fields_ai_t pde_fields;
+ pde_fields.frag_size = (pde_entry >> 59) & 0x1F;
+ pde_fields.pte_base_addr = pde_entry & 0xFFFFFFFFFFC0ULL;
+ pde_fields.valid = pde_entry & 1;
+ pde_fields.system = (pde_entry >> 1) & 1;
+ pde_fields.coherent = (pde_entry >> 2) & 1;
+ pde_fields.pte = (pde_entry >> 54) & 1;
+ pde_fields.further = (pde_entry >> 56) & 1;
+ return pde_fields;
+}
+
+/*
+ * PTE format on AI and PI:
+ * 58:57 mtype
+ * 56 further
+ * 54 reserved
+ * But if it is set, then this is actually a PDE with 'P'
+ * bit set, which makes the PDE act like a PTE.
+ * 51 prt
+ * 47:12 4k physical page base address
+ * 11:7 fragment
+ * 6 write
+ * 5 read
+ * 4 exe
+ * 3 tmz (PI+)
+ * 2 snooped / coherent
+ * 1 system
+ * 0 valid
+ */
+static pte_fields_ai_t decode_pte_entry_ai(uint64_t pte_entry)
+{
+ pte_fields_ai_t pte_fields;
+ pte_fields.valid = pte_entry & 1;
+ pte_fields.system = (pte_entry >> 1) & 1;
+ pte_fields.coherent = (pte_entry >> 2) & 1;
+ pte_fields.tmz = (pte_entry >> 3) & 1;
+ pte_fields.execute = (pte_entry >> 4) & 1;
+ pte_fields.read = (pte_entry >> 5) & 1;
+ pte_fields.write = (pte_entry >> 6) & 1;
+ pte_fields.fragment = (pte_entry >> 7) & 0x1F;
+ pte_fields.prt = (pte_entry >> 51) & 1;
+ pte_fields.pde = (pte_entry >> 54) & 1;
+ pte_fields.further = (pte_entry >> 56) & 1;
+ pte_fields.mtype = (pte_entry >> 57) & 3;
+
+ // PTEs hold physical address in 47:12
+ // PDEs hold physical address in 47:6, so if this is a PTE-as-PDE (further), need a different mask
+ if (pte_fields.further)
+ pte_fields.page_base_addr = pte_entry & 0xFFFFFFFFFFC0ULL;
+ else
+ pte_fields.page_base_addr = pte_entry & 0xFFFFFFFFF000ULL;
+
+ return pte_fields;
+}
+
/**
* umr_access_vram_ai - Access GPU mapped memory for GFX9+ platforms
*/
@@ -352,24 +450,9 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
mmMC_VM_AGP_BOT,
mmMC_VM_AGP_TOP;
} registers;
- struct {
- uint64_t
- frag_size,
- pte_base_addr,
- valid,
- system,
- cache,
- pte;
- } pde_fields, pde_array[8];
- struct {
- uint64_t
- page_base_addr,
- fragment,
- system,
- valid,
- prt,
- further;
- } pte_fields;
+
+ pde_fields_ai_t pde_fields, pde_array[8];
+ pte_fields_ai_t pte_fields;
char buf[64];
unsigned char *pdst = dst;
char *hub, *vm0prefix, *regprefix;
@@ -379,27 +462,6 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
memset(&registers, 0, sizeof registers);
memset(&pde_array, 0xff, sizeof pde_array);
- /*
- * PTE format on AI:
- * 47:12 4k physical page base address
- * 11:7 fragment
- * 6 write
- * 5 read
- * 4 exe
- * 3 reserved
- * 2 snooped
- * 1 system
- * 0 valid
- *
- * PDE format on AI:
- * 63:59 block fragment size
- * 58:40 reserved
- * 47:6 physical base address of PTE
- * 2 cache coherent/snoop
- * 1 system
- * 0 valid
- */
-
hubid = vmid & 0xFF00;
vmid &= 0xFF;
@@ -627,13 +689,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
further = 0;
if (page_table_depth >= 1) {
- // decode PDE values
- pde_fields.frag_size = (pde_entry >> 59) & 0x1F;
- pde_fields.pte_base_addr = pde_entry & 0xFFFFFFFFF000ULL;
- pde_fields.valid = pde_entry & 1;
- pde_fields.system = (pde_entry >> 1) & 1;
- pde_fields.cache = (pde_entry >> 2) & 1;
- pde_fields.pte = (pde_entry >> 54) & 1;
+ pde_fields = decode_pde_entry_ai(pde_entry);
// AI+ supports more than 1 level of PDEs so we iterate for all of the depths
pde_address = pde_fields.pte_base_addr;
@@ -663,7 +719,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
pde_fields.pte_base_addr,
pde_fields.valid,
pde_fields.system,
- pde_fields.cache,
+ pde_fields.coherent,
pde_fields.pte);
memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields);
@@ -712,13 +768,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
}
}
- // decode PDE values
- pde_fields.frag_size = (pde_entry >> 59) & 0x1F;
- pde_fields.pte_base_addr = pde_entry & 0xFFFFFFFFF000ULL;
- pde_fields.valid = pde_entry & 1;
- pde_fields.system = (pde_entry >> 1) & 1;
- pde_fields.cache = (pde_entry >> 2) & 1;
- pde_fields.pte = (pde_entry >> 54) & 1;
+ pde_fields = decode_pde_entry_ai(pde_entry);
if (current_depth == 1) {
pde0_block_fragment_size = pde_fields.frag_size;
/*
@@ -751,7 +801,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
pde_fields.pte_base_addr,
pde_fields.valid,
pde_fields.system,
- pde_fields.cache,
+ pde_fields.coherent,
pde_fields.pte,
pde_fields.frag_size);
memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields);
@@ -817,14 +867,8 @@ pte_further:
return -1;
}
- // decode PTE values
pde_is_pte:
- pte_fields.fragment = (pte_entry >> 7) & 0x1F;
- pte_fields.system = (pte_entry >> 1) & 1;
- pte_fields.valid = pte_entry & 1;
- pte_fields.prt = (pte_entry >> 61) & 1;
- pte_fields.further = (pte_entry >> 56) & 1;
- pte_fields.page_base_addr = pte_entry & (pte_fields.further ? 0xFFFFFFFFFFC0ULL : 0xFFFFFFFFF000ULL);
+ pte_fields = decode_pte_entry_ai(pte_entry);
if (asic->options.verbose)
asic->mem_funcs.vm_message("%s %s@{0x%" PRIx64 "/%" PRIx64"}==0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", P=%" PRIu64 ", FS=%" PRIu64 ", F=%" PRIu64 "\n",
@@ -901,12 +945,7 @@ pde_is_pte:
va_mask &= (upper_mask & ~pte_page_mask);
// grab PTE base address and other data from the PTE that has the F bit set.
- pde_fields.frag_size = (pte_entry >> 59) & 0x1F;
- pde_fields.pte_base_addr = pte_entry & 0xFFFFFFFFFFC0ULL;
- pde_fields.valid = pte_entry & 1;
- pde_fields.system = (pte_entry >> 1) & 1;
- pde_fields.cache = (pte_entry >> 2) & 1;
- pde_fields.pte = 0;
+ pde_fields = decode_pde_entry_ai(pte_entry);
further = 1;
goto pte_further;
}
@@ -928,12 +967,9 @@ pde_is_pte:
} else {
// in AI+ the BASE_ADDR is treated like a PDE entry...
// decode PDE values
- pde_fields.frag_size = (page_table_base_addr >> 59) & 0x1F;
+ pde_fields = decode_pde_entry_ai(pde_entry);
pde0_block_fragment_size = pde_fields.frag_size;
pte_page_mask = (1ULL << (12 + pde0_block_fragment_size)) - 1;
- pde_fields.pte_base_addr = page_table_base_addr & 0xFFFFFFFFF000ULL;
- pde_fields.system = (page_table_base_addr >> 1) & 1;
- pde_fields.valid = page_table_base_addr & 1;
if ((asic->options.no_fold_vm_decode || memcmp(&pde_array[0], &pde_fields, sizeof pde_fields)) && asic->options.verbose)
asic->mem_funcs.vm_message("PDE=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", FS=%" PRIu64 "\n",
@@ -953,12 +989,7 @@ pde_is_pte:
if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry) < 0)
return -1;
- // decode PTE values
- pte_fields.page_base_addr = pte_entry & 0xFFFFFFFFF000ULL;
- pte_fields.fragment = (pte_entry >> 7) & 0x1F;
- pte_fields.system = (pte_entry >> 1) & 1;
- pte_fields.valid = pte_entry & 1;
- pte_fields.prt = 0;
+ pte_fields = decode_pte_entry_ai(pte_entry);
if (asic->options.verbose)
asic->mem_funcs.vm_message("\\-> PTE=0x%016" PRIx64 ", VA=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", F=%" PRIu64 ", V=%" PRIu64 ", S=%" PRIu64 "\n",
--
2.20.1
More information about the amd-gfx
mailing list