[PATCH 22/25] drm/amdkfd: Add TC flush on VMID deallocation for Hawaii
Felix Kuehling
Felix.Kuehling at amd.com
Sat Jan 27 01:09:38 UTC 2018
On GFX7 the CP does not perform a TC flush when queues are unmapped.
To prevent TC evictions from accessing an invalid VMID, flush the TC
explicitly before releasing the VMID.
Signed-off-by: Amber Lin <Amber.Lin at amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
---
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 22 +++++++++-
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 37 ++++++++++++++++
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 ++
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 51 ++++++++++++++++++++++
4 files changed, 112 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index b3b6dab..c18e048 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -142,12 +142,31 @@ static int allocate_vmid(struct device_queue_manager *dqm,
return 0;
}
+static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
+ struct qcm_process_device *qpd)
+{
+ uint32_t len;
+
+ if (!qpd->ib_kaddr)
+ return -ENOMEM;
+
+ len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
+
+ return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
+ qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len);
+}
+
static void deallocate_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
{
int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
+ /* On GFX v7, CP doesn't flush TC at dequeue */
+ if (q->device->device_info->asic_family == CHIP_HAWAII)
+ if (flush_texture_cache_nocpsch(q->device, qpd))
+ pr_err("Failed to flush TC\n");
+
kfd_flush_tlb(qpd_to_pdd(qpd));
/* Release the vmid mapping */
@@ -792,11 +811,12 @@ static void uninitialize(struct device_queue_manager *dqm)
static int start_nocpsch(struct device_queue_manager *dqm)
{
init_interrupts(dqm);
- return 0;
+ return pm_init(&dqm->packets, dqm);
}
static int stop_nocpsch(struct device_queue_manager *dqm)
{
+ pm_uninit(&dqm->packets);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 0ecbd1f..7614375 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -356,6 +356,43 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
return retval;
}
+/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size
+ * of this packet
+ * @gpu_addr - GPU address of the packet. It's a virtual address.
+ * @buffer - buffer to fill up with the packet. It's a CPU kernel pointer
+ * Return - length of the packet in dwords
+ */
+uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer)
+{
+ struct pm4_mec_release_mem *packet;
+
+ WARN_ON(!buffer);
+
+ packet = (struct pm4_mec_release_mem *)buffer;
+ memset(buffer, 0, sizeof(*packet));
+
+ packet->header.u32All = build_pm4_header(IT_RELEASE_MEM,
+ sizeof(*packet));
+
+ packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+ packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
+ packet->bitfields2.tcl1_action_ena = 1;
+ packet->bitfields2.tc_action_ena = 1;
+ packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
+ packet->bitfields2.atc = 0;
+
+ packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
+ packet->bitfields3.int_sel =
+ int_sel___release_mem__send_interrupt_after_write_confirm;
+
+ packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
+ packet->address_hi = upper_32_bits(gpu_addr);
+
+ packet->data_lo = 0;
+
+ return sizeof(*packet) / sizeof(unsigned int);
+}
+
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
pm->dqm = dqm;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 78200ba..050fd00 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -494,6 +494,7 @@ struct qcm_process_device {
/* IB memory */
uint64_t ib_base;
+ void *ib_kaddr;
};
/* KFD Memory Eviction */
@@ -832,6 +833,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
void pm_release_ib(struct packet_manager *pm);
+uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer);
+
uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
/* Events */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 12101fb..25d7dfe 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -151,6 +151,53 @@ static int kfd_process_alloc_gpuvm(struct kfd_process *p,
return err;
}
+/* kfd_process_reserve_ib_mem - Reserve memory inside the process for IB usage
+ * The memory reserved is for KFD to submit IB to AMDGPU from kernel.
+ * If the memory is reserved successfully, ib_kaddr will have
+ * the CPU/kernel address. Check ib_kaddr before accessing the
+ * memory.
+ */
+static int kfd_process_reserve_ib_mem(struct kfd_process *p)
+{
+ int ret = 0;
+ struct kfd_process_device *temp, *pdd = NULL;
+ struct kfd_dev *kdev = NULL;
+ struct qcm_process_device *qpd = NULL;
+ void *kaddr;
+ uint32_t flags = ALLOC_MEM_FLAGS_GTT |
+ ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
+ ALLOC_MEM_FLAGS_WRITABLE |
+ ALLOC_MEM_FLAGS_EXECUTABLE;
+
+ list_for_each_entry_safe(pdd, temp, &p->per_device_data,
+ per_device_list) {
+ kdev = pdd->dev;
+ qpd = &pdd->qpd;
+ if (qpd->ib_kaddr)
+ continue;
+
+ if (qpd->ib_base) { /* is dGPU */
+ ret = kfd_process_alloc_gpuvm(p, kdev,
+ qpd->ib_base, PAGE_SIZE,
+ &kaddr, pdd, flags);
+ if (!ret)
+ qpd->ib_kaddr = kaddr;
+ else
+ /* In case of error, the kfd_bos for some pdds
+ * which are already allocated successfully
+ * will be freed in upper level function
+ * i.e. create_process().
+ */
+ return ret;
+ } else {
+ /* FIXME: Support APU */
+ continue;
+ }
+ }
+
+ return 0;
+}
+
struct kfd_process *kfd_create_process(struct file *filep)
{
struct kfd_process *process;
@@ -499,6 +546,9 @@ static struct kfd_process *create_process(const struct task_struct *thread,
INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
process->last_restore_timestamp = get_jiffies_64();
+ err = kfd_process_reserve_ib_mem(process);
+ if (err)
+ goto err_reserve_ib_mem;
err = kfd_process_init_cwsr(process, filep);
if (err)
goto err_init_cwsr;
@@ -506,6 +556,7 @@ static struct kfd_process *create_process(const struct task_struct *thread,
return process;
err_init_cwsr:
+err_reserve_ib_mem:
kfd_process_free_outstanding_kfd_bos(process);
kfd_process_destroy_pdds(process);
err_init_apertures:
--
2.7.4
More information about the amd-gfx
mailing list