[PATCH 3/3] drm/amdkfd: optimize memory mapping latency
From: Eric Huang <jinhuieric.huang@amd.com>
Date: Fri, 28 May 2021 20:49:06 UTC
This change reduces memory mapping latency in two ways:
1. Conditionally flush TLBs after map: skip the legacy flush unless the
   page-table update actually freed table pages (tracked by the new
   table_freed flag).
2. Add a heavy-weight TLB flush after unmap, so no stale translations
   remain once a mapping is removed.
(A standalone sketch of this flush policy follows the diffstat below.)
Signed-off-by: Eric Huang <jinhuieric.huang@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 +
.../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 24 +++++++++++--------
.../drm/amd/amdkfd/kfd_device_queue_manager.c | 6 ++---
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 +-
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 4 ++--
8 files changed, 25 insertions(+), 17 deletions(-)
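
Note for reviewers: the intent of the flush policy can be summarized with
the minimal sketch below. It is a standalone userspace model, not part of
the patch; the enum values mirror the kernel's TLB_FLUSH_LEGACY /
TLB_FLUSH_HEAVYWEIGHT, while the helper names are made up for illustration
(the real code paths are kfd_ioctl_map_memory_to_gpu(),
kfd_ioctl_unmap_memory_from_gpu() and kfd_flush_tlb() in the diff below).

/*
 * Illustrative model of the flush policy in this patch (not kernel code).
 */
#include <stdbool.h>
#include <stdio.h>

enum tlb_flush_type { TLB_FLUSH_LEGACY, TLB_FLUSH_HEAVYWEIGHT };

/* Map path: a flush is only needed when page-table pages were freed. */
static void flush_after_map(bool table_freed)
{
	if (table_freed)
		printf("map:   TLB_FLUSH_LEGACY\n");
	else
		printf("map:   no flush needed\n");
}

/* Unmap path: always flush heavy-weight so stale translations go away. */
static void flush_after_unmap(void)
{
	printf("unmap: TLB_FLUSH_HEAVYWEIGHT\n");
}

int main(void)
{
	flush_after_map(false);	/* common case after this patch: flush skipped */
	flush_after_map(true);	/* tables freed during the update: flush */
	flush_after_unmap();
	return 0;
}
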
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 2560977760b3..997258c24ef2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -85,6 +85,7 @@ struct kgd_mem {
bool aql_queue;
bool is_imported;
+ bool table_freed;
};
/* KFD Memory Eviction */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 385c33675227..8ac0d849fd3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1132,6 +1132,8 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
return ret;
}
+ mem->table_freed = bo_va->table_freed;
+
return amdgpu_sync_fence(sync, bo_va->last_pt_update);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 36e7f088d4ee..0e0f27f779cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -87,6 +87,7 @@ struct amdgpu_bo_va {
bool cleared;
bool is_xgmi;
+ bool table_freed;
};
struct amdgpu_bo {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b1bdc89cb5d1..57793483c8d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1897,7 +1897,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
resv, mapping->start,
mapping->last, update_flags,
mapping->offset, mem,
- pages_addr, last_update, NULL,
+ pages_addr, last_update, &bo_va->table_freed,
vram_base_offset);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 960913a35ee4..c45ccd1d03c0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1658,16 +1658,18 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
}
/* Flush TLBs after waiting for the page table updates to complete */
- for (i = 0; i < args->n_devices; i++) {
- peer = kfd_device_by_id(devices_arr[i]);
- if (WARN_ON_ONCE(!peer))
- continue;
- peer_pdd = kfd_get_process_device_data(peer, p);
- if (WARN_ON_ONCE(!peer_pdd))
- continue;
- if (!amdgpu_read_lock(peer->ddev, true)) {
- kfd_flush_tlb(peer_pdd);
- amdgpu_read_unlock(peer->ddev);
+ if (((struct kgd_mem *)mem)->table_freed) {
+ for (i = 0; i < args->n_devices; i++) {
+ peer = kfd_device_by_id(devices_arr[i]);
+ if (WARN_ON_ONCE(!peer))
+ continue;
+ peer_pdd = kfd_get_process_device_data(peer, p);
+ if (WARN_ON_ONCE(!peer_pdd))
+ continue;
+ if (!amdgpu_read_lock(peer->ddev, true)) {
+ kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
+ amdgpu_read_unlock(peer->ddev);
+ }
}
}
@@ -1766,6 +1768,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
amdgpu_read_unlock(peer->ddev);
goto unmap_memory_from_gpu_failed;
}
+ ((struct kgd_mem *)mem)->table_freed = false;
+ kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
amdgpu_read_unlock(peer->ddev);
args->n_success = i+1;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c1bea1f7627b..a4920bc5cfbc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -278,7 +278,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
qpd->vmid,
qpd->page_table_base);
/* invalidate the VM context after pasid and vmid mapping is set up */
- kfd_flush_tlb(qpd_to_pdd(qpd));
+ kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
if (dqm->dev->kfd2kgd->set_scratch_backing_va)
dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
@@ -314,7 +314,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
if (flush_texture_cache_nocpsch(q->device, qpd))
pr_err("Failed to flush TC\n");
- kfd_flush_tlb(qpd_to_pdd(qpd));
+ kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
/* Release the vmid mapping */
set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
@@ -885,7 +885,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
dqm->dev->kgd,
qpd->vmid,
qpd->page_table_base);
- kfd_flush_tlb(pdd);
+ kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
}
/* Take a safe reference to the mm_struct, which may otherwise
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index ecdd5e782b81..edce3ecf207d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -1338,7 +1338,7 @@ void kfd_signal_reset_event(struct kfd_dev *dev);
void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid);
-void kfd_flush_tlb(struct kfd_process_device *pdd);
+void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 4ab9da288f90..a03373743a3d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -2161,7 +2161,7 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
}
-void kfd_flush_tlb(struct kfd_process_device *pdd)
+void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
{
struct kfd_dev *dev = pdd->dev;
@@ -2174,7 +2174,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd)
pdd->qpd.vmid);
} else {
amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
- pdd->process->pasid, TLB_FLUSH_LEGACY);
+ pdd->process->pasid, type);
}
}
--
2.25.1