[PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid
Felix Kuehling
felix.kuehling at amd.com
Fri Dec 20 21:32:49 UTC 2019
On 2019-12-20 1:24, Alex Sierra wrote:
> This can be used directly from amdgpu and amdkfd to invalidate
> TLB through pasid.
> It supports gmc v7, v8, v9 and v10.
Two small corrections inline to make the behaviour between KIQ and
MMIO-based flushing consistent. Looks good otherwise.
>
> Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
> Signed-off-by: Alex Sierra <alex.sierra at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++
> drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 81 ++++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 33 ++++++++++
> drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 34 ++++++++++
> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 84 +++++++++++++++++++++++++
> 5 files changed, 238 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> index b499a3de8bb6..b6413a56f546 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
> @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs {
> /* flush the vm tlb via mmio */
> void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
> uint32_t vmhub, uint32_t flush_type);
> + /* flush the vm tlb via pasid */
> + int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
> + uint32_t flush_type, bool all_hub);
> /* flush the vm tlb via ring */
> uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
> uint64_t pd_addr);
> @@ -216,6 +219,9 @@ struct amdgpu_gmc {
> };
>
> #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
> +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
> + ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
> + ((adev), (pasid), (type), (allhub)))
> #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
> #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
> #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index f5725336a5f2..b1a5408a8d7e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -30,6 +30,8 @@
> #include "hdp/hdp_5_0_0_sh_mask.h"
> #include "gc/gc_10_1_0_sh_mask.h"
> #include "mmhub/mmhub_2_0_0_sh_mask.h"
> +#include "athub/athub_2_0_0_sh_mask.h"
> +#include "athub/athub_2_0_0_offset.h"
> #include "dcn/dcn_2_0_0_offset.h"
> #include "dcn/dcn_2_0_0_sh_mask.h"
> #include "oss/osssys_5_0_0_offset.h"
> @@ -37,6 +39,7 @@
> #include "navi10_enum.h"
>
> #include "soc15.h"
> +#include "soc15d.h"
> #include "soc15_common.h"
>
> #include "nbio_v2_3.h"
> @@ -234,6 +237,48 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
> (!amdgpu_sriov_vf(adev)));
> }
>
> +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
> + struct amdgpu_device *adev,
> + uint8_t vmid, uint16_t *p_pasid)
> +{
> + uint32_t value;
> +
> + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> + + vmid);
> + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +
> + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> +}
> +
> +static int gmc_v10_0_invalidate_tlbs_with_kiq(struct amdgpu_device *adev,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + signed long r;
> + uint32_t seq;
> + struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +
> + spin_lock(&adev->gfx.kiq.ring_lock);
> + amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
> + amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> + amdgpu_ring_write(ring,
> + PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> + PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> + amdgpu_fence_emit_polling(ring, &seq);
> + amdgpu_ring_commit(ring);
> + spin_unlock(&adev->gfx.kiq.ring_lock);
> +
> + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> + if (r < 1) {
> + DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> + return -ETIME;
> + }
> +
> + return 0;
> +}
> +
> /*
> * GART
> * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -380,6 +425,41 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
> }
>
> +/**
> + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flushed
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + int vmid;
> + uint16_t queried_pasid;
> + bool ret;
> + struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +
> + if (amdgpu_emu_mode == 0 && ring->sched.ready)
> + return gmc_v10_0_invalidate_tlbs_with_kiq(adev,
> + pasid, flush_type, all_hub);
> +
> + for (vmid = 1; vmid < 16; vmid++) {
> +
> + ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> + &queried_pasid);
> + if (ret && queried_pasid == pasid) {
> + amdgpu_gmc_flush_gpu_tlb(adev, vmid,
> + AMDGPU_GFXHUB_0, 0);
This should honor the all_hub flag.
Also, this calls the function through the function pointer, which is
unnecessary. You know that you need the gmc_v10_0 version of the function,
so you can call gmc_v10_0_flush_gpu_tlb directly here.
> + break;
> + }
> + }
> +
> + return 0;
> +}
> +
> static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
> unsigned vmid, uint64_t pd_addr)
> {
> @@ -531,6 +611,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
>
> static const struct amdgpu_gmc_funcs gmc_v10_0_gmc_funcs = {
> .flush_gpu_tlb = gmc_v10_0_flush_gpu_tlb,
> + .flush_gpu_tlb_pasid = gmc_v10_0_flush_gpu_tlb_pasid,
> .emit_flush_gpu_tlb = gmc_v10_0_emit_flush_gpu_tlb,
> .emit_pasid_mapping = gmc_v10_0_emit_pasid_mapping,
> .map_mtype = gmc_v10_0_map_mtype,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> index f08e5330642d..19d5b133e1d7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
> @@ -418,6 +418,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
> return 0;
> }
>
> +/**
> + * gmc_v7_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flushed
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + int vmid;
> + unsigned int tmp;
> +
> + if (adev->in_gpu_reset)
> + return -EIO;
> +
> + for (vmid = 1; vmid < 16; vmid++) {
> +
> + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> + RREG32(mmVM_INVALIDATE_RESPONSE);
> + break;
> + }
> + }
> +
> + return 0;
> +}
> +
> /*
> * GART
> * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -1333,6 +1365,7 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
>
> static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
> .flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
> + .flush_gpu_tlb_pasid = gmc_v7_0_flush_gpu_tlb_pasid,
> .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
> .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
> .set_prt = gmc_v7_0_set_prt,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 6d96d40fbcb8..27d83204fa2b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -620,6 +620,39 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
> return 0;
> }
>
> +/**
> + * gmc_v8_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flushed
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + int vmid;
> + unsigned int tmp;
> +
> + if (adev->in_gpu_reset)
> + return -EIO;
> +
> + for (vmid = 1; vmid < 16; vmid++) {
> +
> + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
> + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
> + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
> + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
> + RREG32(mmVM_INVALIDATE_RESPONSE);
> + break;
> + }
> + }
> +
> + return 0;
> +
> +}
> +
> /*
> * GART
> * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -1700,6 +1733,7 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
>
> static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
> .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
> + .flush_gpu_tlb_pasid = gmc_v8_0_flush_gpu_tlb_pasid,
> .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
> .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
> .set_prt = gmc_v8_0_set_prt,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index fa025ceeea0f..eb1e64bd56ed 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -38,10 +38,12 @@
> #include "dce/dce_12_0_sh_mask.h"
> #include "vega10_enum.h"
> #include "mmhub/mmhub_1_0_offset.h"
> +#include "athub/athub_1_0_sh_mask.h"
> #include "athub/athub_1_0_offset.h"
> #include "oss/osssys_4_0_offset.h"
>
> #include "soc15.h"
> +#include "soc15d.h"
> #include "soc15_common.h"
> #include "umc/umc_6_0_sh_mask.h"
>
> @@ -434,6 +436,47 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
> adev->pdev->device == 0x15d8)));
> }
>
> +static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
> + uint8_t vmid, uint16_t *p_pasid)
> +{
> + uint32_t value;
> +
> + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
> + + vmid);
> + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
> +
> + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
> +}
> +
> +static int gmc_v9_0_invalidate_tlbs_with_kiq(struct amdgpu_device *adev,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + signed long r;
> + uint32_t seq;
> + struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +
> + spin_lock(&adev->gfx.kiq.ring_lock);
> + amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
> + amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> + amdgpu_ring_write(ring,
> + PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> + PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> + amdgpu_fence_emit_polling(ring, &seq);
> + amdgpu_ring_commit(ring);
> + spin_unlock(&adev->gfx.kiq.ring_lock);
> +
> + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> + if (r < 1) {
> + DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> + return -ETIME;
> + }
> +
> + return 0;
> +}
> +
> /*
> * GART
> * VMID 0 is the physical GPU addresses as used by the kernel.
> @@ -532,6 +575,46 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
> DRM_ERROR("Timeout waiting for VM flush ACK!\n");
> }
>
> +/**
> + * gmc_v9_0_flush_gpu_tlb_pasid - tlb flush via pasid
> + *
> + * @adev: amdgpu_device pointer
> + * @pasid: pasid to be flushed
> + *
> + * Flush the TLB for the requested pasid.
> + */
> +static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + int vmid, i;
> + uint16_t queried_pasid;
> + bool ret;
> + struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
> +
> + if (adev->in_gpu_reset)
> + return -EIO;
> +
> + if (ring->sched.ready)
> + return gmc_v9_0_invalidate_tlbs_with_kiq(adev,
> + pasid, flush_type, all_hub);
> +
> + for (vmid = 1; vmid < 16; vmid++) {
> +
> + ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid,
> + &queried_pasid);
> + if (ret && queried_pasid == pasid) {
> + for (i = 0; i < adev->num_vmhubs; i++)
> + amdgpu_gmc_flush_gpu_tlb(adev, vmid,
> + i, flush_type);
This unconditionally flushes all hubs. It should honor the all_hub flag.
As above, you can call gmc_v9_0_flush_gpu_tlb directly here.
Regards,
Felix
> + break;
> + }
> + }
> +
> + return 0;
> +
> +}
> +
> static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
> unsigned vmid, uint64_t pd_addr)
> {
> @@ -693,6 +776,7 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
>
> static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
> .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
> + .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid,
> .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
> .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
> .map_mtype = gmc_v9_0_map_mtype,
More information about the amd-gfx
mailing list