[PATCH 3/7] drm/amdgpu: implement tlbs invalidate on gfx9 gfx10
Felix Kuehling
felix.kuehling at amd.com
Mon Jan 13 16:36:54 UTC 2020
On 2020-01-11 1:39 p.m., Alex Sierra wrote:
> tlbs invalidate pointer function added to kiq_pm4_funcs struct.
> This way, tlb flush can be done through kiq member.
> TLBs invalidation implemented for gfx9 and gfx10.
>
> Change-Id: I1b77b364f3ae0038ff3e70e869be5f2ef6e6d293
> Signed-off-by: Alex Sierra <alex.sierra at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 +++
> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 33 +++++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 32 ++++++++++++++++++++++++
> 3 files changed, 69 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 8e88e0411662..2927837bd401 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -76,11 +76,15 @@ struct kiq_pm4_funcs {
> struct amdgpu_ring *ring,
> u64 addr,
> u64 seq);
> + int (*kiq_invalidate_tlbs)(struct amdgpu_ring *kiq_ring,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub);
> /* Packet sizes */
> int set_resources_size;
> int map_queues_size;
> int unmap_queues_size;
> int query_status_size;
> + int invalidate_tlbs_size;
> };
>
> struct amdgpu_kiq {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 379e46c1b7f6..2e82213f57eb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -40,6 +40,7 @@
> #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h"
>
> #include "soc15.h"
> +#include "soc15d.h"
> #include "soc15_common.h"
> #include "clearstate_gfx10.h"
> #include "v10_structs.h"
> @@ -346,15 +347,47 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
> amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
> }
>
> +static int gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + signed long r;
> + uint32_t seq;
> + struct amdgpu_device *adev = kiq_ring->adev;
> + struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> + spin_lock(&adev->gfx.kiq.ring_lock);
> + amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size);
> + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> + amdgpu_ring_write(kiq_ring,
> + PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> + PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> + amdgpu_fence_emit_polling(kiq_ring, &seq);
> + amdgpu_ring_commit(kiq_ring);
> + spin_unlock(&adev->gfx.kiq.ring_lock);
The other KIQ functions don't include the emit_polling, the commit, or the
locking. I think the way the KIQ-funcs interface is meant to be used,
all of that should live outside the IP-version-specific functions. For
consistency, all this function should do is make the amdgpu_ring_write
calls with the IP-version-specific packets.
Regards,
Felix
> +
> + r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout);
> + if (r < 1) {
> + DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> + return -ETIME;
> + }
> +
> + return 0;
> +}
> +
> static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
> .kiq_set_resources = gfx10_kiq_set_resources,
> .kiq_map_queues = gfx10_kiq_map_queues,
> .kiq_unmap_queues = gfx10_kiq_unmap_queues,
> .kiq_query_status = gfx10_kiq_query_status,
> + .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs,
> .set_resources_size = 8,
> .map_queues_size = 7,
> .unmap_queues_size = 6,
> .query_status_size = 7,
> + .invalidate_tlbs_size = 12,
> };
>
> static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index ad0179ea2cc5..5be6fab55b73 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -837,15 +837,47 @@ static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
> amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
> }
>
> +static int gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
> + uint16_t pasid, uint32_t flush_type,
> + bool all_hub)
> +{
> + signed long r;
> + uint32_t seq;
> + struct amdgpu_device *adev = kiq_ring->adev;
> + struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> +
> + spin_lock(&adev->gfx.kiq.ring_lock);
> + amdgpu_ring_alloc(kiq_ring, kiq->pmf->invalidate_tlbs_size);
> + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
> + amdgpu_ring_write(kiq_ring,
> + PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
> + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
> + PACKET3_INVALIDATE_TLBS_PASID(pasid) |
> + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
> + amdgpu_fence_emit_polling(kiq_ring, &seq);
> + amdgpu_ring_commit(kiq_ring);
> + spin_unlock(&adev->gfx.kiq.ring_lock);
> +
> + r = amdgpu_fence_wait_polling(kiq_ring, seq, adev->usec_timeout);
> + if (r < 1) {
> + DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> + return -ETIME;
> + }
> +
> + return 0;
> +}
> +
> static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
> .kiq_set_resources = gfx_v9_0_kiq_set_resources,
> .kiq_map_queues = gfx_v9_0_kiq_map_queues,
> .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
> .kiq_query_status = gfx_v9_0_kiq_query_status,
> + .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
> .set_resources_size = 8,
> .map_queues_size = 7,
> .unmap_queues_size = 6,
> .query_status_size = 7,
> + .invalidate_tlbs_size = 12,
> };
>
> static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
More information about the amd-gfx
mailing list