[PATCH] drm/amdgpu: fix a kcq hang issue for SRIOV
Christian König
ckoenig.leichtzumerken at gmail.com
Tue Mar 27 15:43:14 UTC 2018
Am 27.03.2018 um 17:37 schrieb Alex Deucher:
> On Tue, Mar 27, 2018 at 11:15 AM, Alex Deucher <alexdeucher at gmail.com> wrote:
>> On Tue, Mar 27, 2018 at 1:58 AM, Emily Deng <Emily.Deng at amd.com> wrote:
>>> issue:
>>> the vmflush in KCQ could be preempted (not like GFX ring
>>> which doesn't allow preemption in ring buffer) and this lead
>>> to vm flush fail when there is a world switch during
>>> the vm flush procedure (between write invalidate request
>>> and query invalidate ack)
>>>
>>> fix:
>>> separate vm flush for gfx and compute ring, and use
>>> the new format command in compute's vm flush which
>>> use only one package so no preemption could allowed
>>>
>>> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
>>> Signed-off-by: Emily Deng <Emily.Deng at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 ++
>>> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 10 +++++++++-
>>> drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 18 +++++++++++++-----
>>> 4 files changed, 25 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index a7e2229..986659f 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -1790,6 +1790,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
>>> #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
>>> #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
>>> #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
>>> +#define amdgpu_ring_emit_reg_wait1(r, d0, d1, v, m) (r)->funcs->emit_reg_wait1((r), (d0), (d1), (v), (m))
>>> #define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
>>> #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>>> #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> index 1d0d250..d85df5d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>> @@ -152,6 +152,8 @@ struct amdgpu_ring_funcs {
>>> void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
>>> void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
>>> uint32_t val, uint32_t mask);
>>> + void (*emit_reg_wait1)(struct amdgpu_ring *ring, uint32_t reg0,
>>> + uint32_t reg1, uint32_t val, uint32_t mask);
>> This is pretty ugly. We don't need a new callback for this
>> workaround. It will just confuse things for other IPs. Just call
>> gfx_v9_0_wait_reg_mem() directly if you need it in
>> gmc_v9_0_emit_flush_gpu_tlb().
> Nevermind, I misread the patch and thought the change was in the gfx9
> module. In this case, I'd rather split this into three patches.
>
> 1. add the new ring callback with a better name. E.g., emit_reg_wait_oneshot()
Maybe "emit_reg_write_wait()", but I still think that is an absolutely
worse idea.
> 2. add the new callback implementation to gfx9 and gfx8 (I think gfx8
> will need this as well since we support sr-iov there too)
gfx8 doesn't have the hardware bug which seems to make this necessary,
not does it have the same VMHUB design as gfx9.
> 3. in the gmc8 and 9 code tlb_flush callbacks, check if the ring
> supports the new callback and if so, use that, otherwise, fall back to
> the existing code.
Actually I would rather say we provide a stub in amdgpu_ring.c which
just uses the separate write and wait callbacks.
Christian.
>
>
>> Alex
>>
>>> void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
>>> /* priority functions */
>>> void (*set_priority) (struct amdgpu_ring *ring,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> index 1ae3de1..509c9d2 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>> @@ -4078,6 +4078,13 @@ static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
>>> gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
>>> }
>>>
>>> +static void gfx_v9_0_ring_emit_reg_wait_compute(struct amdgpu_ring *ring,
>>> + uint32_t reg0, uint32_t reg1,
>>> + uint32_t val, uint32_t mask)
>>> +{
>>> + gfx_v9_0_wait_reg_mem(ring, 0, 0, 1, reg0, reg1, val, mask, 0x20);
>>> +}
>>> +
>>> static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>>> enum amdgpu_interrupt_state state)
>>> {
>>> @@ -4415,7 +4422,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
>>> 7 + /* gfx_v9_0_ring_emit_hdp_flush */
>>> 5 + /* hdp invalidate */
>>> 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
>>> - SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
>>> + (SOC15_FLUSH_GPU_TLB_NUM_WREG - 1) * 5 +
>>> SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
>>> 2 + /* gfx_v9_0_ring_emit_vm_flush */
>>> 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
>>> @@ -4433,6 +4440,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
>>> .set_priority = gfx_v9_0_ring_set_priority_compute,
>>> .emit_wreg = gfx_v9_0_ring_emit_wreg,
>>> .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
>>> + .emit_reg_wait1 = gfx_v9_0_ring_emit_reg_wait_compute,
>>> };
>>>
>>> static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> index e687363..968447d 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
>>> @@ -385,11 +385,19 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
>>> amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
>>> upper_32_bits(pd_addr));
>>>
>>> - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
>>> -
>>> - /* wait for the invalidate to complete */
>>> - amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
>>> - 1 << vmid, 1 << vmid);
>>> + /* The world switch cannot be allowed to occur while
>>> + some invalidation controller code is waiting for an ack.
>>> + To workaround the hardware restriction, replace the original
>>> + two command with one command for compute ring */
>>> + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE && amdgpu_sriov_vf(adev)) {
> This will break gfx 8 with sr-iov.
>
> Alex
>
>>> + amdgpu_ring_emit_reg_wait1(ring, hub->vm_inv_eng0_req + eng,
>>> + hub->vm_inv_eng0_ack + eng, req, 1 << vmid);
>>> + } else {
>>> + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
>>> + /* wait for the invalidate to complete */
>>> + amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
>>> + 1 << vmid, 1 << vmid);
>>> + }
>>>
>>> return pd_addr;
>>> }
>>> --
>>> 2.7.4
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx
mailing list