[PATCH 2/2] drm/amdgpu/virt: schedule work to handle vm fault for VF

Mon Feb 6 08:36:01 UTC 2017

Hi Pixel,
I got your mean just now, since your VF must use KIQ to read/write 
registers, which use fence_wait to wait reading register completed.
The alternative way is implementing a new kiq reading/writing register 
way by using udelay instead of fence wait when reading/writing register 
in interrupt context.

Regards,
David Zhou

On 2017年02月06日 15:55, Ding, Pixel wrote:
> Thanks you for your comments, David. I totally agree on your point.
>
> However, The VM fault status registers record the latest VM fault info no matter when they’re changed, that’s what we care about since we don’t handle too much for VM fault even in bare metal system.
>
> On the other hand, what do you think if we insist to sleep in the interrupt context and let the driver runs into a software bug?
>
> The VM registers are shared among all VFs and we don’t have a copy for each in hardware, I think there's no other way to access them in this case. Do you have any suggestion?
>
> —
> Sincerely Yours,
> Pixel
>
>
>
>
>
>
>
>
> On 06/02/2017, 3:33 PM, "Zhou, David(ChunMing)" <David1.Zhou at amd.com> wrote:
>
>> INIT_WORK(&work->base, gmc_v8_0_vm_fault_sched);
>> However VF is used or not, schedule work shouldn't handle registers reading for interrupt, especially for status register, which could have been changed when you handle it in schedule work after interrupt.
>>
>> Regards,
>> David Zhou
>>
>> -----Original Message-----
>> From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf Of Pixel Ding
>> Sent: Monday, February 06, 2017 3:00 PM
>> To: amd-gfx at lists.freedesktop.org
>> Cc: Ding, Pixel <Pixel.Ding at amd.com>
>> Subject: [PATCH 2/2] drm/amdgpu/virt: schedule work to handle vm fault for VF
>>
>> VF uses KIQ to access registers that invoking fence_wait to get the accessing completed. When VM fault occurs, the driver can't sleep in interrupt context.
>>
>> For some test cases, VM fault is 'legal' and shouldn't cause driver soft lockup.
>>
>> Signed-off-by: Pixel Ding <Pixel.Ding at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 46 ++++++++++++++++++++++++++++++++---
>> 1 file changed, 43 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> index 7669b32..75c913f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
>> @@ -1231,9 +1231,9 @@ static int gmc_v8_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
>> 	return 0;
>> }
>>
>> -static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
>> -				      struct amdgpu_irq_src *source,
>> -				      struct amdgpu_iv_entry *entry)
>> +static int gmc_v8_0_process_vm_fault(struct amdgpu_device *adev,
>> +				     struct amdgpu_irq_src *source,
>> +				     struct amdgpu_iv_entry *entry)
>> {
>> 	u32 addr, status, mc_client;
>>
>> @@ -1262,6 +1262,46 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
>> 	return 0;
>> }
>>
>> +struct gmc_vm_fault_work {
>> +	struct work_struct      base;
>> +	struct amdgpu_device    *adev;
>> +	struct amdgpu_irq_src   *source;
>> +	struct amdgpu_iv_entry  *entry;
>> +};
>> +
>> +static void gmc_v8_0_vm_fault_sched(struct work_struct *work) {
>> +	struct gmc_vm_fault_work *vm_work =
>> +		container_of(work, struct gmc_vm_fault_work, base);
>> +	struct amdgpu_device *adev = vm_work->adev;
>> +	struct amdgpu_irq_src *source = vm_work->source;
>> +	struct amdgpu_iv_entry *entry = vm_work->entry;
>> +
>> +	gmc_v8_0_process_vm_fault(adev, source, entry);
>> +
>> +	kfree(vm_work);
>> +}
>> +
>> +static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
>> +				      struct amdgpu_irq_src *source,
>> +				      struct amdgpu_iv_entry *entry) {
>> +	struct gmc_vm_fault_work *work = NULL;
>> +
>> +	if (amdgpu_sriov_vf(adev)) {
>> +		work = kmalloc(sizeof(struct gmc_vm_fault_work), GFP_ATOMIC);
>> +		if (!work)
>> +			return -ENOMEM;
>> +		INIT_WORK(&work->base, gmc_v8_0_vm_fault_sched);
>> +		work->adev = adev;
>> +		work->source = source;
>> +		work->entry = entry;
>> +		return schedule_work(&work->base);
>> +	}
>> +
>> +	return gmc_v8_0_process_vm_fault(adev, source, entry); }
>> +
>> static void fiji_update_mc_medium_grain_clock_gating(struct amdgpu_device *adev,
>> 						     bool enable)
>> {
>> --
>> 2.7.4
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx