[PATCH v3 1/5] drm/amdgpu/vcn: refactor dec message functions
Christian König
christian.koenig at amd.com
Thu Nov 19 19:51:20 UTC 2020
Am 19.11.20 um 16:37 schrieb James Zhu:
>
> On 2020-11-19 9:58 a.m., Christian König wrote:
>> Am 19.11.20 um 15:52 schrieb James Zhu:
>>>
>>> On 2020-11-19 2:59 a.m., Christian König wrote:
>>>> Am 18.11.20 um 17:23 schrieb James Zhu:
>>>>> refactor dec message functions to add dec software ring support.
>>>>>
>>>>> Signed-off-by: James Zhu <James.Zhu at amd.com>
>>>>> ---
>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 30
>>>>> +++++++++++++++++++-----------
>>>>> 1 file changed, 19 insertions(+), 11 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
>>>>> index 7e19a66..32251db 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
>>>>> @@ -510,16 +510,16 @@ static int amdgpu_vcn_dec_send_msg(struct
>>>>> amdgpu_ring *ring,
>>>>> }
>>>>> static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring
>>>>> *ring, uint32_t handle,
>>>>> - struct dma_fence **fence)
>>>>> + struct amdgpu_bo **bo)
>>>>> {
>>>>> struct amdgpu_device *adev = ring->adev;
>>>>> - struct amdgpu_bo *bo = NULL;
>>>>> uint32_t *msg;
>>>>> int r, i;
>>>>> + *bo = NULL;
>>>>
>>>> This looks unnecessary to me.
>>>
>>> Hi Christian,
>>>
>>> I saw the code has such initialization before refactor. So I kept
>>> them.
>>>
>>> But If I remove this initialization, I will have kernel panic. Did I
>>> miss any other step.
>>
>> Ah, yes that's because the allocator thinks there is already a BO.
>>
>> I thought that this is for error handling. You need to initialize BO
>> to zero in the caller and not here.
>
> [JZ] Since this BO reference point is shared between create/destroy
> messages, so it needs initialization
>
> before bo create separately. So is it better to keep the
> initialization inside each functions?
Ok, good point. Feel free to add my rb as it is.
Thanks,
Christian.
>
> Best Regars!
>
> James
>
>>
>> Regards,
>> Christian
>>
>>>
>>> Thanks!
>>>
>>> James
>>>
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.781336] BUG: kernel NULL
>>> pointer dereference, address: 000000000000028a
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.781412] #PF: supervisor
>>> read access in kernel mode
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.781463] #PF:
>>> error_code(0x0000) - not-present page
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.781514] PGD 0 P4D 0
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.781547] Oops: 0000 [#1] SMP
>>> PTI
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.781586] CPU: 1 PID: 19
>>> Comm: kworker/1:0 Tainted: G OE 5.4.0-39-generic #43-Ubuntu
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.781670] Hardware name: MSI
>>> MS-7971/Z170A PC MATE (MS-7971), BIOS A.D0 12/22/2016
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.781922] Workqueue: events
>>> amdgpu_device_delayed_init_work_handler [amdgpu]
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.782156] RIP:
>>> 0010:amdgpu_bo_create_reserved+0xc1/0x1c0 [amdgpu]
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.782219] Code: 00 00 00 00
>>> 89 55 a8 89 4d ac 48 89 45 b8 c7 45 c0 01 00 00 00 48 c7 45 c8 00 00
>>> 00 00 c6 45 8f 00 4d 85 c9 0f 84 98 00 00 00 <49> 8b 81 90 01 00 00
>>> 49 8b b9 40 01 00 00 31 f6 4c 89 4d 90 48 89
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.782382] RSP:
>>> 0018:ffffb0cc40123d18 EFLAGS: 00010206
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.782435] RAX:
>>> 0000000000000021 RBX: ffffb0cc40123de0 RCX: 0000000000000004
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.782502] RDX:
>>> 0000000000001000 RSI: 0000000000000400 RDI: ffff9de4d4a80000
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.782569] RBP:
>>> ffffb0cc40123d98 R08: ffffb0cc40123de0 R09: 00000000000000fa
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.782636] R10:
>>> 0000000000000015 R11: ffff9de50ea699e0 R12: 0000000000000000
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.782702] R13:
>>> 0000000000000004 R14: ffffb0cc40123db0 R15: 0000000000000000
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.782771] FS:
>>> 0000000000000000(0000) GS:ffff9de50ea40000(0000) knlGS:0000000000000000
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.782846] CS: 0010 DS: 0000
>>> ES: 0000 CR0: 0000000080050033
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.782901] CR2:
>>> 000000000000028a CR3: 00000007aa00a003 CR4: 00000000003606e0
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.782968] DR0:
>>> 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.783035] DR3:
>>> 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.783101] Call Trace:
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.783138] ? call_rcu+0x10/0x20
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.783391]
>>> amdgpu_vcn_dec_get_create_msg.isra.0.constprop.0+0x3b/0xd0 [amdgpu]
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.783676]
>>> amdgpu_vcn_dec_ring_test_ib+0x3a/0xf0 [amdgpu]
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.783898]
>>> amdgpu_ib_ring_tests+0xf5/0x160 [amdgpu]
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.784094]
>>> amdgpu_device_delayed_init_work_handler+0x15/0x30 [amdgpu]
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.784163]
>>> process_one_work+0x1eb/0x3b0
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.784206]
>>> worker_thread+0x4d/0x400
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.784248] kthread+0x104/0x140
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.784285] ?
>>> process_one_work+0x3b0/0x3b0
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.784329] ?
>>> kthread_park+0x90/0x90
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.784371]
>>> ret_from_fork+0x35/0x40
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.784411] Modules linked in:
>>> amdgpu(OE) amd_iommu_v2 amd_sched(OE) amdttm(OE) amdkcl(OE)
>>> drm_kms_helper i2c_algo_bit fb_sys_fops syscopyarea sysfillrect
>>> sysimgblt binfmt_misc nls_iso8859_1 intel_rapl_msr intel_rapl_common
>>> x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel
>>> snd_hda_codec_realtek snd_hda_codec_generic ledtrig_audio
>>> snd_hda_codec_hdmi kvm snd_hda_intel snd_intel_dspcfg snd_hda_codec
>>> snd_hda_core snd_hwdep snd_pcm snd_seq_midi snd_seq_midi_event
>>> snd_rawmidi crct10dif_pclmul ghash_clmulni_intel snd_seq aesni_intel
>>> crypto_simd cryptd glue_helper snd_seq_device intel_cstate snd_timer
>>> intel_rapl_perf input_leds joydev snd serio_raw mxm_wmi soundcore
>>> mei_me mei intel_pch_thermal mac_hid acpi_pad sch_fq_codel
>>> parport_pc ppdev lp parport drm ip_tables x_tables autofs4
>>> hid_generic usbhid hid crc32_pclmul psmouse r8169 ahci i2c_i801
>>> realtek libahci wmi video
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.785115] CR2: 000000000000028a
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.785152] ---[ end trace
>>> 58c4ccffcda9e3c8 ]---
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.785354] RIP:
>>> 0010:amdgpu_bo_create_reserved+0xc1/0x1c0 [amdgpu]
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.785416] Code: 00 00 00 00
>>> 89 55 a8 89 4d ac 48 89 45 b8 c7 45 c0 01 00 00 00 48 c7 45 c8 00 00
>>> 00 00 c6 45 8f 00 4d 85 c9 0f 84 98 00 00 00 <49> 8b 81 90 01 00 00
>>> 49 8b b9 40 01 00 00 31 f6 4c 89 4d 90 48 89
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.785579] RSP:
>>> 0018:ffffb0cc40123d18 EFLAGS: 00010206
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.785631] RAX:
>>> 0000000000000021 RBX: ffffb0cc40123de0 RCX: 0000000000000004
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.785698] RDX:
>>> 0000000000001000 RSI: 0000000000000400 RDI: ffff9de4d4a80000
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.785764] RBP:
>>> ffffb0cc40123d98 R08: ffffb0cc40123de0 R09: 00000000000000fa
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.785831] R10:
>>> 0000000000000015 R11: ffff9de50ea699e0 R12: 0000000000000000
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.785898] R13:
>>> 0000000000000004 R14: ffffb0cc40123db0 R15: 0000000000000000
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.785965] FS:
>>> 0000000000000000(0000) GS:ffff9de50ea40000(0000) knlGS:0000000000000000
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.786041] CS: 0010 DS: 0000
>>> ES: 0000 CR0: 0000000080050033
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.786096] CR2:
>>> 000000000000028a CR3: 00000007aa00a003 CR4: 00000000003606e0
>>> Nov 19 09:39:04 jz-tester kernel: [ 123.786163] DR0:
>>> 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
>>>
>>>>
>>>>> r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>>>>> AMDGPU_GEM_DOMAIN_VRAM,
>>>>> - &bo, NULL, (void **)&msg);
>>>>> + bo, NULL, (void **)&msg);
>>>>> if (r)
>>>>> return r;
>>>>> @@ -540,20 +540,20 @@ static int
>>>>> amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
>>>>> for (i = 14; i < 1024; ++i)
>>>>> msg[i] = cpu_to_le32(0x0);
>>>>> - return amdgpu_vcn_dec_send_msg(ring, bo, fence);
>>>>> + return 0;
>>>>> }
>>>>> static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring
>>>>> *ring, uint32_t handle,
>>>>> - struct dma_fence **fence)
>>>>> + struct amdgpu_bo **bo)
>>>>> {
>>>>> struct amdgpu_device *adev = ring->adev;
>>>>> - struct amdgpu_bo *bo = NULL;
>>>>> uint32_t *msg;
>>>>> int r, i;
>>>>> + *bo = NULL;
>>>>
>>>> Same here.
>>>>
>>>> Apart from that looks good to me.
>>>>
>>>> With that fixed the patch is Reviewed-by: Christian König
>>>> <christian.koenig at amd.com>
>>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>> r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
>>>>> AMDGPU_GEM_DOMAIN_VRAM,
>>>>> - &bo, NULL, (void **)&msg);
>>>>> + bo, NULL, (void **)&msg);
>>>>> if (r)
>>>>> return r;
>>>>> @@ -566,19 +566,27 @@ static int
>>>>> amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
>>>>> for (i = 6; i < 1024; ++i)
>>>>> msg[i] = cpu_to_le32(0x0);
>>>>> - return amdgpu_vcn_dec_send_msg(ring, bo, fence);
>>>>> + return 0;
>>>>> }
>>>>> int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long
>>>>> timeout)
>>>>> {
>>>>> - struct dma_fence *fence;
>>>>> + struct dma_fence *fence = NULL;
>>>>> + struct amdgpu_bo *bo;
>>>>> long r;
>>>>> - r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL);
>>>>> + r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
>>>>> + if (r)
>>>>> + goto error;
>>>>> +
>>>>> + r = amdgpu_vcn_dec_send_msg(ring, bo, NULL);
>>>>> + if (r)
>>>>> + goto error;
>>>>> + r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
>>>>> if (r)
>>>>> goto error;
>>>>> - r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence);
>>>>> + r = amdgpu_vcn_dec_send_msg(ring, bo, &fence);
>>>>> if (r)
>>>>> goto error;
>>>>
>>
More information about the amd-gfx
mailing list