[PATCH v2] drm/amdgpu: add support for user trap handlers

Wed Sep 23 12:52:30 UTC 2020

On 8/28/20 10:23 AM, Christian König wrote:
> Am 28.08.20 um 10:14 schrieb Samuel Pitoiset:
>>
>> On 8/28/20 9:57 AM, Christian König wrote:
>>> Am 25.08.20 um 16:07 schrieb Samuel Pitoiset:
>>>> A trap handler can be used by userspace to catch shader exceptions
>>>> like divide by zero, memory violations etc.
>>>>
>>>> On GFX6-GFX8, the registers used to configure TBA/TMA aren't
>>>> privileged and can be configured from userpace.
>>>>
>>>> On GFX9+ they are per VMID and privileged, only the KMD can
>>>> configure them. At the moment, we don't know how to set them
>>>> via the CP, so they are only emitted if a VMID is reserved.
>>>>
>>>> This introduces a new CS chunk that can be used to set the
>>>> TBA/TMA virtual address at submit time.
>>>>
>>>> TODO:
>>>> - rebase on top of amd-staging-drm-next (this branch currently
>>>> hangs my GPU at boot)
>>>
>>> Please split that up into multiple patches. The first one adding the 
>>> general infrastructure and the following one the implementation for 
>>> gfx9 and gfx10.
>> Sounds good.
>>>
>>> And maybe even support this for gfx6-8 even if it is not necessary? 
>>> Looks trivial to implement and would give userspace a more uniform 
>>> handling for this.
>> v1 added gfx6-8 support but I removed it in v2 as requested by Alex 
>> because it's useless and might be better for preemption.
>>>
>>> A few more comments below.
>>>
>>>>
>>>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 31 
>>>> ++++++++++++++++++++++++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  |  3 ++-
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h  |  4 +++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  4 +++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  4 +++
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 20 +++++++++++++--
>>>>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c   | 20 +++++++++++++++
>>>>   drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 19 +++++++++++++++
>>>>   include/uapi/drm/amdgpu_drm.h            |  8 ++++++
>>>>   9 files changed, 110 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> index a512ccbc4dea..6ca5c4912e3a 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>>> @@ -104,6 +104,19 @@ static int amdgpu_cs_bo_handles_chunk(struct 
>>>> amdgpu_cs_parser *p,
>>>>       return r;
>>>>   }
>>>>   +static int amdgpu_cs_user_trap_chunk(struct amdgpu_cs_parser *p,
>>>> +                     struct drm_amdgpu_cs_chunk_trap *data,
>>>> +                     uint64_t *tba_addr, uint64_t *tma_addr)
>>>> +{
>>>> +    if (!data->tba_addr || !data->tma_addr)
>>>> +        return -EINVAL;
>>>> +
>>>> +    *tba_addr = data->tba_addr;
>>>> +    *tma_addr = data->tma_addr;
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>>   static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, 
>>>> union drm_amdgpu_cs *cs)
>>>>   {
>>>>       struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>>>> @@ -112,6 +125,7 @@ static int amdgpu_cs_parser_init(struct 
>>>> amdgpu_cs_parser *p, union drm_amdgpu_cs
>>>>       uint64_t *chunk_array;
>>>>       unsigned size, num_ibs = 0;
>>>>       uint32_t uf_offset = 0;
>>>> +    uint64_t tba_addr = 0, tma_addr = 0;
>>>>       int i;
>>>>       int ret;
>>>>   @@ -214,6 +228,19 @@ static int amdgpu_cs_parser_init(struct 
>>>> amdgpu_cs_parser *p, union drm_amdgpu_cs
>>>>                 break;
>>>>   +        case AMDGPU_CHUNK_ID_TRAP:
>>>> +            size = sizeof(struct drm_amdgpu_cs_chunk_trap);
>>>> +            if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
>>>> +                ret = -EINVAL;
>>>> +                goto free_partial_kdata;
>>>> +            }
>>>> +
>>>> +            ret = amdgpu_cs_user_trap_chunk(p, p->chunks[i].kdata,
>>>> +                            &tba_addr, &tma_addr);
>>>> +            if (ret)
>>>> +                goto free_partial_kdata;
>>>> +            break;
>>>> +
>>>>           case AMDGPU_CHUNK_ID_DEPENDENCIES:
>>>>           case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
>>>>           case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
>>>> @@ -239,6 +266,10 @@ static int amdgpu_cs_parser_init(struct 
>>>> amdgpu_cs_parser *p, union drm_amdgpu_cs
>>>>         if (p->uf_entry.tv.bo)
>>>>           p->job->uf_addr = uf_offset;
>>>> +
>>>> +    p->job->tba_addr = tba_addr;
>>>> +    p->job->tma_addr = tma_addr;
>>>> +
>>>>       kfree(chunk_array);
>>>>         /* Use this opportunity to fill in task info for the vm */
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>> index 26127c7d2f32..1e703119e4c2 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>> @@ -88,9 +88,10 @@
>>>>    * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for 
>>>> correctness
>>>>    * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
>>>>    * - 3.39.0 - DMABUF implicit sync does a full pipeline sync
>>>> + * - 3.40.0 - Add AMDGPU_CHUNK_ID_TRAP
>>>>    */
>>>>   #define KMS_DRIVER_MAJOR    3
>>>> -#define KMS_DRIVER_MINOR    39
>>>> +#define KMS_DRIVER_MINOR    40
>>>>   #define KMS_DRIVER_PATCHLEVEL    0
>>>>     int amdgpu_vram_limit = 0;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
>>>> index 8e58325bbca2..fd0d56724b4d 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
>>>> @@ -58,6 +58,10 @@ struct amdgpu_vmid {
>>>>       uint32_t        oa_base;
>>>>       uint32_t        oa_size;
>>>>   +    /* user trap */
>>>> +    uint64_t        tba_addr;
>>>> +    uint64_t        tma_addr;
>>>> +
>>>>       unsigned        pasid;
>>>>       struct dma_fence    *pasid_mapping;
>>>>   };
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
>>>> index 81caac9b958a..b8ed5b13ea44 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
>>>> @@ -62,6 +62,10 @@ struct amdgpu_job {
>>>>       /* user fence handling */
>>>>       uint64_t        uf_addr;
>>>>       uint64_t        uf_sequence;
>>>> +
>>>> +    /* user trap handling */
>>>> +    uint64_t        tba_addr;
>>>> +    uint64_t        tma_addr;
>>>>   };
>>>>     int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>> index da871d84b742..1f165a6295d9 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>>>> @@ -197,6 +197,9 @@ struct amdgpu_ring_funcs {
>>>>       void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
>>>>       int (*preempt_ib)(struct amdgpu_ring *ring);
>>>>       void (*emit_mem_sync)(struct amdgpu_ring *ring);
>>>> +    void (*emit_trap_handler)(struct amdgpu_ring *ring,
>>>> +                  uint32_t vmid,
>>>> +                  uint64_t tba_addr, uint64_t tma_addr);
>>>>   };
>>>>     struct amdgpu_ring {
>>>> @@ -265,6 +268,7 @@ struct amdgpu_ring {
>>>>   #define amdgpu_ring_emit_vm_flush(r, vmid, addr) 
>>>> (r)->funcs->emit_vm_flush((r), (vmid), (addr))
>>>>   #define amdgpu_ring_emit_fence(r, addr, seq, flags) 
>>>> (r)->funcs->emit_fence((r), (addr), (seq), (flags))
>>>>   #define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) 
>>>> (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), 
>>>> (as))
>>>> +#define amdgpu_ring_emit_trap_handler(r, v, tba, tma) 
>>>> (r)->funcs->emit_trap_handler((r), (v), (tba), (tma))
>>>>   #define amdgpu_ring_emit_hdp_flush(r) 
>>>> (r)->funcs->emit_hdp_flush((r))
>>>>   #define amdgpu_ring_emit_switch_buffer(r) 
>>>> (r)->funcs->emit_switch_buffer((r))
>>>>   #define amdgpu_ring_emit_cntxcntl(r, d) 
>>>> (r)->funcs->emit_cntxcntl((r), (d))
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index 71e005cf2952..6b619bb03777 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -1079,15 +1079,22 @@ int amdgpu_vm_flush(struct amdgpu_ring 
>>>> *ring, struct amdgpu_job *job,
>>>>       bool vm_flush_needed = job->vm_needs_flush;
>>>>       struct dma_fence *fence = NULL;
>>>>       bool pasid_mapping_needed = false;
>>>> +    bool trap_handler_needed = false;
>>>>       unsigned patch_offset = 0;
>>>>       bool update_spm_vmid_needed = (job->vm && 
>>>> (job->vm->reserved_vmid[vmhub] != NULL));
>>>>       int r;
>>>>   -    if (update_spm_vmid_needed && 
>>>> adev->gfx.rlc.funcs->update_spm_vmid)
>>>> +    if (update_spm_vmid_needed && 
>>>> adev->gfx.rlc.funcs->update_spm_vmid) {
>>>>           adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);
>>>>   +        trap_handler_needed = ring->funcs->emit_trap_handler && (
>>>> +            id->tba_addr != job->tba_addr ||
>>>> +            id->tma_addr != job->tma_addr);
>>>
>>> That's probably not such a good idea since it makes the trap handler 
>>> depend on the VMID reservation.
>>>
>>>> +    }
>>>> +
>>>>       if (amdgpu_vmid_had_gpu_reset(adev, id)) {
>>>>           gds_switch_needed = true;
>>>> +        trap_handler_needed = true;
>>>>           vm_flush_needed = true;
>>>>           pasid_mapping_needed = true;
>>>>       }
>>>> @@ -1099,12 +1106,14 @@ int amdgpu_vm_flush(struct amdgpu_ring 
>>>> *ring, struct amdgpu_job *job,
>>>>       mutex_unlock(&id_mgr->lock);
>>>>         gds_switch_needed &= !!ring->funcs->emit_gds_switch;
>>>> +    trap_handler_needed &= !!ring->funcs->emit_trap_handler;
>>>>       vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
>>>>               job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
>>>>       pasid_mapping_needed &= 
>>>> adev->gmc.gmc_funcs->emit_pasid_mapping &&
>>>>           ring->funcs->emit_wreg;
>>>>   -    if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
>>>> +    if (!vm_flush_needed && !gds_switch_needed &&
>>>> +        !trap_handler_needed && !need_pipe_sync)
>>>>           return 0;
>>>>         if (ring->funcs->init_cond_exec)
>>>> @@ -1158,6 +1167,13 @@ int amdgpu_vm_flush(struct amdgpu_ring 
>>>> *ring, struct amdgpu_job *job,
>>>>                           job->oa_size);
>>>>       }
>>>>   +    if (ring->funcs->emit_trap_handler && trap_handler_needed) {
>>>> +        id->tba_addr = job->tba_addr;
>>>> +        id->tma_addr = job->tma_addr;
>>>> +        amdgpu_ring_emit_trap_handler(ring, job->vmid, job->tba_addr,
>>>> +                          job->tma_addr);
>>>> +    }
>>>> +
>>>
>>> Well that doesn't seem to make sense at all here.
>>>
>>>>       if (ring->funcs->patch_cond_exec)
>>>>           amdgpu_ring_patch_cond_exec(ring, patch_offset);
>>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>>> index 65997ffaed45..f864b217589f 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>>>> @@ -7141,6 +7141,24 @@ static void 
>>>> gfx_v10_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
>>>>                       (1 << (oa_size + oa_base)) - (1 << oa_base));
>>>>   }
>>>>   +static void gfx_v10_0_ring_emit_trap_handler(struct amdgpu_ring 
>>>> *ring,
>>>> +                        uint32_t vmid,
>>>> +                        uint64_t tba_addr,
>>>> +                        uint64_t tma_addr)
>>>> +{
>>>> +    struct amdgpu_device *adev = ring->adev;
>>>> +
>>>> +    mutex_lock(&adev->srbm_mutex);
>>>> +    nv_grbm_select(adev, 0, 0, 0, vmid);
>>>> +    WREG32_SOC15_RLC(GC, 0, mmSQ_SHADER_TBA_LO, 
>>>> lower_32_bits(tba_addr >> 8));
>>>> +    WREG32_SOC15_RLC(GC, 0, mmSQ_SHADER_TBA_HI, 
>>>> upper_32_bits(tba_addr >> 8) |
>>>> +                1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT);
>>>> +    WREG32_SOC15_RLC(GC, 0, mmSQ_SHADER_TMA_LO, 
>>>> lower_32_bits(tma_addr >> 8));
>>>> +    WREG32_SOC15_RLC(GC, 0, mmSQ_SHADER_TMA_HI, 
>>>> upper_32_bits(tma_addr >> 8));
>>>> +    nv_grbm_select(adev, 0, 0, 0, 0);
>>>> +    mutex_unlock(&adev->srbm_mutex);
>>>
>>> This is not emitting the trap handler update to the ring, but 
>>> writing it directly to the registers.
>>
>> This uses direct driver access because we don't know how to emit them 
>> via CP. This is also why it's only emitted if a VMID is reserved.
>>
>> I think Alex is having a discussion with the CP team about that.
>
> Ah! Ok in this case please keep the patch on hold until this is cleared.
>
> But even when the VMID is reserved this could cause big problems if 
> userspace decides to change the trap addresses on the fly.
>
> So we can't really do it like this without waiting for the hardware to 
> be idle and causing massive performance loss.

So, according to Alex, nothing will happen for current GFX9-10 ASICs but 
we would like to be able to use that behind the vmid reservation logic.

Do you have any more comments on this patch?

>
> Regards,
> Christian.
>
>>
>>>
>>> Regards,
>>> Christian.
>>>
>>>> +}
>>>> +
>>>>   static int gfx_v10_0_early_init(void *handle)
>>>>   {
>>>>       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>>>> @@ -8530,6 +8548,7 @@ static const struct amdgpu_ring_funcs 
>>>> gfx_v10_0_ring_funcs_gfx = {
>>>>       .emit_reg_write_reg_wait = 
>>>> gfx_v10_0_ring_emit_reg_write_reg_wait,
>>>>       .soft_recovery = gfx_v10_0_ring_soft_recovery,
>>>>       .emit_mem_sync = gfx_v10_0_emit_mem_sync,
>>>> +    .emit_trap_handler = gfx_v10_0_ring_emit_trap_handler,
>>>>   };
>>>>     static const struct amdgpu_ring_funcs 
>>>> gfx_v10_0_ring_funcs_compute = {
>>>> @@ -8566,6 +8585,7 @@ static const struct amdgpu_ring_funcs 
>>>> gfx_v10_0_ring_funcs_compute = {
>>>>       .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
>>>>       .emit_reg_write_reg_wait = 
>>>> gfx_v10_0_ring_emit_reg_write_reg_wait,
>>>>       .emit_mem_sync = gfx_v10_0_emit_mem_sync,
>>>> +    .emit_trap_handler = gfx_v10_0_ring_emit_trap_handler,
>>>>   };
>>>>     static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>>> index cb9d60a4e05e..4fc00f196085 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>>>> @@ -4162,6 +4162,23 @@ static void 
>>>> gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
>>>>                      (1 << (oa_size + oa_base)) - (1 << oa_base));
>>>>   }
>>>>   +static void gfx_v9_0_ring_emit_trap_handler(struct amdgpu_ring 
>>>> *ring,
>>>> +                        uint32_t vmid,
>>>> +                        uint64_t tba_addr,
>>>> +                        uint64_t tma_addr)
>>>> +{
>>>> +    struct amdgpu_device *adev = ring->adev;
>>>> +
>>>> +    mutex_lock(&adev->srbm_mutex);
>>>> +    soc15_grbm_select(adev, 0, 0, 0, vmid);
>>>> +    WREG32_SOC15_RLC(GC, 0, mmSQ_SHADER_TBA_LO, 
>>>> lower_32_bits(tba_addr >> 8));
>>>> +    WREG32_SOC15_RLC(GC, 0, mmSQ_SHADER_TBA_HI, 
>>>> upper_32_bits(tba_addr >> 8));
>>>> +    WREG32_SOC15_RLC(GC, 0, mmSQ_SHADER_TMA_LO, 
>>>> lower_32_bits(tma_addr >> 8));
>>>> +    WREG32_SOC15_RLC(GC, 0, mmSQ_SHADER_TMA_HI, 
>>>> upper_32_bits(tma_addr >> 8));
>>>> +    soc15_grbm_select(adev, 0, 0, 0, 0);
>>>> +    mutex_unlock(&adev->srbm_mutex);
>>>> +}
>>>> +
>>>>   static const u32 vgpr_init_compute_shader[] =
>>>>   {
>>>>       0xb07c0000, 0xbe8000ff,
>>>> @@ -6720,6 +6737,7 @@ static const struct amdgpu_ring_funcs 
>>>> gfx_v9_0_ring_funcs_gfx = {
>>>>       .emit_reg_write_reg_wait = 
>>>> gfx_v9_0_ring_emit_reg_write_reg_wait,
>>>>       .soft_recovery = gfx_v9_0_ring_soft_recovery,
>>>>       .emit_mem_sync = gfx_v9_0_emit_mem_sync,
>>>> +    .emit_trap_handler = gfx_v9_0_ring_emit_trap_handler,
>>>>   };
>>>>     static const struct amdgpu_ring_funcs 
>>>> gfx_v9_0_ring_funcs_compute = {
>>>> @@ -6756,6 +6774,7 @@ static const struct amdgpu_ring_funcs 
>>>> gfx_v9_0_ring_funcs_compute = {
>>>>       .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
>>>>       .emit_reg_write_reg_wait = 
>>>> gfx_v9_0_ring_emit_reg_write_reg_wait,
>>>>       .emit_mem_sync = gfx_v9_0_emit_mem_sync,
>>>> +    .emit_trap_handler = gfx_v9_0_ring_emit_trap_handler,
>>>>   };
>>>>     static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
>>>> diff --git a/include/uapi/drm/amdgpu_drm.h 
>>>> b/include/uapi/drm/amdgpu_drm.h
>>>> index 3218576e109d..7eae264adb5d 100644
>>>> --- a/include/uapi/drm/amdgpu_drm.h
>>>> +++ b/include/uapi/drm/amdgpu_drm.h
>>>> @@ -551,6 +551,7 @@ struct drm_amdgpu_gem_va {
>>>>   #define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES    0x07
>>>>   #define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT    0x08
>>>>   #define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL  0x09
>>>> +#define AMDGPU_CHUNK_ID_TRAP            0x0a
>>>>     struct drm_amdgpu_cs_chunk {
>>>>       __u32        chunk_id;
>>>> @@ -645,6 +646,13 @@ struct drm_amdgpu_cs_chunk_syncobj {
>>>>          __u64 point;
>>>>   };
>>>>   +struct drm_amdgpu_cs_chunk_trap {
>>>> +    /** Trap Base Address */
>>>> +       __u64 tba_addr;
>>>> +    /** Trap Memory Address */
>>>> +       __u64 tma_addr;
>>>> +};
>>>> +
>>>>   #define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ    0
>>>>   #define AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ_FD    1
>>>>   #define AMDGPU_FENCE_TO_HANDLE_GET_SYNC_FILE_FD    2
>>>
>