[PATCH v3] drm/xe: Add process name to devcoredump

Rodrigo Vivi rodrigo.vivi at intel.com
Thu May 23 17:39:21 UTC 2024


On Thu, May 23, 2024 at 12:23:11PM +0200, Nirmoy Das wrote:
>    On 5/22/2024 10:12 PM, José Roberto de Souza wrote:                          
>                                                                                 
>    Process name helps us track which application caused the GPU hang; this
>    is crucial when running several applications at the same time.
>                                                                                 
>    v2:                                                                          
>    - handle Xe KMD exec_queues without VM                                       
>                                                                                 
>    v3:                                                                          
>    - use get_pid_task() (suggested by Nirmoy)                                   
>                                                                                 
>    Cc: Rodrigo Vivi [1]<rodrigo.vivi at intel.com>                                 
>    Cc: Nirmoy Das [2]<nirmoy.das at intel.com>                                     
>    Signed-off-by: José Roberto de Souza [3]<jose.souza at intel.com>               
>                                                                                 
>    Reviewed-by: Nirmoy Das [4]<nirmoy.das at intel.com>

Thank you both; pushed to drm-xe-next.

>                                                                                 
>    ---                                                                          
>     drivers/gpu/drm/xe/xe_devcoredump.c       | 13 +++++++++++++                
>     drivers/gpu/drm/xe/xe_devcoredump_types.h |  2 ++                           
>     2 files changed, 15 insertions(+)                                           
>                                                                                 
>    diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
>    index e70aef7971930..1643d44f8bc42 100644                                    
>    --- a/drivers/gpu/drm/xe/xe_devcoredump.c                                    
>    +++ b/drivers/gpu/drm/xe/xe_devcoredump.c                                    
>    @@ -110,6 +110,7 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
>            drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
>            ts = ktime_to_timespec64(ss->boot_time);                             
>            drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);       
>    +       drm_printf(&p, "Process: %s\n", ss->process_name);                   
>            xe_device_snapshot_print(xe, &p);                                    
>                                                                                 
>            drm_printf(&p, "\n**** GuC CT ****\n");                              
>    @@ -166,12 +167,24 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
>            enum xe_hw_engine_id id;                                             
>            u32 adj_logical_mask = q->logical_mask;                              
>            u32 width_mask = (0x1 << q->width) - 1;                              
>    +       const char *process_name = "no process";                             
>    +       struct task_struct *task = NULL;                                     
>    +                                                                            
>            int i;                                                               
>            bool cookie;                                                         
>                                                                                 
>            ss->snapshot_time = ktime_get_real();                                
>            ss->boot_time = ktime_get_boottime();                                
>                                                                                 
>    +       if (q->vm) {                                                         
>    +               task = get_pid_task(q->vm->xef->drm->pid, PIDTYPE_PID);      
>    +               if (task)                                                    
>    +                       process_name = task->comm;                           
>    +       }                                                                    
>    +       snprintf(ss->process_name, sizeof(ss->process_name), process_name);  
>    +       if (task)                                                            
>    +               put_task_struct(task);                                       
>    +                                                                            
>            ss->gt = q->gt;                                                      
>            INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work);             
>                                                                                 
>    diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h
>    index 6f654b63c7f1c..923cdf72a816a 100644                                    
>    --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h                              
>    +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h                              
>    @@ -26,6 +26,8 @@ struct xe_devcoredump_snapshot {                           
>            ktime_t snapshot_time;                                               
>            /** @boot_time:  Relative boot time so the uptime can be calculated. */
>            ktime_t boot_time;                                                   
>    +       /** @process_name: Name of process that triggered this gpu hang */   
>    +       char process_name[TASK_COMM_LEN];                                    
>                                                                                 
>            /** @gt: Affected GT, used by forcewake for delayed capture */       
>            struct xe_gt *gt;                                                    
> 
> References
> 
>    Visible links
>    1. mailto:rodrigo.vivi at intel.com
>    2. mailto:nirmoy.das at intel.com
>    3. mailto:jose.souza at intel.com
>    4. mailto:nirmoy.das at intel.com


More information about the Intel-xe mailing list