[PATCH 2/7] drm/msm: add hang_debug module param

Rob Clark robdclark at gmail.com
Mon Mar 10 13:27:21 PDT 2014


On Mon, Mar 10, 2014 at 4:17 PM, Jordan Crouse <jcrouse at codeaurora.org> wrote:
> On 03/10/2014 10:47 AM, Rob Clark wrote:
>>
>> msm.hang_debug=y will dump out current register values if the gpu locks
>> up, for easier debugging.
>>
>> Signed-off-by: Rob Clark <robdclark at gmail.com>
>> ---
>>   drivers/gpu/drm/msm/adreno/a3xx_gpu.c   | 34
>> ++++++++++++++++++++++++++++++++-
>>   drivers/gpu/drm/msm/adreno/adreno_gpu.c | 18 +++++++++++++++++
>>   drivers/gpu/drm/msm/adreno/adreno_gpu.h |  1 +
>>   3 files changed, 52 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
>> b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
>> index 461df93..8b6fb84 100644
>> --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
>> +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
>> @@ -35,6 +35,12 @@
>>          A3XX_INT0_CP_AHB_ERROR_HALT |     \
>>          A3XX_INT0_UCHE_OOB_ACCESS)
>>
>> +
>> +static bool hang_debug = false;
>> +MODULE_PARM_DESC(hang_debug, "Dump registers when hang is detected (can
>> be slow!)");
>> +module_param_named(hang_debug, hang_debug, bool, 0600);
>> +static void a3xx_dump(struct msm_gpu *gpu);
>> +
>>   static struct platform_device *a3xx_pdev;
>>
>>   static void a3xx_me_init(struct msm_gpu *gpu)
>> @@ -291,6 +297,9 @@ static int a3xx_hw_init(struct msm_gpu *gpu)
>>
>>   static void a3xx_recover(struct msm_gpu *gpu)
>>   {
>> +       /* dump registers before resetting gpu, if enabled: */
>> +       if (hang_debug)
>> +               a3xx_dump(gpu);
>
>
> You will need to be careful here - not all the registers are suitable for
> reading back.
> On some flavors of A330 in particular some HLSQ registers reads will bring
> the system
> down. You can see the gory details in the KGSL driver.  If this works for
> you for now
> great but fair warning that some cores might have different experiences.

>From what I've seen, certain types of GPU lockup (for example shader
const overrun), even on a320, leave things such that reading HLSQ
registers does bad things, which is why I don't enable the bootarg by
default.

If there is a way to *detect* when HLSQ is in a bad state and skip
those registers, that would be awesome.  I guess I should have another
look at latest kgsl to see if there is some trick I missed.

Or otherwise I should probably make the bootarg a bitmask so you can
choose to enable/disable certain groups of registers.

>
>>         gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
>>         gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
>>         gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
>> @@ -352,7 +361,6 @@ static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
>>         return IRQ_HANDLED;
>>   }
>>
>> -#ifdef CONFIG_DEBUG_FS
>>   static const unsigned int a3xx_registers[] = {
>>         0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
>>         0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
>> @@ -392,6 +400,7 @@ static const unsigned int a3xx_registers[] = {
>>         0x303c, 0x303c, 0x305e, 0x305f,
>>   };
>>
>> +#ifdef CONFIG_DEBUG_FS
>>   static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m)
>>   {
>>         int i;
>> @@ -415,6 +424,29 @@ static void a3xx_show(struct msm_gpu *gpu, struct
>> seq_file *m)
>>   }
>>   #endif
>>
>> +/* would be nice to not have to duplicate the _show() stuff with
>> printk(): */
>> +static void a3xx_dump(struct msm_gpu *gpu)
>> +{
>> +       int i;
>> +
>> +       adreno_dump(gpu);
>> +       printk("status:   %08x\n",
>> +                       gpu_read(gpu, REG_A3XX_RBBM_STATUS));
>> +
>> +       /* dump these out in a form that can be parsed by demsm: */
>> +       printk("IO:region %s 00000000 00020000\n", gpu->name);
>> +       for (i = 0; i < ARRAY_SIZE(a3xx_registers); i += 2) {
>> +               uint32_t start = a3xx_registers[i];
>> +               uint32_t end   = a3xx_registers[i+1];
>> +               uint32_t addr;
>> +
>> +               for (addr = start; addr <= end; addr++) {
>> +                       uint32_t val = gpu_read(gpu, addr);
>> +                       printk("IO:R %08x %08x\n", addr<<2, val);
>> +               }
>> +       }
>> +}
>> +
>>   static const struct adreno_gpu_funcs funcs = {
>>         .base = {
>>                 .get_param = adreno_get_param,
>> diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
>> b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
>> index d321099..cf6eb97 100644
>> --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
>> +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
>> @@ -260,6 +260,24 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file
>> *m)
>>   }
>>   #endif
>>
>> +/* would be nice to not have to duplicate the _show() stuff with
>> printk(): */
>> +void adreno_dump(struct msm_gpu *gpu)
>> +{
>> +       struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
>> +
>> +       printk("revision: %d (%d.%d.%d.%d)\n",
>> +                       adreno_gpu->info->revn, adreno_gpu->rev.core,
>> +                       adreno_gpu->rev.major, adreno_gpu->rev.minor,
>> +                       adreno_gpu->rev.patchid);
>> +
>> +       printk("fence:    %d/%d\n", adreno_gpu->memptrs->fence,
>> +                       gpu->submitted_fence);
>> +       printk("rptr:     %d\n", adreno_gpu->memptrs->rptr);
>> +       printk("wptr:     %d\n", adreno_gpu->memptrs->wptr);
>> +       printk("rb wptr:  %d\n", get_wptr(gpu->rb));
>> +
>> +}
>> +
>
>
> We should introduce you to the gooey wonderfulness of the snapshot :)  All
> in
> due time.

if I get the rd dumping upstream (for which I first need to fix some
chicken/egg problem in drm debugfs init), that plus scratch register
writes (for tile and draw # so I can "triangulate" the position), it's
actually not a bad way to debug.

(although I suppose there are some juicy status bits in the register
dump which would be pretty helpful too if I knew what they meant :-P)

BR,
-R


> Jordan
> --
> The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
> hosted by The Linux Foundation


More information about the dri-devel mailing list