[PATCH 07/11] drm/msm/a6xx: Add a6xx gpu state

Wed Oct 31 09:07:50 UTC 2018


On 10/17/2018 9:58 PM, Jordan Crouse wrote:
> Add support for gathering and dumping the a6xx GPU state including
> registers, GMU registers, indexed registers, shader blocks,
> context clusters and debugbus.
> 
> Signed-off-by: Jordan Crouse <jcrouse at codeaurora.org>
> ---
>   drivers/gpu/drm/msm/Makefile                |    1 +
>   drivers/gpu/drm/msm/adreno/a6xx_gmu.c       |   25 +-
>   drivers/gpu/drm/msm/adreno/a6xx_gmu.h       |    3 +
>   drivers/gpu/drm/msm/adreno/a6xx_gpu.c       |   39 +-
>   drivers/gpu/drm/msm/adreno/a6xx_gpu.h       |    8 +
>   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c | 1159 +++++++++++++++++++
>   drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h |  430 +++++++
>   7 files changed, 1627 insertions(+), 38 deletions(-)
>   create mode 100644 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
>   create mode 100644 drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
> 
> diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
> index 19ab521d4c3a..33645c6539ee 100644
> --- a/drivers/gpu/drm/msm/Makefile
> +++ b/drivers/gpu/drm/msm/Makefile
> @@ -14,6 +14,7 @@ msm-y := \
>   	adreno/a6xx_gpu.o \
>   	adreno/a6xx_gmu.o \
>   	adreno/a6xx_hfi.o \
> +	adreno/a6xx_gpu_state.o \
>   	hdmi/hdmi.o \
>   	hdmi/hdmi_audio.o \
>   	hdmi/hdmi_bridge.o \
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> index d4e98e5876bc..089b013d7bb9 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
> @@ -51,10 +51,31 @@ static irqreturn_t a6xx_hfi_irq(int irq, void *data)
>   	return IRQ_HANDLED;
>   }
>   
> +bool a6xx_gmu_sptprac_is_on(struct a6xx_gmu *gmu)
> +{
> +	u32 val;
> +
> +	/* This can be called from gpu state code so make sure GMU is valid */
> +	if (IS_ERR_OR_NULL(gmu->mmio))
> +		return false;
> +
> +	val = gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS);
> +
> +	return !(val &
> +		(A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_SPTPRAC_GDSC_POWER_OFF |
> +		A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_SP_CLOCK_OFF));
> +}
> +
>   /* Check to see if the GX rail is still powered */
> -static bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu)
> +bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu)
>   {
> -	u32 val = gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS);
> +	u32 val;
> +
> +	/* This can be called from gpu state code so make sure GMU is valid */
> +	if (IS_ERR_OR_NULL(gmu->mmio))
> +		return false;
> +
> +	val = gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS);
>   
>   	return !(val &
>   		(A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_GDSC_POWER_OFF |
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
> index 35f765afae45..c721d9165d8e 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
> @@ -164,4 +164,7 @@ void a6xx_hfi_init(struct a6xx_gmu *gmu);
>   int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state);
>   void a6xx_hfi_stop(struct a6xx_gmu *gmu);
>   
> +bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu);
> +bool a6xx_gmu_sptprac_is_on(struct a6xx_gmu *gmu);
> +
>   #endif
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index 631257c297fd..3afd4df2e250 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -645,33 +645,6 @@ static const u32 a6xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
>   	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A6XX_CP_RB_CNTL),
>   };
>   
> -static const u32 a6xx_registers[] = {
> -	0x0000, 0x0002, 0x0010, 0x0010, 0x0012, 0x0012, 0x0018, 0x001b,
> -	0x001e, 0x0032, 0x0038, 0x003c, 0x0042, 0x0042, 0x0044, 0x0044,
> -	0x0047, 0x0047, 0x0056, 0x0056, 0x00ad, 0x00ae, 0x00b0, 0x00fb,
> -	0x0100, 0x011d, 0x0200, 0x020d, 0x0210, 0x0213, 0x0218, 0x023d,
> -	0x0400, 0x04f9, 0x0500, 0x0500, 0x0505, 0x050b, 0x050e, 0x0511,
> -	0x0533, 0x0533, 0x0540, 0x0555, 0x0800, 0x0808, 0x0810, 0x0813,
> -	0x0820, 0x0821, 0x0823, 0x0827, 0x0830, 0x0833, 0x0840, 0x0843,
> -	0x084f, 0x086f, 0x0880, 0x088a, 0x08a0, 0x08ab, 0x08c0, 0x08c4,
> -	0x08d0, 0x08dd, 0x08f0, 0x08f3, 0x0900, 0x0903, 0x0908, 0x0911,
> -	0x0928, 0x093e, 0x0942, 0x094d, 0x0980, 0x0984, 0x098d, 0x0996,
> -	0x0998, 0x099e, 0x09a0, 0x09a6, 0x09a8, 0x09ae, 0x09b0, 0x09b1,
> -	0x09c2, 0x09c8, 0x0a00, 0x0a03, 0x0c00, 0x0c04, 0x0c06, 0x0c06,
> -	0x0c10, 0x0cd9, 0x0e00, 0x0e0e, 0x0e10, 0x0e13, 0x0e17, 0x0e19,
> -	0x0e1c, 0x0e2b, 0x0e30, 0x0e32, 0x0e38, 0x0e39, 0x8600, 0x8601,
> -	0x8610, 0x861b, 0x8620, 0x8620, 0x8628, 0x862b, 0x8630, 0x8637,
> -	0x8e01, 0x8e01, 0x8e04, 0x8e05, 0x8e07, 0x8e08, 0x8e0c, 0x8e0c,
> -	0x8e10, 0x8e1c, 0x8e20, 0x8e25, 0x8e28, 0x8e28, 0x8e2c, 0x8e2f,
> -	0x8e3b, 0x8e3e, 0x8e40, 0x8e43, 0x8e50, 0x8e5e, 0x8e70, 0x8e77,
> -	0x9600, 0x9604, 0x9624, 0x9637, 0x9e00, 0x9e01, 0x9e03, 0x9e0e,
> -	0x9e11, 0x9e16, 0x9e19, 0x9e19, 0x9e1c, 0x9e1c, 0x9e20, 0x9e23,
> -	0x9e30, 0x9e31, 0x9e34, 0x9e34, 0x9e70, 0x9e72, 0x9e78, 0x9e79,
> -	0x9e80, 0x9fff, 0xa600, 0xa601, 0xa603, 0xa603, 0xa60a, 0xa60a,
> -	0xa610, 0xa617, 0xa630, 0xa630,
> -	~0
> -};
> -
>   static int a6xx_pm_resume(struct msm_gpu *gpu)
>   {
>   	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> @@ -724,14 +697,6 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
>   	return 0;
>   }
>   
> -#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
> -static void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
> -		struct drm_printer *p)
> -{
> -	adreno_show(gpu, state, p);
> -}
> -#endif
> -
>   static struct msm_ringbuffer *a6xx_active_ring(struct msm_gpu *gpu)
>   {
>   	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> @@ -796,6 +761,8 @@ static const struct adreno_gpu_funcs funcs = {
>   		.gpu_busy = a6xx_gpu_busy,
>   		.gpu_get_freq = a6xx_gmu_get_freq,
>   		.gpu_set_freq = a6xx_gmu_set_freq,
> +		.gpu_state_get = a6xx_gpu_state_get,
> +		.gpu_state_put = a6xx_gpu_state_put,
>   	},
>   	.get_timestamp = a6xx_get_timestamp,
>   };
> @@ -817,7 +784,7 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
>   	adreno_gpu = &a6xx_gpu->base;
>   	gpu = &adreno_gpu->base;
>   
> -	adreno_gpu->registers = a6xx_registers;
> +	adreno_gpu->registers = NULL;
>   	adreno_gpu->reg_offsets = a6xx_register_offsets;
>   
>   	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> index 4127dcebc202..528a4cfe07cd 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
> @@ -56,6 +56,14 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state);
>   
>   int a6xx_gmu_probe(struct a6xx_gpu *a6xx_gpu, struct device_node *node);
>   void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu);
> +
>   void a6xx_gmu_set_freq(struct msm_gpu *gpu, unsigned long freq);
>   unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu);
> +
> +void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
> +		struct drm_printer *p);
> +
> +struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu);
> +int a6xx_gpu_state_put(struct msm_gpu_state *state);
> +
>   #endif /* __A6XX_GPU_H__ */
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
> new file mode 100644
> index 000000000000..d46b94462bb5
> --- /dev/null
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
> @@ -0,0 +1,1159 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2018 The Linux Foundation. All rights reserved. */
> +
> +#include <linux/ascii85.h>
> +#include "msm_gem.h"
> +#include "a6xx_gpu.h"
> +#include "a6xx_gmu.h"
> +#include "a6xx_gpu_state.h"
> +#include "a6xx_gmu.xml.h"
> +
> +struct a6xx_gpu_state_obj {
> +	const void *handle;
> +	u32 *data;
> +};
> +
> +struct a6xx_gpu_state {
> +	struct msm_gpu_state base;
> +
> +	struct a6xx_gpu_state_obj *gmu_registers;
> +	int nr_gmu_registers;
> +
> +	struct a6xx_gpu_state_obj *registers;
> +	int nr_registers;
> +
> +	struct a6xx_gpu_state_obj *shaders;
> +	int nr_shaders;
> +
> +	struct a6xx_gpu_state_obj *clusters;
> +	int nr_clusters;
> +
> +	struct a6xx_gpu_state_obj *dbgahb_clusters;
> +	int nr_dbgahb_clusters;
> +
> +	struct a6xx_gpu_state_obj *indexed_regs;
> +	int nr_indexed_regs;
> +
> +	struct a6xx_gpu_state_obj *debugbus;
> +	int nr_debugbus;
> +
> +	struct a6xx_gpu_state_obj *vbif_debugbus;
> +
> +	struct a6xx_gpu_state_obj *cx_debugbus;
> +	int nr_cx_debugbus;
> +};
> +
> +static inline int CRASHDUMP_WRITE(u64 *in, u32 reg, u32 val)
> +{
> +	in[0] = val;
> +	in[1] = (((u64) reg) << 44 | (1 << 21) | 1);
> +
> +	return 2;
> +}
> +
> +static inline int CRASHDUMP_READ(u64 *in, u32 reg, u32 dwords, u64 target)
> +{
> +	in[0] = target;
> +	in[1] = (((u64) reg) << 44 | dwords);
> +
> +	return 2;
> +}
> +
> +static inline int CRASHDUMP_FINI(u64 *in)
> +{
> +	in[0] = 0;
> +	in[1] = 0;
> +
> +	return 2;
> +}
> +
> +struct a6xx_crashdumper {
> +	void *ptr;
> +	struct drm_gem_object *bo;
> +	u64 iova;
> +};
> +
> +/*
> + * Allocate 1MB for the crashdumper scratch region - 8k for the script and
> + * the rest for the data
> + */
> +#define A6XX_CD_DATA_OFFSET 8192
> +#define A6XX_CD_DATA_SIZE  (SZ_1M - 8192)
> +
> +static int a6xx_crashdumper_init(struct msm_gpu *gpu,
> +		struct a6xx_crashdumper *dumper)
> +{
> +	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
> +		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
> +		&dumper->bo, &dumper->iova);
> +
> +	return IS_ERR(dumper->ptr) ? PTR_ERR(dumper->ptr) : 0;
> +}
> +
> +static int a6xx_crashdumper_run(struct msm_gpu *gpu,
> +		struct a6xx_crashdumper *dumper)
> +{
> +	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> +	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> +	u32 val;
> +	int ret;
> +
> +	if (IS_ERR_OR_NULL(dumper->ptr))
> +		return -EINVAL;
> +
> +	if (!a6xx_gmu_sptprac_is_on(&a6xx_gpu->gmu))
> +		return -EINVAL;
> +
> +	/* Make sure all pending memory writes are posted */
> +	wmb();
> +
> +	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
> +		REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
> +
> +	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
> +
> +	ret = gpu_poll_timeout(gpu, REG_A6XX_CP_CRASH_DUMP_STATUS, val,
> +		val & 0x02, 100, 10000);
> +
> +	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 0);
> +
> +	return ret;
> +}
> +
> +static void a6xx_crashdumper_free(struct msm_gpu *gpu,
> +		struct a6xx_crashdumper *dumper)
> +{
> +	msm_gem_put_iova(dumper->bo, gpu->aspace);
> +	msm_gem_put_vaddr(dumper->bo);
> +
> +	drm_gem_object_unreference(dumper->bo);
> +}
> +
> +/* read a value from the GX debug bus */
> +static int debugbus_read(struct msm_gpu *gpu, u32 block, u32 offset,
> +		u32 *data)
> +{
> +	u32 reg = A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_INDEX(offset) |
> +		A6XX_DBGC_CFG_DBGBUS_SEL_D_PING_BLK_SEL(block);
> +
> +	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_A, reg);
> +	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_B, reg);
> +	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_C, reg);
> +	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_SEL_D, reg);
> +
> +	/* Wait 1 us to make sure the data is flowing */
> +	udelay(1);
> +
> +	data[0] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF2);
> +	data[1] = gpu_read(gpu, REG_A6XX_DBGC_CFG_DBGBUS_TRACE_BUF1);
> +
> +	return 2;
> +}
> +
> +#define cxdbg_write(ptr, offset, val) \
> +	msm_writel((val), (ptr) + ((offset) << 2))
> +
> +#define cxdbg_read(ptr, offset) \
> +	msm_readl((ptr) + ((offset) << 2))
> +
> +/* read a value from the CX debug bus */
> +static int cx_debugbus_read(void *__iomem cxdbg, u32 block, u32 offset,
> +		u32 *data)
> +{
> +	u32 reg = A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_INDEX(offset) |
> +		A6XX_CX_DBGC_CFG_DBGBUS_SEL_A_PING_BLK_SEL(block);
> +
> +	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_A, reg);
> +	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_B, reg);
> +	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_C, reg);
> +	cxdbg_write(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_SEL_D, reg);
> +
> +	/* Wait 1 us to make sure the data is flowing */
> +	udelay(1);
> +
> +	data[0] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF2);
> +	data[1] = cxdbg_read(cxdbg, REG_A6XX_CX_DBGC_CFG_DBGBUS_TRACE_BUF1);
> +
> +	return 2;
> +}
> +
> +/* Read a chunk of data from the VBIF debug bus */
> +static int vbif_debugbus_read(struct msm_gpu *gpu, u32 ctrl0, u32 ctrl1,
> +		u32 reg, int count, u32 *data)
> +{
> +	int i;
> +
> +	gpu_write(gpu, ctrl0, reg);
> +
> +	for (i = 0; i < count; i++) {
> +		gpu_write(gpu, ctrl1, i);
> +		data[i] = gpu_read(gpu, REG_A6XX_VBIF_TEST_BUS_OUT);
> +	}
> +
> +	return count;
> +}
> +
> +#define AXI_ARB_BLOCKS 2
> +#define XIN_AXI_BLOCKS 5
> +#define XIN_CORE_BLOCKS 4
> +
> +#define VBIF_DEBUGBUS_BLOCK_SIZE \
> +	((16 * AXI_ARB_BLOCKS) + \
> +	 (18 * XIN_AXI_BLOCKS) + \
> +	 (12 * XIN_CORE_BLOCKS))
> +
> +static void a6xx_get_vbif_debugbus_block(struct msm_gpu *gpu,
> +		struct a6xx_gpu_state_obj *obj)
> +{
> +	u32 clk, *ptr;
> +	int i;
> +
> +	obj->data = kcalloc(VBIF_DEBUGBUS_BLOCK_SIZE, sizeof(u32), GFP_KERNEL);
> +	obj->handle = NULL;
> +
> +	/* Get the current clock setting */
> +	clk = gpu_read(gpu, REG_A6XX_VBIF_CLKON);
> +
> +	/* Force on the bus so we can read it */
> +	gpu_write(gpu, REG_A6XX_VBIF_CLKON,
> +		clk | A6XX_VBIF_CLKON_FORCE_ON_TESTBUS);
> +
> +	/* We will read from BUS2 first, so disable BUS1 */
> +	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS1_CTRL0, 0);
> +
> +	/* Enable the VBIF bus for reading */
> +	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS_OUT_CTRL, 1);
> +
> +	ptr = obj->data;
> +
> +	for (i = 0; i < AXI_ARB_BLOCKS; i++)
> +		ptr += vbif_debugbus_read(gpu,
> +			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
> +			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
> +			1 << (i + 16), 16, ptr);
> +
> +	for (i = 0; i < XIN_AXI_BLOCKS; i++)
> +		ptr += vbif_debugbus_read(gpu,
> +			REG_A6XX_VBIF_TEST_BUS2_CTRL0,
> +			REG_A6XX_VBIF_TEST_BUS2_CTRL1,
> +			1 << i, 18, ptr);
> +
> +	/* Stop BUS2 so we can turn on BUS1 */
> +	gpu_write(gpu, REG_A6XX_VBIF_TEST_BUS2_CTRL0, 0);
> +
> +	for (i = 0; i < XIN_CORE_BLOCKS; i++)
> +		ptr += vbif_debugbus_read(gpu,
> +			REG_A6XX_VBIF_TEST_BUS1_CTRL0,
> +			REG_A6XX_VBIF_TEST_BUS1_CTRL1,
> +			1 << i, 12, ptr);
> +
> +	/* Restore the VBIF clock setting */
> +	gpu_write(gpu, REG_A6XX_VBIF_CLKON, clk);
> +}
> +
> +static void a6xx_get_debugbus_block(struct msm_gpu *gpu,
> +		const struct a6xx_debugbus_block *block,
> +		struct a6xx_gpu_state_obj *obj)
> +{
> +	int i;
> +	u32 *ptr;
> +
> +	obj->data = kcalloc(block->count, sizeof(u64), GFP_KERNEL);
> +	if (!obj->data)
> +		return;
> +
> +	obj->handle = block;
> +
> +	for (ptr = obj->data, i = 0; i < block->count; i++)
> +		ptr += debugbus_read(gpu, block->id, i, ptr);
> +}
> +
> +static void a6xx_get_cx_debugbus_block(void __iomem *cxdbg,
> +		const struct a6xx_debugbus_block *block,
> +		struct a6xx_gpu_state_obj *obj)
> +{
> +	int i;
> +	u32 *ptr;
> +
> +	obj->data = kcalloc(block->count, sizeof(u64), GFP_KERNEL);
> +	if (!obj->data)
> +		return;
> +
> +	obj->handle = block;
> +
> +	for (ptr = obj->data, i = 0; i < block->count; i++)
> +		ptr += cx_debugbus_read(cxdbg, block->id, i, ptr);
> +}
> +
> +static void a6xx_get_debugbus(struct msm_gpu *gpu,
> +		struct a6xx_gpu_state *a6xx_state)
> +{
> +	struct resource *res;
> +	void __iomem *cxdbg = NULL;
> +
> +	/* Set up the GX debug bus */
> +
> +	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
> +		A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
> +
> +	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
> +		A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
> +
> +	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
> +	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
> +	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
> +	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
> +
> +	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0, 0x76543210);
> +	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1, 0xFEDCBA98);
> +
> +	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
> +	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
> +	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
> +	gpu_write(gpu, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
> +
> +	/* Set up the CX debug bus - it lives elsewhere in the system so do a
> +	 * temporary ioremap for the registers
> +	 */
> +	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM,
> +			"cx_dbgc");
> +
> +	if (res)
> +		cxdbg = ioremap(res->start, resource_size(res));
> +
> +	if (cxdbg) {
> +		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLT,
> +			A6XX_DBGC_CFG_DBGBUS_CNTLT_SEGT(0xf));
> +
> +		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_CNTLM,
> +			A6XX_DBGC_CFG_DBGBUS_CNTLM_ENABLE(0xf));
> +
> +		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_0, 0);
> +		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_1, 0);
> +		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_2, 0);
> +		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_IVTL_3, 0);
> +
> +		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_0,
> +			0x76543210);
> +		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_BYTEL_1,
> +			0xFEDCBA98);
> +
> +		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_0, 0);
> +		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_1, 0);
> +		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_2, 0);
> +		cxdbg_write(cxdbg, REG_A6XX_DBGC_CFG_DBGBUS_MASKL_3, 0);
> +	}
> +
> +	a6xx_state->debugbus = kcalloc(ARRAY_SIZE(a6xx_debugbus_blocks),
> +		sizeof(*a6xx_state->debugbus), GFP_KERNEL);
> +
> +	if (a6xx_state->debugbus) {
> +		int i;
> +
> +		for (i = 0; i < ARRAY_SIZE(a6xx_debugbus_blocks); i++)
> +			a6xx_get_debugbus_block(gpu,
> +				&a6xx_debugbus_blocks[i],
> +				&a6xx_state->debugbus[i]);
> +
> +		a6xx_state->nr_debugbus = ARRAY_SIZE(a6xx_debugbus_blocks);
> +	}
> +
> +	a6xx_state->vbif_debugbus = kzalloc(sizeof(*a6xx_state->vbif_debugbus),
> +		GFP_KERNEL);
> +
> +	if (a6xx_state->vbif_debugbus)
> +		a6xx_get_vbif_debugbus_block(gpu, a6xx_state->vbif_debugbus);
> +
> +	if (cxdbg) {
> +		a6xx_state->cx_debugbus =
> +			kcalloc(ARRAY_SIZE(a6xx_cx_debugbus_blocks),
> +			sizeof(*a6xx_state->cx_debugbus), GFP_KERNEL);
> +
> +		if (a6xx_state->cx_debugbus) {
> +			int i;
> +
> +			for (i = 0; i < ARRAY_SIZE(a6xx_cx_debugbus_blocks); i++)
> +				a6xx_get_cx_debugbus_block(cxdbg,
> +					&a6xx_cx_debugbus_blocks[i],
> +					&a6xx_state->cx_debugbus[i]);
> +
> +			a6xx_state->nr_cx_debugbus =
> +				ARRAY_SIZE(a6xx_cx_debugbus_blocks);
> +		}
> +
> +		iounmap(cxdbg);
> +	}
> +}
> +
> +#define RANGE(reg, a) ((reg)[(a) + 1] - (reg)[(a)] + 1)
> +
> +/* Read a data cluster from behind the AHB aperture */
> +static void a6xx_get_dbgahb_cluster(struct msm_gpu *gpu,
> +		const struct a6xx_dbgahb_cluster *dbgahb,
> +		struct a6xx_gpu_state_obj *obj,
> +		struct a6xx_crashdumper *dumper)
> +{
> +	u64 *in = dumper->ptr;
> +	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
> +	size_t datasize;
> +	int i, regcount = 0;
> +
> +	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
> +		int j;
> +
> +		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
> +			(dbgahb->statetype + i * 2) << 8);
> +
> +		for (j = 0; j < dbgahb->count; j += 2) {
> +			int count = RANGE(dbgahb->registers, j);
> +			u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
> +				dbgahb->registers[j] - (dbgahb->base >> 2);
> +
> +			in += CRASHDUMP_READ(in, offset, count, out);
> +
> +			out += count * sizeof(u32);
> +
> +			if (i == 0)
> +				regcount += count;
> +		}
> +	}
> +
> +	CRASHDUMP_FINI(in);
> +
> +	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
> +
> +	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
> +		return;
> +
> +	if (a6xx_crashdumper_run(gpu, dumper))
> +		return;
> +
> +	obj->handle = dbgahb;
> +	obj->data = kmemdup(dumper->ptr + A6XX_CD_DATA_OFFSET,
> +		datasize, GFP_KERNEL);
> +}
> +
> +static void a6xx_get_dbgahb_clusters(struct msm_gpu *gpu,
> +		struct a6xx_gpu_state *a6xx_state,
> +		struct a6xx_crashdumper *dumper)
> +{
> +	int i;
> +
> +	a6xx_state->dbgahb_clusters = kcalloc(ARRAY_SIZE(a6xx_dbgahb_clusters),
> +		sizeof(*a6xx_state->dbgahb_clusters), GFP_KERNEL);
> +
> +	if (!a6xx_state->dbgahb_clusters)
> +		return;
> +
> +	a6xx_state->nr_dbgahb_clusters = ARRAY_SIZE(a6xx_dbgahb_clusters);
> +
> +	for (i = 0; i < ARRAY_SIZE(a6xx_dbgahb_clusters); i++)
> +		a6xx_get_dbgahb_cluster(gpu, &a6xx_dbgahb_clusters[i],
> +			&a6xx_state->dbgahb_clusters[i], dumper);
> +}
> +
> +/* Read a data cluster from the CP aperture with the crashdumper */
> +static void a6xx_get_cluster(struct msm_gpu *gpu,
> +		const struct a6xx_cluster *cluster,
> +		struct a6xx_gpu_state_obj *obj,
> +		struct a6xx_crashdumper *dumper)
> +{
> +	u64 *in = dumper->ptr;
> +	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
> +	size_t datasize;
> +	int i, regcount = 0;
> +
> +	/* Some clusters need a selector register to be programmed too */
> +	if (cluster->sel_reg)
> +		in += CRASHDUMP_WRITE(in, cluster->sel_reg, cluster->sel_val);
> +
> +	for (i = 0; i < A6XX_NUM_CONTEXTS; i++) {
> +		int j;
> +
> +		in += CRASHDUMP_WRITE(in, REG_A6XX_CP_APERTURE_CNTL_CD,
> +			(cluster->id << 8) | (i << 4) | i);
> +
> +		for (j = 0; j < cluster->count; j += 2) {
> +			int count = RANGE(cluster->registers, j);
> +
> +			in += CRASHDUMP_READ(in, cluster->registers[j],
> +				count, out);
> +
> +			out += count * sizeof(u32);
> +
> +			if (i == 0)
> +				regcount += count;
> +		}
> +	}
> +
> +	CRASHDUMP_FINI(in);
> +
> +	datasize = regcount * A6XX_NUM_CONTEXTS * sizeof(u32);
> +
> +	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
> +		return;
> +
> +	if (a6xx_crashdumper_run(gpu, dumper))
> +		return;
> +
> +	obj->handle = cluster;
> +	obj->data = kmemdup(dumper->ptr + A6XX_CD_DATA_OFFSET,
> +		datasize, GFP_KERNEL);
> +}
> +
> +static void a6xx_get_clusters(struct msm_gpu *gpu,
> +		struct a6xx_gpu_state *a6xx_state,
> +		struct a6xx_crashdumper *dumper)
> +{
> +	int i;
> +
> +	a6xx_state->clusters = kcalloc(ARRAY_SIZE(a6xx_clusters),
> +		sizeof(*a6xx_state->clusters), GFP_KERNEL);
> +
> +	if (!a6xx_state->clusters)
> +		return;
> +
> +	a6xx_state->nr_clusters = ARRAY_SIZE(a6xx_clusters);
> +
> +	for (i = 0; i < ARRAY_SIZE(a6xx_clusters); i++)
> +		a6xx_get_cluster(gpu, &a6xx_clusters[i],
> +			&a6xx_state->clusters[i], dumper);
> +}
> +
> +/* Read a shader / debug block from the HLSQ aperture with the crashdumper */
> +static void a6xx_get_shader_block(struct msm_gpu *gpu,
> +		const struct a6xx_shader_block *block,
> +		struct a6xx_gpu_state_obj *obj,
> +		struct a6xx_crashdumper *dumper)
> +{
> +	u64 *in = dumper->ptr;
> +	size_t datasize = block->size * A6XX_NUM_SHADER_BANKS * sizeof(u32);
> +	int i;
> +
> +	if (WARN_ON(datasize > A6XX_CD_DATA_SIZE))
> +		return;
> +
> +	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
> +		in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL,
> +			(block->type << 8) | i);
> +
> +		in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
> +			block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
> +	}
> +
> +	CRASHDUMP_FINI(in);
> +
> +	if (a6xx_crashdumper_run(gpu, dumper))
> +		return;
> +
> +	obj->handle = block;
> +	obj->data = kmemdup(dumper->ptr + A6XX_CD_DATA_OFFSET,
> +		datasize, GFP_KERNEL);
> +}
> +
> +static void a6xx_get_shaders(struct msm_gpu *gpu,
> +		struct a6xx_gpu_state *a6xx_state,
> +		struct a6xx_crashdumper *dumper)
> +{
> +	int i;
> +
> +	a6xx_state->shaders = kcalloc(ARRAY_SIZE(a6xx_shader_blocks),
> +		sizeof(*a6xx_state->shaders), GFP_KERNEL);
> +
> +	if (!a6xx_state->shaders)
> +		return;
> +
> +	a6xx_state->nr_shaders = ARRAY_SIZE(a6xx_shader_blocks);
> +
> +	for (i = 0; i < ARRAY_SIZE(a6xx_shader_blocks); i++)
> +		a6xx_get_shader_block(gpu, &a6xx_shader_blocks[i],
> +			&a6xx_state->shaders[i], dumper);
> +}
> +
> +/* Read registers from behind the HLSQ aperture with the crashdumper */
> +static void a6xx_get_crashdumper_hlsq_registers(struct msm_gpu *gpu,
> +		const struct a6xx_registers *regs,
> +		struct a6xx_gpu_state_obj *obj,
> +		struct a6xx_crashdumper *dumper)
> +
> +{
> +	u64 *in = dumper->ptr;
> +	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
> +	int i, regcount = 0;
> +
> +	in += CRASHDUMP_WRITE(in, REG_A6XX_HLSQ_DBG_READ_SEL, regs->val1);
> +
> +	for (i = 0; i < regs->count; i += 2) {
> +		u32 count = RANGE(regs->registers, i);
> +		u32 offset = REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE +
> +			regs->registers[i] - (regs->val0 >> 2);
> +
> +		in += CRASHDUMP_READ(in, offset, count, out);
> +
> +		out += count * sizeof(u32);
> +		regcount += count;
> +	}
> +
> +	CRASHDUMP_FINI(in);
> +
> +	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
> +		return;
> +
> +	if (a6xx_crashdumper_run(gpu, dumper))
> +		return;
> +
> +	obj->handle = regs;
> +	obj->data = kmemdup(dumper->ptr + A6XX_CD_DATA_OFFSET,
> +		regcount * sizeof(u32), GFP_KERNEL);
> +}
> +
> +/* Read a block of registers using the crashdumper */
> +static void a6xx_get_crashdumper_registers(struct msm_gpu *gpu,
> +		const struct a6xx_registers *regs,
> +		struct a6xx_gpu_state_obj *obj,
> +		struct a6xx_crashdumper *dumper)
> +
> +{
> +	u64 *in = dumper->ptr;
> +	u64 out = dumper->iova + A6XX_CD_DATA_OFFSET;
> +	int i, regcount = 0;
> +
> +	/* Some blocks might need to program a selector register first */
> +	if (regs->val0)
> +		in += CRASHDUMP_WRITE(in, regs->val0, regs->val1);
> +
> +	for (i = 0; i < regs->count; i += 2) {
> +		u32 count = RANGE(regs->registers, i);
> +
> +		in += CRASHDUMP_READ(in, regs->registers[i], count, out);
> +
> +		out += count * sizeof(u32);
> +		regcount += count;
> +	}
> +
> +	CRASHDUMP_FINI(in);
> +
> +	if (WARN_ON((regcount * sizeof(u32)) > A6XX_CD_DATA_SIZE))
> +		return;
> +
> +	if (a6xx_crashdumper_run(gpu, dumper))
> +		return;
> +
> +	obj->handle = regs;
> +	obj->data = kmemdup(dumper->ptr + A6XX_CD_DATA_OFFSET,
> +		regcount * sizeof(u32), GFP_KERNEL);
> +}
> +
> +/* Read a block of registers via AHB */
> +static void a6xx_get_ahb_gpu_registers(struct msm_gpu *gpu,
> +		const struct a6xx_registers *regs,
> +		struct a6xx_gpu_state_obj *obj)
> +{
> +	int i, regcount = 0, index = 0;
> +
> +	for (i = 0; i < regs->count; i += 2)
> +		regcount += RANGE(regs->registers, i);
> +
> +	obj->handle = (const void *) regs;
> +	obj->data = kcalloc(regcount, sizeof(u32), GFP_KERNEL);
> +	if (!obj->data)
> +		return;
> +
> +	for (i = 0; i < regs->count; i += 2) {
> +		u32 count = RANGE(regs->registers, i);
> +		int j;
> +
> +		for (j = 0; j < count; j++)
> +			obj->data[index++] = gpu_read(gpu,
> +				regs->registers[i] + j);
> +	}
> +}
> +
> +/* Read a block of GMU registers */
> +static void _a6xx_get_gmu_registers(struct msm_gpu *gpu,
> +		const struct a6xx_registers *regs,
> +		struct a6xx_gpu_state_obj *obj)
> +{
> +	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> +	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> +	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
> +	int i, regcount = 0, index = 0;
> +
> +	for (i = 0; i < regs->count; i += 2)
> +		regcount += RANGE(regs->registers, i);
> +
> +	obj->handle = (const void *) regs;
> +	obj->data = kcalloc(regcount, sizeof(u32), GFP_KERNEL);
> +	if (!obj->data)
> +		return;
> +
> +	for (i = 0; i < regs->count; i += 2) {
> +		u32 count = RANGE(regs->registers, i);
> +		int j;
> +
> +		for (j = 0; j < count; j++)
> +			obj->data[index++] = gmu_read(gmu,
> +				regs->registers[i] + j);
> +	}
> +}
> +
> +static void a6xx_get_gmu_registers(struct msm_gpu *gpu,
> +		struct a6xx_gpu_state *a6xx_state)
> +{
> +	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> +	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> +
> +	a6xx_state->gmu_registers = kcalloc(2,
> +		sizeof(*a6xx_state->gmu_registers), GFP_KERNEL);
> +
> +	if (!a6xx_state->gmu_registers)
> +		return;
> +
> +	a6xx_state->nr_gmu_registers = 2;
> +
> +	/* Get the CX GMU registers from AHB */
> +	_a6xx_get_gmu_registers(gpu, &a6xx_gmu_reglist[0],
> +		&a6xx_state->gmu_registers[0]);
> +
> +	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
> +		return;
> +
> +	/* Set the fence to ALLOW mode so we can access the registers */
> +	gpu_write(gpu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
> +
> +	_a6xx_get_gmu_registers(gpu, &a6xx_gmu_reglist[1],
> +		&a6xx_state->gmu_registers[1]);
> +}
> +
> +static void a6xx_get_registers(struct msm_gpu *gpu,
> +		struct a6xx_gpu_state *a6xx_state,
> +		struct a6xx_crashdumper *dumper)
> +{
> +	int i, count = ARRAY_SIZE(a6xx_ahb_reglist) +
> +		ARRAY_SIZE(a6xx_reglist) +
> +		ARRAY_SIZE(a6xx_hlsq_reglist);
> +	int index = 0;
> +
> +	a6xx_state->registers = kcalloc(count, sizeof(*a6xx_state->registers),
> +		GFP_KERNEL);
> +
> +	if (!a6xx_state->registers)
> +		return;
> +
> +	a6xx_state->nr_registers = count;
> +
> +	for (i = 0; i < ARRAY_SIZE(a6xx_ahb_reglist); i++)
> +		a6xx_get_ahb_gpu_registers(gpu,
> +			&a6xx_ahb_reglist[i],
> +			&a6xx_state->registers[index++]);
> +
> +	for (i = 0; i < ARRAY_SIZE(a6xx_reglist); i++)
> +		a6xx_get_crashdumper_registers(gpu,
> +			&a6xx_reglist[i],
> +			&a6xx_state->registers[index++],
> +			dumper);
> +
> +	for (i = 0; i < ARRAY_SIZE(a6xx_hlsq_reglist); i++)
> +		a6xx_get_crashdumper_hlsq_registers(gpu,
> +			&a6xx_hlsq_reglist[i],
> +			&a6xx_state->registers[index++],
> +			dumper);
> +}
> +
> +/* Read a block of data from an indexed register pair */
> +static void a6xx_get_indexed_regs(struct msm_gpu *gpu,
> +		const struct a6xx_indexed_registers *indexed,
> +		struct a6xx_gpu_state_obj *obj)
> +{
> +	int i;
> +
> +	obj->handle = (const void *) indexed;
> +	obj->data = kcalloc(indexed->count, sizeof(u32), GFP_KERNEL);
> +	if (!obj->data)
> +		return;
> +
> +	/* All the indexed banks start at address 0 */
> +	gpu_write(gpu, indexed->addr, 0);
> +
> +	/* Read the data - each read increments the internal address by 1 */
> +	for (i = 0; i < indexed->count; i++)
> +		obj->data[i] = gpu_read(gpu, indexed->data);
> +}
> +
> +static void a6xx_get_indexed_registers(struct msm_gpu *gpu,
> +		struct a6xx_gpu_state *a6xx_state)
> +{
> +	u32 mempool_size;
> +	int count = ARRAY_SIZE(a6xx_indexed_reglist) + 1;
> +	int i;
> +
> +	a6xx_state->indexed_regs = kcalloc(count,
> +		sizeof(a6xx_state->indexed_regs), GFP_KERNEL);
> +	if (!a6xx_state->indexed_regs)
> +		return;
> +
> +	for (i = 0; i < ARRAY_SIZE(a6xx_indexed_reglist); i++)
> +		a6xx_get_indexed_regs(gpu, &a6xx_indexed_reglist[i],
> +			&a6xx_state->indexed_regs[i]);
> +
> +	/* Set the CP mempool size to 0 to stabilize it while dumping */
> +	mempool_size = gpu_read(gpu, REG_A6XX_CP_MEM_POOL_SIZE);
> +	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, 0);
> +
> +	/* Get the contents of the CP mempool */
> +	a6xx_get_indexed_regs(gpu, &a6xx_cp_mempool_indexed,
> +		&a6xx_state->indexed_regs[i]);
> +
> +	/*
> +	 * Offset 0x2000 in the mempool is the size - copy the saved size over
> +	 * so the data is consistent
> +	 */
> +	a6xx_state->indexed_regs[i].data[0x2000] = mempool_size;
> +
> +	/* Restore the size in the hardware */
> +	gpu_write(gpu, REG_A6XX_CP_MEM_POOL_SIZE, mempool_size);
> +
> +	a6xx_state->nr_indexed_regs = count;
> +}
> +
> +struct msm_gpu_state *a6xx_gpu_state_get(struct msm_gpu *gpu)
> +{
> +	struct a6xx_crashdumper dumper = { 0 };
> +	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> +	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
> +	struct a6xx_gpu_state *a6xx_state = kzalloc(sizeof(*a6xx_state),
> +		GFP_KERNEL);
> +
> +	if (!a6xx_state)
> +		return ERR_PTR(-ENOMEM);
> +
> +	/* Get the generic state from the adreno core */
> +	adreno_gpu_state_get(gpu, &a6xx_state->base);
> +
> +	a6xx_get_gmu_registers(gpu, a6xx_state);
> +
> +	/* If GX isn't on the rest of the data isn't going to be accessible */
> +	if (!a6xx_gmu_gx_is_on(&a6xx_gpu->gmu))
> +		return &a6xx_state->base;
> +
> +	/* Get the banks of indexed registers */
> +	a6xx_get_indexed_registers(gpu, a6xx_state);
> +
> +	/* Try to initialize the crashdumper */
> +	if (!a6xx_crashdumper_init(gpu, &dumper)) {
> +		a6xx_get_registers(gpu, a6xx_state, &dumper);
> +		a6xx_get_shaders(gpu, a6xx_state, &dumper);
> +		a6xx_get_clusters(gpu, a6xx_state, &dumper);
> +		a6xx_get_dbgahb_clusters(gpu, a6xx_state, &dumper);
> +
> +		a6xx_crashdumper_free(gpu, &dumper);
> +	}
> +
> +	a6xx_get_debugbus(gpu, a6xx_state);
> +
> +	return  &a6xx_state->base;
> +}
> +
> +void a6xx_gpu_state_destroy(struct kref *kref)
> +{
> +	struct msm_gpu_state *state = container_of(kref,
> +			struct msm_gpu_state, ref);
> +	struct a6xx_gpu_state *a6xx_state = container_of(state,
> +			struct a6xx_gpu_state, base);
> +	int i;
> +
> +	for (i = 0; i < a6xx_state->nr_gmu_registers; i++)
> +		kfree(a6xx_state->gmu_registers[i].data);
> +
> +	kfree(a6xx_state->gmu_registers);
> +
> +	for (i = 0; i < a6xx_state->nr_registers; i++)
> +		kfree(a6xx_state->registers[i].data);
> +
> +	kfree(a6xx_state->registers);
> +
> +	for (i = 0; i < a6xx_state->nr_shaders; i++)
> +		kfree(a6xx_state->shaders[i].data);
> +
> +	kfree(a6xx_state->shaders);
> +
> +	for (i = 0; i < a6xx_state->nr_clusters; i++)
> +		kfree(a6xx_state->clusters[i].data);
> +
> +	kfree(a6xx_state->clusters);
> +
> +	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
> +		kfree(a6xx_state->dbgahb_clusters[i].data);
> +
> +	kfree(a6xx_state->dbgahb_clusters);
> +
> +	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
> +		kfree(a6xx_state->indexed_regs[i].data);
> +
> +	kfree(a6xx_state->indexed_regs);
> +
> +	for (i = 0; i < a6xx_state->nr_debugbus; i++)
> +		kfree(a6xx_state->debugbus[i].data);
> +
> +	kfree(a6xx_state->debugbus);
> +
> +	if (a6xx_state->vbif_debugbus)
> +		kfree(a6xx_state->vbif_debugbus->data);
> +
> +	kfree(a6xx_state->vbif_debugbus);
> +
> +	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++)
> +		kfree(a6xx_state->cx_debugbus[i].data);
> +
> +	kfree(a6xx_state->cx_debugbus);
Nit: remove the extra blank line below.
There is also a leak here - the base class state is never freed. Before
freeing a6xx_state we need to call:
	adreno_gpu_state_destroy(state);
> +
> +
> +	kfree(a6xx_state);
> +}
> +
> +int a6xx_gpu_state_put(struct msm_gpu_state *state)
> +{
> +	if (IS_ERR_OR_NULL(state))
> +		return 1;
> +
> +	return kref_put(&state->ref, a6xx_gpu_state_destroy);
> +}
> +
> +static void a6xx_show_registers(const u32 *registers, u32 *data, size_t count,
> +		struct drm_printer *p)
> +{
> +	int i, index = 0;
> +
> +	if (!data)
> +		return;
> +
> +	for (i = 0; i < count; i += 2) {
> +		u32 count = RANGE(registers, i);
> +		u32 offset = registers[i];
> +		int j;
> +
> +		for (j = 0; j < count; index++, offset++, j++) {
> +			if (data[index] == 0xdeafbead)
> +				continue;
> +
> +			drm_printf(p, "  - { offset: 0x%06x, value: 0x%08x }\n",
> +				offset << 2, data[index]);
> +		}
> +	}
> +}
> +
> +static void print_ascii85(struct drm_printer *p, size_t len, u32 *data)
> +{
> +	char out[ASCII85_BUFSZ];
> +	long i, l, datalen = 0;
> +
> +	for (i = 0; i < len >> 2; i++) {
> +		if (data[i])
> +			datalen = (i << 2) + 1;
This should be the length in bytes up to and including dword i:
			datalen = (i + 1) << 2;
> +	}
> +
> +	if (datalen == 0)
> +		return;
> +
> +	drm_puts(p, "    data: !!ascii85 |\n");
> +	drm_puts(p, "      ");
> +
> +
> +	l = ascii85_encode_len(datalen);
> +
> +	for (i = 0; i < l; i++)
> +		drm_puts(p, ascii85_encode(data[i], out));
> +
> +	drm_puts(p, "\n");
> +}
> +
> +static void print_name(struct drm_printer *p, const char *fmt, const char *name)
> +{
> +	drm_puts(p, fmt);
> +	drm_puts(p, name);
> +	drm_puts(p, "\n");
> +}
> +
> +static void a6xx_show_shader(struct a6xx_gpu_state_obj *obj,
> +		struct drm_printer *p)
> +{
> +	const struct a6xx_shader_block *block = obj->handle;
> +	int i;
> +
> +	if (!obj->handle)
> +		return;
> +
> +	print_name(p, "  - type: ", block->name);
> +
> +	for (i = 0; i < A6XX_NUM_SHADER_BANKS; i++) {
> +		drm_printf(p, "    - bank: %d\n", i);
> +		drm_printf(p, "      size: %d\n", block->size);
> +
> +		if (!obj->data)
> +			continue;
> +
> +		print_ascii85(p, block->size << 2,
> +			obj->data + (block->size * i));
> +	}
> +}
> +
> +static void a6xx_show_cluster_data(const u32 *registers, int size, u32 *data,
> +		struct drm_printer *p)
> +{
> +	int ctx, index = 0;
> +
> +	for (ctx = 0; ctx < A6XX_NUM_CONTEXTS; ctx++) {
> +		int j;
> +
> +		drm_printf(p, "    - context: %d\n", ctx);
> +
> +		for (j = 0; j < size; j += 2) {
> +			u32 count = RANGE(registers, j);
> +			u32 offset = registers[j];
> +			int k;
> +
> +			for (k = 0; k < count; index++, offset++, k++) {
> +				if (data[index] == 0xdeafbead)
> +					continue;
> +
> +				drm_printf(p, "      - { offset: 0x%06x, value: 0x%08x }\n",
> +					offset << 2, data[index]);
> +			}
> +		}
> +	}
> +}
> +
> +static void a6xx_show_dbgahb_cluster(struct a6xx_gpu_state_obj *obj,
> +		struct drm_printer *p)
> +{
> +	const struct a6xx_dbgahb_cluster *dbgahb = obj->handle;
> +
> +	if (dbgahb) {
> +		print_name(p, "  - cluster-name: ", dbgahb->name);
> +		a6xx_show_cluster_data(dbgahb->registers, dbgahb->count,
> +			obj->data, p);
> +	}
> +}
> +
> +static void a6xx_show_cluster(struct a6xx_gpu_state_obj *obj,
> +		struct drm_printer *p)
> +{
> +	const struct a6xx_cluster *cluster = obj->handle;
> +
> +	if (cluster) {
> +		print_name(p, "  - cluster-name: ", cluster->name);
> +		a6xx_show_cluster_data(cluster->registers, cluster->count,
> +			obj->data, p);
> +	}
> +}
> +
> +static void a6xx_show_indexed_regs(struct a6xx_gpu_state_obj *obj,
> +		struct drm_printer *p)
> +{
> +	const struct a6xx_indexed_registers *indexed = obj->handle;
> +
> +	if (!indexed)
> +		return;
> +
> +	print_name(p, "  - regs-name: ", indexed->name);
> +	drm_printf(p, "    dwords: %d\n", indexed->count);
> +
> +	print_ascii85(p, indexed->count << 2, obj->data);
> +}
> +
> +static void a6xx_show_debugbus_block(const struct a6xx_debugbus_block *block,
> +		u32 *data, struct drm_printer *p)
> +{
> +	if (block) {
> +		print_name(p, "  - debugbus-block: ", block->name);
> +
> +		/*
> +		 * count for regular debugbus data is in quadwords,
> +		 * but print the size in dwords for consistency
> +		 */
> +		drm_printf(p, "    count: %d\n", block->count << 1);
> +
> +		print_ascii85(p, block->count << 3, data);
> +	}
> +}
> +
> +static void a6xx_show_debugbus(struct a6xx_gpu_state *a6xx_state,
> +		struct drm_printer *p)
> +{
> +	int i;
> +
> +	for (i = 0; i < a6xx_state->nr_debugbus; i++) {
> +		struct a6xx_gpu_state_obj *obj = &a6xx_state->debugbus[i];
> +
> +		a6xx_show_debugbus_block(obj->handle, obj->data, p);
> +	}
> +
> +	if (a6xx_state->vbif_debugbus) {
> +		struct a6xx_gpu_state_obj *obj = a6xx_state->vbif_debugbus;
> +
> +		drm_puts(p, "  - debugbus-block: A6XX_DBGBUS_VBIF\n");
> +		drm_printf(p, "    count: %d\n", VBIF_DEBUGBUS_BLOCK_SIZE);
> +
> +		/* vbif debugbus data is in dwords.  Confusing, huh? */
> +		print_ascii85(p, VBIF_DEBUGBUS_BLOCK_SIZE << 2, obj->data);
> +	}
> +
> +	for (i = 0; i < a6xx_state->nr_cx_debugbus; i++) {
> +		struct a6xx_gpu_state_obj *obj = &a6xx_state->cx_debugbus[i];
> +
> +		a6xx_show_debugbus_block(obj->handle, obj->data, p);
> +	}
> +}
> +
> +void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
> +		struct drm_printer *p)
> +{
> +	struct a6xx_gpu_state *a6xx_state = container_of(state,
> +			struct a6xx_gpu_state, base);
> +	int i;
> +
> +	if (IS_ERR_OR_NULL(state))
> +		return;
> +
> +	adreno_show(gpu, state, p);
> +
> +	drm_puts(p, "registers:\n");
> +	for (i = 0; i < a6xx_state->nr_registers; i++) {
> +		struct a6xx_gpu_state_obj *obj = &a6xx_state->registers[i];
> +		const struct a6xx_registers *regs = obj->handle;
> +
> +		if (!obj->handle)
> +			continue;
> +
> +		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
> +	}
> +
> +	drm_puts(p, "registers-gmu:\n");
> +	for (i = 0; i < a6xx_state->nr_gmu_registers; i++) {
> +		struct a6xx_gpu_state_obj *obj = &a6xx_state->gmu_registers[i];
> +		const struct a6xx_registers *regs = obj->handle;
> +
> +		if (!obj->handle)
> +			continue;
> +
> +		a6xx_show_registers(regs->registers, obj->data, regs->count, p);
> +	}
> +
> +	drm_puts(p, "indexed-registers:\n");
> +	for (i = 0; i < a6xx_state->nr_indexed_regs; i++)
> +		a6xx_show_indexed_regs(&a6xx_state->indexed_regs[i], p);
> +
> +	drm_puts(p, "shader-blocks:\n");
> +	for (i = 0; i < a6xx_state->nr_shaders; i++)
> +		a6xx_show_shader(&a6xx_state->shaders[i], p);
> +
> +	drm_puts(p, "clusters:\n");
> +	for (i = 0; i < a6xx_state->nr_clusters; i++)
> +		a6xx_show_cluster(&a6xx_state->clusters[i], p);
> +
> +	for (i = 0; i < a6xx_state->nr_dbgahb_clusters; i++)
> +		a6xx_show_dbgahb_cluster(&a6xx_state->dbgahb_clusters[i], p);
> +
> +	drm_puts(p, "debugbus:\n");
> +	a6xx_show_debugbus(a6xx_state, p);
> +}
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
> new file mode 100644
> index 000000000000..68cccfa2870a
> --- /dev/null
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
> @@ -0,0 +1,430 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2018 The Linux Foundation. All rights reserved. */
> +
> +#ifndef _A6XX_CRASH_DUMP_H_
> +#define _A6XX_CRASH_DUMP_H_
> +
> +#include "a6xx.xml.h"
> +
> +#define A6XX_NUM_CONTEXTS 2
> +#define A6XX_NUM_SHADER_BANKS 3
> +
> +static const u32 a6xx_gras_cluster[] = {
> +	0x8000, 0x8006, 0x8010, 0x8092, 0x8094, 0x809d, 0x80a0, 0x80a6,
> +	0x80af, 0x80f1, 0x8100, 0x8107, 0x8109, 0x8109, 0x8110, 0x8110,
> +	0x8400, 0x840b,
> +};
> +
> +static const u32 a6xx_ps_cluster_rac[] = {
> +	0x8800, 0x8806, 0x8809, 0x8811, 0x8818, 0x881e, 0x8820, 0x8865,
> +	0x8870, 0x8879, 0x8880, 0x8889, 0x8890, 0x8891, 0x8898, 0x8898,
> +	0x88c0, 0x88c1, 0x88d0, 0x88e3, 0x8900, 0x890c, 0x890f, 0x891a,
> +	0x8c00, 0x8c01, 0x8c08, 0x8c10, 0x8c17, 0x8c1f, 0x8c26, 0x8c33,
> +};
> +
> +static const u32 a6xx_ps_cluster_rbp[] = {
> +	0x88f0, 0x88f3, 0x890d, 0x890e, 0x8927, 0x8928, 0x8bf0, 0x8bf1,
> +	0x8c02, 0x8c07, 0x8c11, 0x8c16, 0x8c20, 0x8c25,
> +};
> +
> +static const u32 a6xx_ps_cluster[] = {
> +	0x9200, 0x9216, 0x9218, 0x9236, 0x9300, 0x9306,
> +};
> +
> +static const u32 a6xx_fe_cluster[] = {
> +	0x9300, 0x9306, 0x9800, 0x9806, 0x9b00, 0x9b07, 0xa000, 0xa009,
> +	0xa00e, 0xa0ef, 0xa0f8, 0xa0f8,
> +};
> +
> +static const u32 a6xx_pc_vs_cluster[] = {
> +	0x9100, 0x9108, 0x9300, 0x9306, 0x9980, 0x9981, 0x9b00, 0x9b07,
> +};
> +
> +#define CLUSTER_FE    0
> +#define CLUSTER_SP_VS 1
> +#define CLUSTER_PC_VS 2
> +#define CLUSTER_GRAS  3
> +#define CLUSTER_SP_PS 4
> +#define CLUSTER_PS    5
> +
> +#define CLUSTER(_id, _reg, _sel_reg, _sel_val) \
> +	{ .id = _id, .name = #_id,\
> +		.registers = _reg, \
> +		.count = ARRAY_SIZE(_reg), \
> +		.sel_reg = _sel_reg, .sel_val = _sel_val }
> +
> +static const struct a6xx_cluster {
> +	u32 id;
> +	const char *name;
> +	const u32 *registers;
> +	size_t count;
> +	u32 sel_reg;
> +	u32 sel_val;
> +} a6xx_clusters[] = {
> +	CLUSTER(CLUSTER_GRAS, a6xx_gras_cluster, 0, 0),
> +	CLUSTER(CLUSTER_PS, a6xx_ps_cluster_rac, REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD, 0x0),
> +	CLUSTER(CLUSTER_PS, a6xx_ps_cluster_rbp, REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD, 0x9),
> +	CLUSTER(CLUSTER_PS, a6xx_ps_cluster, 0, 0),
> +	CLUSTER(CLUSTER_FE, a6xx_fe_cluster, 0, 0),
> +	CLUSTER(CLUSTER_PC_VS, a6xx_pc_vs_cluster, 0, 0),
> +};
> +
> +static const u32 a6xx_sp_vs_hlsq_cluster[] = {
> +	0xb800, 0xb803, 0xb820, 0xb822,
> +};
> +
> +static const u32 a6xx_sp_vs_sp_cluster[] = {
> +	0xa800, 0xa824, 0xa830, 0xa83c, 0xa840, 0xa864, 0xa870, 0xa895,
> +	0xa8a0, 0xa8af, 0xa8c0, 0xa8c3,
> +};
> +
> +static const u32 a6xx_hlsq_duplicate_cluster[] = {
> +	0xbb10, 0xbb11, 0xbb20, 0xbb29,
> +};
> +
> +static const u32 a6xx_hlsq_2d_duplicate_cluster[] = {
> +	0xbd80, 0xbd80,
> +};
> +
> +static const u32 a6xx_sp_duplicate_cluster[] = {
> +	0xab00, 0xab00, 0xab04, 0xab05, 0xab10, 0xab1b, 0xab20, 0xab20,
> +};
> +
> +static const u32 a6xx_tp_duplicate_cluster[] = {
> +	0xb300, 0xb307, 0xb309, 0xb309, 0xb380, 0xb382,
> +};
> +
> +static const u32 a6xx_sp_ps_hlsq_cluster[] = {
> +	0xb980, 0xb980, 0xb982, 0xb987, 0xb990, 0xb99b, 0xb9a0, 0xb9a2,
> +	0xb9c0, 0xb9c9,
> +};
> +
> +static const u32 a6xx_sp_ps_hlsq_2d_cluster[] = {
> +	0xbd80, 0xbd80,
> +};
> +
> +static const u32 a6xx_sp_ps_sp_cluster[] = {
> +	0xa980, 0xa9a8, 0xa9b0, 0xa9bc, 0xa9d0, 0xa9d3, 0xa9e0, 0xa9f3,
> +	0xaa00, 0xaa00, 0xaa30, 0xaa31,
> +};
> +
> +static const u32 a6xx_sp_ps_sp_2d_cluster[] = {
> +	0xacc0, 0xacc0,
> +};
> +
> +static const u32 a6xx_sp_ps_tp_cluster[] = {
> +	0xb180, 0xb183, 0xb190, 0xb191,
> +};
> +
> +static const u32 a6xx_sp_ps_tp_2d_cluster[] = {
> +	0xb4c0, 0xb4d1,
> +};
> +
> +#define CLUSTER_DBGAHB(_id, _base, _type, _reg) \
> +	{ .name = #_id, .statetype = _type, .base = _base, \
> +		.registers = _reg, .count = ARRAY_SIZE(_reg) }
> +
> +static const struct a6xx_dbgahb_cluster {
> +	const char *name;
> +	u32 statetype;
> +	u32 base;
> +	const u32 *registers;
> +	size_t count;
> +} a6xx_dbgahb_clusters[] = {
> +	CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002e000, 0x41, a6xx_sp_vs_hlsq_cluster),
> +	CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002a000, 0x21, a6xx_sp_vs_sp_cluster),
> +	CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002e000, 0x41, a6xx_hlsq_duplicate_cluster),
> +	CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002f000, 0x45, a6xx_hlsq_2d_duplicate_cluster),
> +	CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002a000, 0x21, a6xx_sp_duplicate_cluster),
> +	CLUSTER_DBGAHB(CLUSTER_SP_VS, 0x0002c000, 0x1, a6xx_tp_duplicate_cluster),
> +	CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002e000, 0x42, a6xx_sp_ps_hlsq_cluster),
> +	CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002f000, 0x46, a6xx_sp_ps_hlsq_2d_cluster),
> +	CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002a000, 0x22, a6xx_sp_ps_sp_cluster),
> +	CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002b000, 0x26, a6xx_sp_ps_sp_2d_cluster),
> +	CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002c000, 0x2, a6xx_sp_ps_tp_cluster),
> +	CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002d000, 0x6, a6xx_sp_ps_tp_2d_cluster),
> +	CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002e000, 0x42, a6xx_hlsq_duplicate_cluster),
> +	CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002a000, 0x22, a6xx_sp_duplicate_cluster),
> +	CLUSTER_DBGAHB(CLUSTER_SP_PS, 0x0002c000, 0x2, a6xx_tp_duplicate_cluster),
> +};
> +
> +static const u32 a6xx_hlsq_registers[] = {
> +	0xbe00, 0xbe01, 0xbe04, 0xbe05, 0xbe08, 0xbe09, 0xbe10, 0xbe15,
> +	0xbe20, 0xbe23,
> +};
> +
> +static const u32 a6xx_sp_registers[] = {
> +	0xae00, 0xae04, 0xae0c, 0xae0c, 0xae0f, 0xae2b, 0xae30, 0xae32,
> +	0xae35, 0xae35, 0xae3a, 0xae3f, 0xae50, 0xae52,
> +};
> +
> +static const u32 a6xx_tp_registers[] = {
> +	0xb600, 0xb601, 0xb604, 0xb605, 0xb610, 0xb61b, 0xb620, 0xb623,
> +};
> +
> +struct a6xx_registers {
> +	const u32 *registers;
> +	size_t count;
> +	u32 val0;
> +	u32 val1;
> +};
> +
> +#define HLSQ_DBG_REGS(_base, _type, _array) \
> +	{ .val0 = _base, .val1 = _type, .registers = _array, \
> +		.count = ARRAY_SIZE(_array), }
> +
> +static const struct a6xx_registers a6xx_hlsq_reglist[] = {
> +	HLSQ_DBG_REGS(0x0002F800, 0x40, a6xx_hlsq_registers),
> +	HLSQ_DBG_REGS(0x0002B800, 0x20, a6xx_sp_registers),
> +	HLSQ_DBG_REGS(0x0002D800, 0x0, a6xx_tp_registers),
> +};
> +
> +#define SHADER(_type, _size) \
> +	{ .type = _type, .name = #_type, .size = _size }
> +
> +static const struct a6xx_shader_block {
> +	const char *name;
> +	u32 type;
> +	u32 size;
> +} a6xx_shader_blocks[] = {
> +	SHADER(A6XX_TP0_TMO_DATA, 0x200),
> +	SHADER(A6XX_TP0_SMO_DATA, 0x80),
> +	SHADER(A6XX_TP0_MIPMAP_BASE_DATA, 0x3c0),
> +	SHADER(A6XX_TP1_TMO_DATA, 0x200),
> +	SHADER(A6XX_TP1_SMO_DATA, 0x80),
> +	SHADER(A6XX_TP1_MIPMAP_BASE_DATA, 0x3c0),
> +	SHADER(A6XX_SP_INST_DATA, 0x800),
> +	SHADER(A6XX_SP_LB_0_DATA, 0x800),
> +	SHADER(A6XX_SP_LB_1_DATA, 0x800),
> +	SHADER(A6XX_SP_LB_2_DATA, 0x800),
> +	SHADER(A6XX_SP_LB_3_DATA, 0x800),
> +	SHADER(A6XX_SP_LB_4_DATA, 0x800),
> +	SHADER(A6XX_SP_LB_5_DATA, 0x200),
> +	SHADER(A6XX_SP_CB_BINDLESS_DATA, 0x2000),
> +	SHADER(A6XX_SP_CB_LEGACY_DATA, 0x280),
> +	SHADER(A6XX_SP_UAV_DATA, 0x80),
> +	SHADER(A6XX_SP_INST_TAG, 0x80),
> +	SHADER(A6XX_SP_CB_BINDLESS_TAG, 0x80),
> +	SHADER(A6XX_SP_TMO_UMO_TAG, 0x80),
> +	SHADER(A6XX_SP_SMO_TAG, 0x80),
> +	SHADER(A6XX_SP_STATE_DATA, 0x3f),
> +	SHADER(A6XX_HLSQ_CHUNK_CVS_RAM, 0x1c0),
> +	SHADER(A6XX_HLSQ_CHUNK_CPS_RAM, 0x280),
> +	SHADER(A6XX_HLSQ_CHUNK_CVS_RAM_TAG, 0x40),
> +	SHADER(A6XX_HLSQ_CHUNK_CPS_RAM_TAG, 0x40),
> +	SHADER(A6XX_HLSQ_ICB_CVS_CB_BASE_TAG, 0x4),
> +	SHADER(A6XX_HLSQ_ICB_CPS_CB_BASE_TAG, 0x4),
> +	SHADER(A6XX_HLSQ_CVS_MISC_RAM, 0x1c0),
> +	SHADER(A6XX_HLSQ_CPS_MISC_RAM, 0x580),
> +	SHADER(A6XX_HLSQ_INST_RAM, 0x800),
> +	SHADER(A6XX_HLSQ_GFX_CVS_CONST_RAM, 0x800),
> +	SHADER(A6XX_HLSQ_GFX_CPS_CONST_RAM, 0x800),
> +	SHADER(A6XX_HLSQ_CVS_MISC_RAM_TAG, 0x8),
> +	SHADER(A6XX_HLSQ_CPS_MISC_RAM_TAG, 0x4),
> +	SHADER(A6XX_HLSQ_INST_RAM_TAG, 0x80),
> +	SHADER(A6XX_HLSQ_GFX_CVS_CONST_RAM_TAG, 0xc),
> +	SHADER(A6XX_HLSQ_GFX_CPS_CONST_RAM_TAG, 0x10),
> +	SHADER(A6XX_HLSQ_PWR_REST_RAM, 0x28),
> +	SHADER(A6XX_HLSQ_PWR_REST_TAG, 0x14),
> +	SHADER(A6XX_HLSQ_DATAPATH_META, 0x40),
> +	SHADER(A6XX_HLSQ_FRONTEND_META, 0x40),
> +	SHADER(A6XX_HLSQ_INDIRECT_META, 0x40),
> +};
> +
> +static const u32 a6xx_rb_rac_registers[] = {
> +	0x8e04, 0x8e05, 0x8e07, 0x8e08, 0x8e10, 0x8e1c, 0x8e20, 0x8e25,
> +	0x8e28, 0x8e28, 0x8e2c, 0x8e2f, 0x8e50, 0x8e52,
> +};
> +
> +static const u32 a6xx_rb_rbp_registers[] = {
> +	0x8e01, 0x8e01, 0x8e0c, 0x8e0c, 0x8e3b, 0x8e3e, 0x8e40, 0x8e43,
> +	0x8e53, 0x8e5f, 0x8e70, 0x8e77,
> +};
> +
> +static const u32 a6xx_registers[] = {
> +	/* RBBM */
> +	0x0000, 0x0002, 0x0010, 0x0010, 0x0012, 0x0012, 0x0018, 0x001b,
> +	0x001e, 0x0032, 0x0038, 0x003c, 0x0042, 0x0042, 0x0044, 0x0044,
> +	0x0047, 0x0047, 0x0056, 0x0056, 0x00ad, 0x00ae, 0x00b0, 0x00fb,
> +	0x0100, 0x011d, 0x0200, 0x020d, 0x0218, 0x023d, 0x0400, 0x04f9,
> +	0x0500, 0x0500, 0x0505, 0x050b, 0x050e, 0x0511, 0x0533, 0x0533,
> +	0x0540, 0x0555,
> +	/* CP */
> +	0x0800, 0x0808, 0x0810, 0x0813, 0x0820, 0x0821, 0x0823, 0x0824,
> +	0x0826, 0x0827, 0x0830, 0x0833, 0x0840, 0x0843, 0x084f, 0x086f,
> +	0x0880, 0x088a, 0x08a0, 0x08ab, 0x08c0, 0x08c4, 0x08d0, 0x08dd,
> +	0x08f0, 0x08f3, 0x0900, 0x0903, 0x0908, 0x0911, 0x0928, 0x093e,
> +	0x0942, 0x094d, 0x0980, 0x0984, 0x098d, 0x0996, 0x0998, 0x099e,
> +	0x09a0, 0x09a6, 0x09a8, 0x09ae, 0x09b0, 0x09b1, 0x09c2, 0x09c8,
> +	0x0a00, 0x0a03,
> +	/* VSC */
> +	0x0c00, 0x0c04, 0x0c06, 0x0c06, 0x0c10, 0x0cd9, 0x0e00, 0x0e0e,
> +	/* UCHE */
> +	0x0e10, 0x0e13, 0x0e17, 0x0e19, 0x0e1c, 0x0e2b, 0x0e30, 0x0e32,
> +	0x0e38, 0x0e39,
> +	/* GRAS */
> +	0x8600, 0x8601, 0x8610, 0x861b, 0x8620, 0x8620, 0x8628, 0x862b,
> +	0x8630, 0x8637,
> +	/* VPC */
> +	0x9600, 0x9604, 0x9624, 0x9637,
> +	/* PC */
> +	0x9e00, 0x9e01, 0x9e03, 0x9e0e, 0x9e11, 0x9e16, 0x9e19, 0x9e19,
> +	0x9e1c, 0x9e1c, 0x9e20, 0x9e23, 0x9e30, 0x9e31, 0x9e34, 0x9e34,
> +	0x9e70, 0x9e72, 0x9e78, 0x9e79, 0x9e80, 0x9fff,
> +	/* VFD */
> +	0xa600, 0xa601, 0xa603, 0xa603, 0xa60a, 0xa60a, 0xa610, 0xa617,
> +	0xa630, 0xa630,
> +};
> +
> +#define REGS(_array, _sel_reg, _sel_val) \
> +	{ .registers = _array, .count = ARRAY_SIZE(_array), \
> +		.val0 = _sel_reg, .val1 = _sel_val }
> +
> +static const struct a6xx_registers a6xx_reglist[] = {
> +	REGS(a6xx_registers, 0, 0),
> +	REGS(a6xx_rb_rac_registers, REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD, 0),
> +	REGS(a6xx_rb_rbp_registers, REG_A6XX_RB_RB_SUB_BLOCK_SEL_CNTL_CD, 9),
> +};
> +
> +static const u32 a6xx_ahb_registers[] = {
> +	/* RBBM_STATUS - RBBM_STATUS3 */
> +	0x210, 0x213,
> +	/* CP_STATUS_1 */
> +	0x825, 0x825,
> +};
> +
> +static const u32 a6xx_vbif_registers[] = {
> +	0x3000, 0x3007, 0x300c, 0x3014, 0x3018, 0x302d, 0x3030, 0x3031,
> +	0x3034, 0x3036, 0x303c, 0x303d, 0x3040, 0x3040, 0x3042, 0x3042,
> +	0x3049, 0x3049, 0x3058, 0x3058, 0x305a, 0x3061, 0x3064, 0x3068,
> +	0x306c, 0x306d, 0x3080, 0x3088, 0x308b, 0x308c, 0x3090, 0x3094,
> +	0x3098, 0x3098, 0x309c, 0x309c, 0x30c0, 0x30c0, 0x30c8, 0x30c8,
> +	0x30d0, 0x30d0, 0x30d8, 0x30d8, 0x30e0, 0x30e0, 0x3100, 0x3100,
> +	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
> +	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x3154, 0x3154,
> +	0x3156, 0x3156, 0x3158, 0x3158, 0x315a, 0x315a, 0x315c, 0x315c,
> +	0x315e, 0x315e, 0x3160, 0x3160, 0x3162, 0x3162, 0x340c, 0x340c,
> +	0x3410, 0x3410, 0x3800, 0x3801,
> +};
> +
> +static const struct a6xx_registers a6xx_ahb_reglist[] = {
> +	REGS(a6xx_ahb_registers, 0, 0),
> +	REGS(a6xx_vbif_registers, 0, 0),
> +};
> +
> +static const u32 a6xx_gmu_gx_registers[] = {
> +	/* GMU GX */
> +	0x0000, 0x0000, 0x0010, 0x0013, 0x0016, 0x0016, 0x0018, 0x001b,
> +	0x001e, 0x001e, 0x0020, 0x0023, 0x0026, 0x0026, 0x0028, 0x002b,
> +	0x002e, 0x002e, 0x0030, 0x0033, 0x0036, 0x0036, 0x0038, 0x003b,
> +	0x003e, 0x003e, 0x0040, 0x0043, 0x0046, 0x0046, 0x0080, 0x0084,
> +	0x0100, 0x012b, 0x0140, 0x0140,
> +};
> +
> +static const u32 a6xx_gmu_cx_registers[] = {
> +	/* GMU CX */
> +	0x4c00, 0x4c07, 0x4c10, 0x4c12, 0x4d00, 0x4d00, 0x4d07, 0x4d0a,
> +	0x5000, 0x5004, 0x5007, 0x5008, 0x500b, 0x500c, 0x500f, 0x501c,
> +	0x5024, 0x502a, 0x502d, 0x5030, 0x5040, 0x5053, 0x5087, 0x5089,
> +	0x50a0, 0x50a2, 0x50a4, 0x50af, 0x50c0, 0x50c3, 0x50d0, 0x50d0,
> +	0x50e4, 0x50e4, 0x50e8, 0x50ec, 0x5100, 0x5103, 0x5140, 0x5140,
> +	0x5142, 0x5144, 0x514c, 0x514d, 0x514f, 0x5151, 0x5154, 0x5154,
> +	0x5157, 0x5158, 0x515d, 0x515d, 0x5162, 0x5162, 0x5164, 0x5165,
> +	0x5180, 0x5186, 0x5190, 0x519e, 0x51c0, 0x51c0, 0x51c5, 0x51cc,
> +	0x51e0, 0x51e2, 0x51f0, 0x51f0, 0x5200, 0x5201,
> +	/* GPU RSCC */
> +	0x8c8c, 0x8c8c, 0x8d01, 0x8d02, 0x8f40, 0x8f42, 0x8f44, 0x8f47,
> +	0x8f4c, 0x8f87, 0x8fec, 0x8fef, 0x8ff4, 0x902f, 0x9094, 0x9097,
> +	0x909c, 0x90d7, 0x913c, 0x913f, 0x9144, 0x917f,
> +	/* GMU AO */
> +	0x9300, 0x9316, 0x9400, 0x9400,
> +	/* GPU CC */
> +	0x9800, 0x9812, 0x9840, 0x9852, 0x9c00, 0x9c04, 0x9c07, 0x9c0b,
> +	0x9c15, 0x9c1c, 0x9c1e, 0x9c2d, 0x9c3c, 0x9c3d, 0x9c3f, 0x9c40,
> +	0x9c42, 0x9c49, 0x9c58, 0x9c5a, 0x9d40, 0x9d5e, 0xa000, 0xa002,
> +	0xa400, 0xa402, 0xac00, 0xac02, 0xb000, 0xb002, 0xb400, 0xb402,
> +	0xb800, 0xb802,
> +	/* GPU CC ACD */
> +	0xbc00, 0xbc16, 0xbc20, 0xbc27,
> +};
> +
> +static const struct a6xx_registers a6xx_gmu_reglist[] = {
> +	REGS(a6xx_gmu_cx_registers, 0, 0),
> +	REGS(a6xx_gmu_gx_registers, 0, 0),
> +};
> +
> +static const struct a6xx_indexed_registers {
> +	const char *name;
> +	u32 addr;
> +	u32 data;
> +	u32 count;
> +} a6xx_indexed_reglist[] = {
> +	{ "CP_SEQ_STAT", REG_A6XX_CP_SQE_STAT_ADDR,
> +		REG_A6XX_CP_SQE_STAT_DATA, 0x33 },
> +	{ "CP_DRAW_STATE", REG_A6XX_CP_DRAW_STATE_ADDR,
> +		REG_A6XX_CP_DRAW_STATE_DATA, 0x100 },
> +	{ "CP_UCODE_DBG_DATA", REG_A6XX_CP_SQE_UCODE_DBG_ADDR,
> +		REG_A6XX_CP_SQE_UCODE_DBG_DATA, 0x6000 },
> +	{ "CP_ROQ", REG_A6XX_CP_ROQ_DBG_ADDR,
> +		REG_A6XX_CP_ROQ_DBG_DATA, 0x400 },
> +};
> +
> +static const struct a6xx_indexed_registers a6xx_cp_mempool_indexed = {
> +	"CP_MEMPOOOL", REG_A6XX_CP_MEM_POOL_DBG_ADDR,
> +		REG_A6XX_CP_MEM_POOL_DBG_DATA, 0x2060,
> +};
> +
> +#define DEBUGBUS(_id, _count) { .id = _id, .name = #_id, .count = _count }
> +
> +static const struct a6xx_debugbus_block {
> +	const char *name;
> +	u32 id;
> +	u32 count;
> +} a6xx_debugbus_blocks[] = {
> +	DEBUGBUS(A6XX_DBGBUS_CP, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_RBBM, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_HLSQ, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_UCHE, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_DPM, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_TESS, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_PC, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_VFDP, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_VPC, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_TSE, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_RAS, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_VSC, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_COM, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_LRZ, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_A2D, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_CCUFCHE, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_RBP, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_DCS, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_DBGC, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_GMU_GX, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_TPFCHE, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_GPC, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_LARC, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_HLSQ_SPTP, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_RB_0, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_RB_1, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_UCHE_WRAPPER, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_CCU_0, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_CCU_1, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_VFD_0, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_VFD_1, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_VFD_2, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_VFD_3, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_SP_0, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_SP_1, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_TPL1_0, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_TPL1_1, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_TPL1_2, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_TPL1_3, 0x100),
> +};
> +
> +static const struct a6xx_debugbus_block a6xx_cx_debugbus_blocks[] = {
> +	DEBUGBUS(A6XX_DBGBUS_GMU_CX, 0x100),
> +	DEBUGBUS(A6XX_DBGBUS_CX, 0x100),
> +};
> +
> +#endif
> 

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project