[Nouveau] [PATCH] pmu/gk20a: PMU boot support.
Ilia Mirkin
imirkin at alum.mit.edu
Wed Mar 11 10:10:49 PDT 2015
Hi Deepak,
There's... a lot of stuff going on here. Can you describe the goal of
this patch (which could then be used as the patch commit message)? The
current one basically boils down to "Add support for loading PMU", but
merely loading the fw into a fuc engine is just a handful of lines of
code. Also, except in rare cases, it's customary to split up patches
of this size into smaller, more reviewable chunks, which add on bits
of functionality as they go.
From what I can tell, you're adding the kernel-side interface for a
hypothetical (and presumably closed-source) PMU blob that NVIDIA will
supply. In essence, the blob is expected to implement a RTOS which
runs on the PMU's falcon CPU. There are a bunch of APIs implemented
by this blob that the host can call, but it also does things on its
own. For the kernel side, each of these API calls should probably be a
separate patch (after an initial "just load it and do nothing" style
patch). Or perhaps have the infrastructure that you add first and then
something that implements the API calls.
Cheers,
-ilia
On Wed, Mar 11, 2015 at 2:33 AM, Deepak Goyal <dgoyal at nvidia.com> wrote:
> It adds PMU boot support.It loads PMU
> firmware into PMU falcon.RM/Kernel driver
> receives INIT ack (through interrupt mechanism)
> from PMU when PMU boots with success.
>
> Signed-off-by: Deepak Goyal <dgoyal at nvidia.com>
> ---
> drm/nouveau/include/nvkm/subdev/pmu.h | 26 +-
> drm/nouveau/nvkm/subdev/pmu/base.c | 108 ++
> drm/nouveau/nvkm/subdev/pmu/gk20a.c | 2131 ++++++++++++++++++++++++++++++++-
> drm/nouveau/nvkm/subdev/pmu/gk20a.h | 369 ++++++
> drm/nouveau/nvkm/subdev/pmu/priv.h | 264 ++++
> 5 files changed, 2884 insertions(+), 14 deletions(-)
> create mode 100644 drm/nouveau/nvkm/subdev/pmu/gk20a.h
>
> diff --git a/drm/nouveau/include/nvkm/subdev/pmu.h b/drm/nouveau/include/nvkm/subdev/pmu.h
> index 7b86acc634a0..659b4e0ba02b 100644
> --- a/drm/nouveau/include/nvkm/subdev/pmu.h
> +++ b/drm/nouveau/include/nvkm/subdev/pmu.h
> @@ -1,7 +1,20 @@
> #ifndef __NVKM_PMU_H__
> #define __NVKM_PMU_H__
> #include <core/subdev.h>
> +#include <core/device.h>
> +#include <subdev/mmu.h>
> +#include <linux/debugfs.h>
>
> +struct pmu_buf_desc {
> + struct nvkm_gpuobj *pmubufobj;
> + struct nvkm_vma pmubufvma;
> + size_t size;
> +};
> +struct pmu_priv_vm {
> + struct nvkm_gpuobj *mem;
> + struct nvkm_gpuobj *pgd;
> + struct nvkm_vm *vm;
> +};
> struct nvkm_pmu {
> struct nvkm_subdev base;
>
> @@ -20,9 +33,20 @@ struct nvkm_pmu {
> u32 message;
> u32 data[2];
> } recv;
> -
> + wait_queue_head_t init_wq;
> + bool gr_initialised;
> + struct dentry *debugfs;
> + struct pmu_buf_desc *pg_buf;
> + struct pmu_priv_vm *pmuvm;
> int (*message)(struct nvkm_pmu *, u32[2], u32, u32, u32, u32);
> void (*pgob)(struct nvkm_pmu *, bool);
> + int (*pmu_mutex_acquire)(struct nvkm_pmu *, u32 id, u32 *token);
> + int (*pmu_mutex_release)(struct nvkm_pmu *, u32 id, u32 *token);
> + int (*pmu_load_norm)(struct nvkm_pmu *pmu, u32 *load);
> + int (*pmu_load_update)(struct nvkm_pmu *pmu);
> + void (*pmu_reset_load_counters)(struct nvkm_pmu *pmu);
> + void (*pmu_get_load_counters)(struct nvkm_pmu *pmu, u32 *busy_cycles,
> + u32 *total_cycles);
> };
>
> static inline struct nvkm_pmu *
> diff --git a/drm/nouveau/nvkm/subdev/pmu/base.c b/drm/nouveau/nvkm/subdev/pmu/base.c
> index 054b2d2eec35..6afd389b9764 100644
> --- a/drm/nouveau/nvkm/subdev/pmu/base.c
> +++ b/drm/nouveau/nvkm/subdev/pmu/base.c
> @@ -25,6 +25,114 @@
>
> #include <subdev/timer.h>
>
> +/* init allocator struct */
> +int nvkm_pmu_allocator_init(struct nvkm_pmu_allocator *allocator,
> + const char *name, u32 start, u32 len)
> +{
> + memset(allocator, 0, sizeof(struct nvkm_pmu_allocator));
> +
> + strncpy(allocator->name, name, 32);
> +
> + allocator->base = start;
> + allocator->limit = start + len - 1;
> +
> + allocator->bitmap = kcalloc(BITS_TO_LONGS(len), sizeof(long),
> + GFP_KERNEL);
> + if (!allocator->bitmap)
> + return -ENOMEM;
> +
> + allocator_dbg(allocator, "%s : base %d, limit %d",
> + allocator->name, allocator->base);
> +
> + init_rwsem(&allocator->rw_sema);
> +
> + allocator->alloc = nvkm_pmu_allocator_block_alloc;
> + allocator->free = nvkm_pmu_allocator_block_free;
> +
> + return 0;
> +}
> +
> +/* destroy allocator, free all remaining blocks if any */
> +void nvkm_pmu_allocator_destroy(struct nvkm_pmu_allocator *allocator)
> +{
> + down_write(&allocator->rw_sema);
> +
> + kfree(allocator->bitmap);
> +
> + memset(allocator, 0, sizeof(struct nvkm_pmu_allocator));
> +}
> +
> +/*
> + * *addr != ~0 for fixed address allocation. if *addr == 0, base addr is
> + * returned to caller in *addr.
> + *
> + * contiguous allocation, which allocates one block of
> + * contiguous address.
> +*/
> +int nvkm_pmu_allocator_block_alloc(struct nvkm_pmu_allocator *allocator,
> + u32 *addr, u32 len, u32 align)
> +{
> + unsigned long _addr;
> +
> + allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len);
> +
> + if ((*addr != 0 && *addr < allocator->base) || /* check addr range */
> + *addr + len > allocator->limit || /* check addr range */
> + *addr & (align - 1) || /* check addr alignment */
> + len == 0) /* check len */
> + return -EINVAL;
> +
> + len = ALIGN(len, align);
> + if (!len)
> + return -ENOMEM;
> +
> + down_write(&allocator->rw_sema);
> +
> + _addr = bitmap_find_next_zero_area(allocator->bitmap,
> + allocator->limit - allocator->base + 1,
> + *addr ? (*addr - allocator->base) : 0,
> + len,
> + align - 1);
> + if ((_addr > allocator->limit - allocator->base + 1) ||
> + (*addr && *addr != (_addr + allocator->base))) {
> + up_write(&allocator->rw_sema);
> + return -ENOMEM;
> + }
> +
> + bitmap_set(allocator->bitmap, _addr, len);
> + *addr = allocator->base + _addr;
> +
> + up_write(&allocator->rw_sema);
> +
> + allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len);
> +
> + return 0;
> +}
> +
> +/* free all blocks between start and end */
> +int nvkm_pmu_allocator_block_free(struct nvkm_pmu_allocator *allocator,
> + u32 addr, u32 len, u32 align)
> +{
> + allocator_dbg(allocator, "[in] addr %d, len %d", addr, len);
> +
> + if (addr + len > allocator->limit || /* check addr range */
> + addr < allocator->base ||
> + addr & (align - 1)) /* check addr alignment */
> + return -EINVAL;
> +
> + len = ALIGN(len, align);
> + if (!len)
> + return -EINVAL;
> +
> + down_write(&allocator->rw_sema);
> + bitmap_clear(allocator->bitmap, addr - allocator->base, len);
> + up_write(&allocator->rw_sema);
> +
> + allocator_dbg(allocator, "[out] addr %d, len %d", addr, len);
> +
> + return 0;
> +}
> +
> void
> nvkm_pmu_pgob(struct nvkm_pmu *pmu, bool enable)
> {
> diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.c b/drm/nouveau/nvkm/subdev/pmu/gk20a.c
> index a49934bbe637..0fd2530301a3 100644
> --- a/drm/nouveau/nvkm/subdev/pmu/gk20a.c
> +++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.c
> @@ -20,21 +20,67 @@
> * DEALINGS IN THE SOFTWARE.
> */
> #include "priv.h"
> +#include "gk20a.h"
> +#include <core/client.h>
> +#include <core/gpuobj.h>
> +#include <subdev/bar.h>
> +#include <subdev/fb.h>
> +#include <subdev/mc.h>
> +#include <subdev/timer.h>
> +#include <subdev/mmu.h>
> +#include <subdev/pmu.h>
> +#include <engine/falcon.h>
>
> +#include <linux/delay.h> /* for mdelay */
> +#include <linux/firmware.h>
> +#include <linux/clk.h>
> +#include <linux/module.h>
> +#include <linux/debugfs.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/uaccess.h>
> #include <subdev/clk.h>
> #include <subdev/timer.h>
> #include <subdev/volt.h>
>
> #define BUSY_SLOT 0
> #define CLK_SLOT 7
> +#define GK20A_PMU_UCODE_IMAGE "gpmu_ucode.bin"
> +
> +static int falc_trace_show(struct seq_file *s, void *data);
> +static int falc_trace_open(struct inode *inode, struct file *file)
> +{
> + return single_open(file, falc_trace_show, inode->i_private);
> +}
> +static const struct file_operations falc_trace_fops = {
> + .open = falc_trace_open,
> + .read = seq_read,
> + .llseek = seq_lseek,
> + .release = single_release,
> +};
> +struct pmu_priv_vm pmuvm;
> +const struct firmware *pmufw;
> +
> +static void gk20a_pmu_isr(struct nvkm_pmu *ppmu);
> +static void pmu_process_message(struct work_struct *work);
> +
> +static int
> +gk20a_pmu_init_vm(struct nvkm_pmu *ppmu, const struct firmware *fw);
> +static void
> +gk20a_pmu_dump_firmware_info(struct nvkm_pmu *ppmu, const struct firmware *fw);
> +
> +static int
> +gk20a_pmu_load_firmware(struct nvkm_pmu *ppmu, const struct firmware **pfw);
> +static int gk20a_init_pmu_setup_sw(struct nvkm_pmu *ppmu);
> +static int gk20a_init_pmu_setup_hw1(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc);
> +static void gk20a_pmu_intr(struct nvkm_subdev *subdev);
>
> +static void gk20a_pmu_pgob(struct nvkm_pmu *ppmu, bool enable);
> struct gk20a_pmu_dvfs_data {
> int p_load_target;
> int p_load_max;
> int p_smooth;
> unsigned int avg_load;
> };
> -
> struct gk20a_pmu_priv {
> struct nvkm_pmu base;
> struct nvkm_alarm alarm;
> @@ -46,7 +92,30 @@ struct gk20a_pmu_dvfs_dev_status {
> unsigned long busy;
> int cur_state;
> };
> -
> +int gk20a_pmu_debugfs_init(struct nvkm_pmu *ppmu)
> +{
> + struct dentry *d;
> + ppmu->debugfs = debugfs_create_dir("PMU", NULL);
> + if (!ppmu->debugfs)
> + goto err_out;
> + nv_debug(ppmu, "PMU directory created with success\n");
> + d = debugfs_create_file(
> + "falc_trace", 0644, ppmu->debugfs, ppmu,
> + &falc_trace_fops);
> + if (!d)
> + goto err_out;
> + return 0;
> +err_out:
> + pr_err("%s: Failed to make debugfs node\n", __func__);
> + debugfs_remove_recursive(ppmu->debugfs);
> + return -ENOMEM;
> +}
> +void gk20a_pmu_release_firmware(struct nvkm_pmu *ppmu,
> + const struct firmware *pfw)
> +{
> + nv_debug(ppmu, "firmware released\n");
> + release_firmware(pfw);
> +}
> static int
> gk20a_pmu_dvfs_target(struct gk20a_pmu_priv *priv, int *state)
> {
> @@ -164,31 +233,145 @@ gk20a_pmu_fini(struct nvkm_object *object, bool suspend)
> {
> struct nvkm_pmu *pmu = (void *)object;
> struct gk20a_pmu_priv *priv = (void *)pmu;
> -
> + nv_wr32(pmu, 0x10a014, 0x00000060);
> + flush_work(&pmu->recv.work);
> nvkm_timer_alarm_cancel(priv, &priv->alarm);
>
> return nvkm_subdev_fini(&pmu->base, suspend);
> }
> +static bool find_hex_in_string(char *strings, u32 *hex_pos)
> +{
> + u32 i = 0, j = strlen(strings);
> + for (; i < j; i++) {
> + if (strings[i] == '%')
> + if (strings[i + 1] == 'x' || strings[i + 1] == 'X') {
> + *hex_pos = i;
> + return true;
> + }
> + }
> + *hex_pos = -1;
> + return false;
> +}
> +static int falc_trace_show(struct seq_file *s, void *data)
> +{
> + struct nvkm_pmu *ppmu = s->private;
> + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> + struct pmu_desc *pmu = &impl->pmudata;
> + u32 i = 0, j = 0, k, l, m;
> + char part_str[40];
> + u32 data1;
> + char *log_data = kmalloc(GK20A_PMU_TRACE_BUFSIZE, GFP_KERNEL);
> + char *trace = log_data;
> + u32 *trace1 = (u32 *)log_data;
> + for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 4) {
> + data1 = nv_ro32(pmu->trace_buf.pmubufobj, 0x0000 + i);
> + memcpy(log_data + i, (void *)(&data1), 32);
> + }
> + for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
> + for (j = 0; j < 0x40; j++)
> + if (trace1[(i / 4) + j])
> + break;
> + if (j == 0x40)
> + goto out;
> + seq_printf(s, "Index %x: ", trace1[(i / 4)]);
> + l = 0;
> + m = 0;
> + while (find_hex_in_string((trace+i+20+m), &k)) {
> + if (k >= 40)
> + break;
> + strncpy(part_str, (trace+i+20+m), k);
> + part_str[k] = 0;
> + seq_printf(s, "%s0x%x", part_str,
> + trace1[(i / 4) + 1 + l]);
> + l++;
> + m += k + 2;
> + }
> + seq_printf(s, "%s", (trace+i+20+m));
> + }
> +out:
> + kfree(log_data);
> + return 0;
> +}
>
> int
> gk20a_pmu_init(struct nvkm_object *object)
> {
> - struct nvkm_pmu *pmu = (void *)object;
> - struct gk20a_pmu_priv *priv = (void *)pmu;
> + struct nvkm_pmu *ppmu = (void *)object;
> + struct nvkm_mc *pmc = nvkm_mc(object);
> + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> + struct pmu_desc *pmu;
> + struct gk20a_pmu_priv *priv;
> + struct pmu_gk20a_data *gk20adata;
> int ret;
>
> - ret = nvkm_subdev_init(&pmu->base);
> + pmu = &impl->pmudata;
> +
> + nv_subdev(ppmu)->intr = gk20a_pmu_intr;
> +
> + mutex_init(&pmu->isr_mutex);
> + mutex_init(&pmu->pmu_copy_lock);
> + mutex_init(&pmu->pmu_seq_lock);
> +
> + if (pmufw == NULL) {
> + ret = gk20a_pmu_load_firmware(ppmu, &pmufw);
> + if (ret < 0) {
> + nv_error(ppmu, "failed to load pmu fimware\n");
> + return ret;
> + }
> + nv_debug(ppmu, "loading firmware sucessful\n");
> + ret = gk20a_pmu_init_vm(ppmu, pmufw);
> + if (ret < 0) {
> + nv_error(ppmu, "failed to map pmu fw to va space\n");
> + goto init_vm_err;
> + }
> + }
> + pmu->desc = (struct pmu_ucode_desc *)pmufw->data;
> + gk20a_pmu_dump_firmware_info(ppmu, pmufw);
> +
> + if (pmu->desc->app_version != APP_VERSION_GK20A) {
> + nv_error(ppmu,
> + "PMU code version not supported version: %d\n",
> + pmu->desc->app_version);
> + ret = -EINVAL;
> + goto app_ver_err;
> + }
> + gk20adata = kzalloc(sizeof(*gk20adata), GFP_KERNEL);
> + if (!gk20adata) {
> + ret = -ENOMEM;
> + goto err;
> + }
> +
> + pmu->pmu_chip_data = (void *)gk20adata;
> +
> + pmu->remove_support = gk20a_remove_pmu_support;
> +
> + ret = gk20a_init_pmu_setup_sw(ppmu);
> if (ret)
> - return ret;
> + goto err;
> +
> + pmu->pmu_state = PMU_STATE_STARTING;
> + ret = gk20a_init_pmu_setup_hw1(ppmu, pmc);
> + if (ret)
> + goto err;
> +
> + priv = (void *)ppmu;
>
> - pmu->pgob = nvkm_pmu_pgob;
> + ret = nvkm_subdev_init(&ppmu->base);
> + if (ret)
> + goto err;
> +
> + ppmu->pgob = nvkm_pmu_pgob;
>
> - /* init pwr perf counter */
> - nv_wr32(pmu, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001);
> - nv_wr32(pmu, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002);
> - nv_wr32(pmu, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003);
> + /* init pmu perf counter */
> + nv_wr32(ppmu, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001);
> + nv_wr32(ppmu, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002);
> + nv_wr32(ppmu, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003);
>
> - nvkm_timer_alarm(pmu, 2000000000, &priv->alarm);
> + nvkm_timer_alarm(ppmu, 2000000000, &priv->alarm);
> +err:
> +init_vm_err:
> +app_ver_err:
> + gk20a_pmu_release_firmware(ppmu, pmufw);
> return ret;
> }
>
> @@ -226,4 +409,1926 @@ gk20a_pmu_oclass = &(struct nvkm_pmu_impl) {
> .init = gk20a_pmu_init,
> .fini = gk20a_pmu_fini,
> },
> + .base.handle = NV_SUBDEV(PMU, 0xea),
> + .pgob = gk20a_pmu_pgob,
> }.base;
> +void pmu_copy_from_dmem(struct pmu_desc *pmu,
> + u32 src, u8 *dst, u32 size, u8 port)
> +{
> + u32 i, words, bytes;
> + u32 data, addr_mask;
> + u32 *dst_u32 = (u32 *)dst;
> + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> + impl_from_pmu(pmu));
> +
> + if (size == 0) {
> + nv_error(ppmu, "size is zero\n");
> + goto out;
> + }
> +
> + if (src & 0x3) {
> + nv_error(ppmu, "src (0x%08x) not 4-byte aligned\n", src);
> + goto out;
> + }
> +
> + mutex_lock(&pmu->pmu_copy_lock);
> +
> + words = size >> 2;
> + bytes = size & 0x3;
> +
> + addr_mask = (0x3f << 2) | 0xff << 8;
> +
> + src &= addr_mask;
> +
> + nv_wr32(ppmu, (0x10a1c0 + (port * 8)), (src | (0x1 << 25)));
> +
> + for (i = 0; i < words; i++) {
> + dst_u32[i] = nv_rd32(ppmu, (0x0010a1c4 + port * 8));
> + nv_debug(ppmu, "0x%08x\n", dst_u32[i]);
> + }
> + if (bytes > 0) {
> + data = nv_rd32(ppmu, (0x0010a1c4 + port * 8));
> + nv_debug(ppmu, "0x%08x\n", data);
> +
> + for (i = 0; i < bytes; i++)
> + dst[(words << 2) + i] = ((u8 *)&data)[i];
> + }
> + mutex_unlock(&pmu->pmu_copy_lock);
> +out:
> + nv_debug(ppmu, "exit %s\n", __func__);
> +}
> +
> +void pmu_copy_to_dmem(struct pmu_desc *pmu,
> + u32 dst, u8 *src, u32 size, u8 port)
> +{
> + u32 i, words, bytes;
> + u32 data, addr_mask;
> + u32 *src_u32 = (u32 *)src;
> + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> + impl_from_pmu(pmu));
> +
> + if (size == 0) {
> + nv_error(ppmu, "size is zero\n");
> + goto out;
> + }
> +
> + if (dst & 0x3) {
> + nv_error(ppmu, "dst (0x%08x) not 4-byte aligned\n", dst);
> + goto out;
> + }
> +
> + mutex_lock(&pmu->pmu_copy_lock);
> +
> + words = size >> 2;
> + bytes = size & 0x3;
> +
> + addr_mask = (0x3f << 2) | 0xff << 8;
> +
> + dst &= addr_mask;
> +
> + nv_wr32(ppmu, (0x10a1c0 + (port * 8)), (dst | (0x1 << 24)));
> +
> + for (i = 0; i < words; i++) {
> + nv_wr32(ppmu, (0x10a1c4 + (port * 8)), src_u32[i]);
> + nv_debug(ppmu, "0x%08x\n", src_u32[i]);
> + }
> + if (bytes > 0) {
> + data = 0;
> + for (i = 0; i < bytes; i++)
> + ((u8 *)&data)[i] = src[(words << 2) + i];
> + nv_wr32(ppmu, (0x10a1c4 + (port * 8)), data);
> + nv_debug(ppmu, "0x%08x\n", data);
> + }
> +
> + data = nv_rd32(ppmu, (0x10a1c0 + (port * 8))) & addr_mask;
> + size = ALIGN(size, 4);
> + if (data != dst + size) {
> + nv_error(ppmu, "copy failed. bytes written %d, expected %d",
> + data - dst, size);
> + }
> + mutex_unlock(&pmu->pmu_copy_lock);
> +out:
> + nv_debug(ppmu, "exit %s", __func__);
> +}
> +
> +static int pmu_idle(struct nvkm_pmu *ppmu)
> +{
> + unsigned long end_jiffies = jiffies +
> + msecs_to_jiffies(2000);
> + u32 idle_stat;
> +
> + /* wait for pmu idle */
> + do {
> + idle_stat = nv_rd32(ppmu, 0x0010a04c);
> +
> + if (((idle_stat & 0x01) == 0) &&
> + ((idle_stat >> 1) & 0x7fff) == 0) {
> + break;
> + }
> +
> + if (time_after_eq(jiffies, end_jiffies)) {
> + nv_error(ppmu, "timeout waiting pmu idle : 0x%08x",
> + idle_stat);
> + return -EBUSY;
> + }
> + usleep_range(100, 200);
> + } while (1);
> +
> + return 0;
> +}
> +
> +void pmu_enable_irq(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc,
> + bool enable)
> +{
> +
> + nv_wr32(pmc, 0x00000640,
> + nv_rd32(pmc, 0x00000640) &
> + ~0x1000000);
> + nv_wr32(pmc, 0x00000644,
> + nv_rd32(pmc, 0x00000644) &
> + ~0x1000000);
> + nv_wr32(ppmu, 0x0010a014, 0xff);
> +
> + if (enable) {
> + nv_debug(ppmu, "enable pmu irq\n");
> + /* dest 0=falcon, 1=host; level 0=irq0, 1=irq1
> + nv_wr32(ppmu, 0x0010a01c, 0xff01ff52);
> + 0=disable, 1=enable*/
> +
> + nv_wr32(ppmu, 0x0010a010, 0xff);
> + nv_wr32(pmc, 0x00000640,
> + nv_rd32(pmc, 0x00000640) |
> + 0x1000000);
> + nv_wr32(pmc, 0x00000644,
> + nv_rd32(pmc, 0x00000644) |
> + 0x1000000);
> + } else {
> + nv_debug(ppmu, "disable pmu irq\n");
> + }
> +
> +}
> +
> +static int pmu_enable_hw(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc,
> + bool enable)
> +{
> + u32 reg;
> +
> + if (enable) {
> + int retries = GK20A_IDLE_CHECK_MAX / GK20A_IDLE_CHECK_DEFAULT;
> + /*need a spinlock?*/
> + reg = nv_rd32(pmc, 0x00000200);
> + reg |= 0x2000;
> + nv_wr32(pmc, 0x00000200, reg);
> + nv_rd32(pmc, 0x00000200);
> + do {
> + u32 w = nv_rd32(ppmu, 0x0010a10c) & 0x6;
> +
> + if (!w)
> + return 0;
> +
> + udelay(GK20A_IDLE_CHECK_DEFAULT);
> + } while (--retries);
> +
> + reg = nv_rd32(pmc, 0x00000200);
> + reg &= ~0x2000;
> + nv_wr32(pmc, 0x00000200, reg);
> + nv_error(ppmu, "Falcon mem scrubbing timeout\n");
> +
> + goto error;
> + } else {
> + reg = nv_rd32(pmc, 0x00000200);
> + reg &= ~0x2000;
> + nv_wr32(pmc, 0x00000200, reg);
> + return 0;
> + }
> +error:
> + return -ETIMEDOUT;
> +}
> +
> +static int pmu_enable(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc,
> + bool enable)
> +{
> + u32 pmc_enable;
> + int err;
> +
> + if (!enable) {
> + pmc_enable = nv_rd32(pmc, 0x200);
> + if ((pmc_enable & 0x2000) != 0x0) {
> + pmu_enable_irq(ppmu, pmc, false);
> + pmu_enable_hw(ppmu, pmc, false);
> + }
> + } else {
> + err = pmu_enable_hw(ppmu, pmc, true);
> + if (err)
> + return err;
> +
> + /* TBD: post reset */
> +
> + err = pmu_idle(ppmu);
> + if (err)
> + return err;
> +
> + pmu_enable_irq(ppmu, pmc, true);
> + }
> +
> + return 0;
> +}
> +
> +int pmu_reset(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc)
> +{
> + int err;
> +
> + err = pmu_idle(ppmu);
> + if (err)
> + return err;
> +
> + /* TBD: release pmu hw mutex */
> +
> + err = pmu_enable(ppmu, pmc, false);
> + if (err)
> + return err;
> +
> + err = pmu_enable(ppmu, pmc, true);
> + if (err)
> + return err;
> +
> + return 0;
> +}
> +
> +static int pmu_bootstrap(struct pmu_desc *pmu)
> +{
> + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> + impl_from_pmu(pmu));
> + struct pmu_ucode_desc *desc = pmu->desc;
> + u64 addr_code, addr_data, addr_load;
> + u32 i, blocks, addr_args;
> + u32 *adr_data, *adr_load, *adr_code;
> + struct pmu_cmdline_args_gk20a cmdline_args;
> + struct pmu_priv_vm *ppmuvm = &pmuvm;
> +
> + nv_wr32(ppmu, 0x0010a048,
> + nv_rd32(ppmu, 0x0010a048) | 0x01);
> + /*bind the address*/
> + nv_wr32(ppmu, 0x0010a480,
> + ppmuvm->mem->addr >> 12 |
> + 0x1 << 30 |
> + 0x20000000);
> +
> + /* TBD: load all other surfaces */
> + cmdline_args.falc_trace_size = GK20A_PMU_TRACE_BUFSIZE;
> + cmdline_args.falc_trace_dma_base =
> + u64_lo32(pmu->trace_buf.pmubufvma.offset >> 8);
> + cmdline_args.falc_trace_dma_idx = GK20A_PMU_DMAIDX_VIRT;
> + cmdline_args.cpu_freq_hz = 204;
> + cmdline_args.secure_mode = 0;
> +
> + addr_args = (nv_rd32(ppmu, 0x0010a108) >> 9) & 0x1ff;
> + addr_args = addr_args << GK20A_PMU_DMEM_BLKSIZE2;
> + addr_args -= sizeof(struct pmu_cmdline_args_gk20a);
> + nv_debug(ppmu, "initiating copy to dmem\n");
> + pmu_copy_to_dmem(pmu, addr_args,
> + (u8 *)&cmdline_args,
> + sizeof(struct pmu_cmdline_args_gk20a), 0);
> +
> + nv_wr32(ppmu, 0x0010a1c0, 0x1 << 24);
> +
> +
> + addr_code = u64_lo32((pmu->ucode.pmubufvma.offset +
> + desc->app_start_offset +
> + desc->app_resident_code_offset) >> 8);
> +
> + addr_data = u64_lo32((pmu->ucode.pmubufvma.offset +
> + desc->app_start_offset +
> + desc->app_resident_data_offset) >> 8);
> +
> + addr_load = u64_lo32((pmu->ucode.pmubufvma.offset +
> + desc->bootloader_start_offset) >> 8);
> +
> + adr_code = (u32 *) (&addr_code);
> + adr_load = (u32 *) (&addr_load);
> + adr_data = (u32 *) (&addr_data);
> + nv_wr32(ppmu, 0x0010a1c4, GK20A_PMU_DMAIDX_UCODE);
> + nv_debug(ppmu, "0x%08x\n", GK20A_PMU_DMAIDX_UCODE);
> + nv_wr32(ppmu, 0x0010a1c4, *(adr_code));
> + nv_debug(ppmu, "0x%08x\n", *(adr_code));
> + nv_wr32(ppmu, 0x0010a1c4, desc->app_size);
> + nv_debug(ppmu, "0x%08x\n", desc->app_size);
> + nv_wr32(ppmu, 0x0010a1c4, desc->app_resident_code_size);
> + nv_debug(ppmu, "0x%08x\n", desc->app_resident_code_size);
> + nv_wr32(ppmu, 0x0010a1c4, desc->app_imem_entry);
> + nv_debug(ppmu, "0x%08x\n", desc->app_imem_entry);
> + nv_wr32(ppmu, 0x0010a1c4, *(adr_data));
> + nv_debug(ppmu, "0x%08x\n", *(adr_data));
> + nv_wr32(ppmu, 0x0010a1c4, desc->app_resident_data_size);
> + nv_debug(ppmu, "0x%08x\n", desc->app_resident_data_size);
> + nv_wr32(ppmu, 0x0010a1c4, *(adr_code));
> + nv_debug(ppmu, "0x%08x\n", *(adr_code));
> + nv_wr32(ppmu, 0x0010a1c4, 0x1);
> + nv_debug(ppmu, "0x%08x\n", 1);
> + nv_wr32(ppmu, 0x0010a1c4, addr_args);
> + nv_debug(ppmu, "0x%08x\n", addr_args);
> +
> +
> + nv_wr32(ppmu, 0x0010a110,
> + *(adr_load) - (desc->bootloader_imem_offset >> 8));
> +
> + blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8;
> +
> + for (i = 0; i < blocks; i++) {
> + nv_wr32(ppmu, 0x0010a114,
> + desc->bootloader_imem_offset + (i << 8));
> + nv_wr32(ppmu, 0x0010a11c,
> + desc->bootloader_imem_offset + (i << 8));
> + nv_wr32(ppmu, 0x0010a118,
> + 0x01 << 4 |
> + 0x06 << 8 |
> + ((GK20A_PMU_DMAIDX_UCODE & 0x07) << 12));
> + }
> +
> +
> + nv_wr32(ppmu, 0x0010a104,
> + (0xffffffff & desc->bootloader_entry_point));
> +
> + nv_wr32(ppmu, 0x0010a100, 0x1 << 1);
> +
> + nv_wr32(ppmu, 0x0010a080, desc->app_version);
> +
> + return 0;
> +}
> +
> +void pmu_seq_init(struct pmu_desc *pmu)
> +{
> + u32 i;
> +
> + memset(pmu->seq, 0,
> + sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES);
> + memset(pmu->pmu_seq_tbl, 0,
> + sizeof(pmu->pmu_seq_tbl));
> +
> + for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++)
> + pmu->seq[i].id = i;
> +}
> +
> +static int pmu_seq_acquire(struct pmu_desc *pmu,
> + struct pmu_sequence **pseq)
> +{
> + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> + impl_from_pmu(pmu));
> + struct pmu_sequence *seq;
> + u32 index;
> +
> + mutex_lock(&pmu->pmu_seq_lock);
> + index = find_first_zero_bit(pmu->pmu_seq_tbl,
> + sizeof(pmu->pmu_seq_tbl));
> + if (index >= sizeof(pmu->pmu_seq_tbl)) {
> + nv_error(ppmu,
> + "no free sequence available");
> + mutex_unlock(&pmu->pmu_seq_lock);
> + return -EAGAIN;
> + }
> + set_bit(index, pmu->pmu_seq_tbl);
> + mutex_unlock(&pmu->pmu_seq_lock);
> +
> + seq = &pmu->seq[index];
> + seq->state = PMU_SEQ_STATE_PENDING;
> +
> + *pseq = seq;
> + return 0;
> +}
> +
> +static void pmu_seq_release(struct pmu_desc *pmu,
> + struct pmu_sequence *seq)
> +{
> + seq->state = PMU_SEQ_STATE_FREE;
> + seq->desc = PMU_INVALID_SEQ_DESC;
> + seq->callback = NULL;
> + seq->cb_params = NULL;
> + seq->msg = NULL;
> + seq->out_payload = NULL;
> + seq->in_gk20a.alloc.dmem.size = 0;
> + seq->out_gk20a.alloc.dmem.size = 0;
> + clear_bit(seq->id, pmu->pmu_seq_tbl);
> +}
> +
> +static int pmu_queue_init(struct pmu_desc *pmu,
> + u32 id, struct pmu_init_msg_pmu_gk20a *init)
> +{
> + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> + impl_from_pmu(pmu));
> + struct pmu_queue *queue = &pmu->queue[id];
> +
> + queue->id = id;
> + queue->index = init->queue_info[id].index;
> + queue->offset = init->queue_info[id].offset;
> + queue->size = init->queue_info[id].size;
> + queue->mutex_id = id;
> + mutex_init(&queue->mutex);
> +
> + nv_debug(ppmu, "queue %d: index %d, offset 0x%08x, size 0x%08x",
> + id, queue->index, queue->offset, queue->size);
> +
> + return 0;
> +}
> +
> +static int pmu_queue_head(struct pmu_desc *pmu, struct pmu_queue *queue,
> + u32 *head, bool set)
> +{
> + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> + impl_from_pmu(pmu));
> +
> + BUG_ON(!head);
> +
> + if (PMU_IS_COMMAND_QUEUE(queue->id)) {
> +
> + if (queue->index >= 0x00000004)
> + return -EINVAL;
> +
> + if (!set)
> + *head = nv_rd32(ppmu, 0x0010a4a0 + (queue->index * 4)) &
> + 0xffffffff;
> + else
> + nv_wr32(ppmu,
> + (0x0010a4a0 + (queue->index * 4)),
> + (*head & 0xffffffff));
> + } else {
> + if (!set)
> + *head = nv_rd32(ppmu, 0x0010a4c8) & 0xffffffff;
> + else
> + nv_wr32(ppmu, 0x0010a4c8, (*head & 0xffffffff));
> + }
> +
> + return 0;
> +}
> +
> +static int pmu_queue_tail(struct pmu_desc *pmu, struct pmu_queue *queue,
> + u32 *tail, bool set)
> +{
> + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> + impl_from_pmu(pmu));
> +
> + BUG_ON(!tail);
> +
> + if (PMU_IS_COMMAND_QUEUE(queue->id)) {
> +
> + if (queue->index >= 0x00000004)
> + return -EINVAL;
> +
> + if (!set)
> + *tail = nv_rd32(ppmu, 0x0010a4b0 + (queue->index * 4)) &
> + 0xffffffff;
> + else
> + nv_wr32(ppmu, (0x0010a4b0 + (queue->index * 4)),
> + (*tail & 0xffffffff));
> + } else {
> + if (!set)
> + *tail = nv_rd32(ppmu, 0x0010a4cc) & 0xffffffff;
> + else
> + nv_wr32(ppmu, 0x0010a4cc, (*tail & 0xffffffff));
> + }
> +
> + return 0;
> +}
> +
> +static inline void pmu_queue_read(struct pmu_desc *pmu,
> + u32 offset, u8 *dst, u32 size)
> +{
> + pmu_copy_from_dmem(pmu, offset, dst, size, 0);
> +}
> +
> +static inline void pmu_queue_write(struct pmu_desc *pmu,
> + u32 offset, u8 *src, u32 size)
> +{
> + pmu_copy_to_dmem(pmu, offset, src, size, 0);
> +}
> +
> +int pmu_mutex_acquire(struct nvkm_pmu *ppmu, u32 id, u32 *token)
> +{
> + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> + struct pmu_desc *pmu = &impl->pmudata;
> + struct pmu_mutex *mutex;
> + u32 data, owner, max_retry;
> +
> + if (!pmu->initialized)
> + return -EINVAL;
> +
> + BUG_ON(!token);
> + BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
> + BUG_ON(id > pmu->mutex_cnt);
> +
> + mutex = &pmu->mutex[id];
> +
> + owner = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4)) & 0xff;
> +
> + if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) {
> + BUG_ON(mutex->ref_cnt == 0);
> + nv_debug(ppmu, "already acquired by owner : 0x%08x", *token);
> + mutex->ref_cnt++;
> + return 0;
> + }
> +
> + max_retry = 40;
> + do {
> + data = nv_rd32(ppmu, 0x0010a488) & 0xff;
> + if (data == 0x00000000 ||
> + data == 0x000000ff) {
> + nv_warn(ppmu,
> + "fail to generate mutex token: val 0x%08x",
> + owner);
> + usleep_range(20, 40);
> + continue;
> + }
> +
> + owner = data;
> + nv_wr32(ppmu, (0x0010a580 + mutex->index * 4),
> + owner & 0xff);
> +
> + data = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4));
> +
> + if (owner == data) {
> + mutex->ref_cnt = 1;
> + nv_debug(ppmu, "mutex acquired: id=%d, token=0x%x",
> + mutex->index, *token);
> + *token = owner;
> + goto out;
> + } else {
> + nv_debug(ppmu, "fail to acquire mutex idx=0x%08x",
> + mutex->index);
> +
> + nv_mask(ppmu, 0x0010a48c, 0xff, (owner & 0xff));
> +
> + usleep_range(20, 40);
> + continue;
> + }
> + } while (max_retry-- > 0);
> +
> + return -EBUSY;
> +out:
> + return 0;
> +}
> +
> +int pmu_mutex_release(struct nvkm_pmu *ppmu, u32 id, u32 *token)
> +{
> + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> + struct pmu_desc *pmu = &impl->pmudata;
> + struct pmu_mutex *mutex;
> + u32 owner;
> +
> + if (!pmu->initialized)
> + return -EINVAL;
> +
> + BUG_ON(!token);
> + BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
> + BUG_ON(id > pmu->mutex_cnt);
> +
> + mutex = &pmu->mutex[id];
> +
> + owner = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4)) & 0xff;
> +
> + if (*token != owner) {
> + nv_error(ppmu,
> + "requester 0x%08x NOT match owner 0x%08x",
> + *token, owner);
> + return -EINVAL;
> + }
> +
> + if (--mutex->ref_cnt > 0)
> + return -EBUSY;
> +
> + nv_wr32(ppmu, 0x0010a580 + (mutex->index * 4), 0x00);
> +
> + nv_mask(ppmu, 0x0010a48c, 0xff, (owner & 0xff));
> +
> + nv_debug(ppmu, "mutex released: id=%d, token=0x%x",
> + mutex->index, *token);
> +
> + return 0;
> +}
> +
> +static int pmu_queue_lock(struct pmu_desc *pmu,
> + struct pmu_queue *queue)
> +{
> + int ret;
> + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> + impl_from_pmu(pmu));
> +
> + if (PMU_IS_MESSAGE_QUEUE(queue->id))
> + return 0;
> +
> + if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
> + mutex_lock(&queue->mutex);
> + return 0;
> + }
> +
> + ret = pmu_mutex_acquire(ppmu, queue->mutex_id, &queue->mutex_lock);
> + return ret;
> +}
> +
> +static int pmu_queue_unlock(struct pmu_desc *pmu,
> + struct pmu_queue *queue)
> +{
> + int ret;
> + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> + impl_from_pmu(pmu));
> +
> + if (PMU_IS_MESSAGE_QUEUE(queue->id))
> + return 0;
> +
> + if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
> + mutex_unlock(&queue->mutex);
> + return 0;
> + }
> +
> + ret = pmu_mutex_release(ppmu, queue->mutex_id, &queue->mutex_lock);
> + return ret;
> +}
> +
> +/* called by pmu_read_message, no lock */
> +static bool pmu_queue_is_empty(struct pmu_desc *pmu,
> + struct pmu_queue *queue)
> +{
> + u32 head, tail;
> +
> + pmu_queue_head(pmu, queue, &head, QUEUE_GET);
> + if (queue->opened && queue->oflag == OFLAG_READ)
> + tail = queue->position;
> + else
> + pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
> +
> + return head == tail;
> +}
> +
> +static bool pmu_queue_has_room(struct pmu_desc *pmu,
> + struct pmu_queue *queue, u32 size, bool *need_rewind)
> +{
> + u32 head, tail, free;
> + bool rewind = false;
> +
> + size = ALIGN(size, QUEUE_ALIGNMENT);
> +
> + pmu_queue_head(pmu, queue, &head, QUEUE_GET);
> + pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
> +
> + if (head >= tail) {
> + free = queue->offset + queue->size - head;
> + free -= PMU_CMD_HDR_SIZE;
> +
> + if (size > free) {
> + rewind = true;
> + head = queue->offset;
> + }
> + }
> +
> + if (head < tail)
> + free = tail - head - 1;
> +
> + if (need_rewind)
> + *need_rewind = rewind;
> +
> + return size <= free;
> +}
> +
> +static int pmu_queue_push(struct pmu_desc *pmu,
> + struct pmu_queue *queue, void *data, u32 size)
> +{
> +
> + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> + impl_from_pmu(pmu));
> + if (!queue->opened && queue->oflag == OFLAG_WRITE) {
> + nv_error(ppmu, "queue not opened for write\n");
> + return -EINVAL;
> + }
> +
> + pmu_queue_write(pmu, queue->position, data, size);
> + queue->position += ALIGN(size, QUEUE_ALIGNMENT);
> + return 0;
> +}
> +
> +/*
> + * Read up to @size bytes from @queue at the current position into @data and
> + * advance the position by the aligned number of bytes read.
> + *
> + * @bytes_read receives the number of bytes copied; it is 0 when the queue
> + * is empty.  If fewer than @size bytes are available before head (or before
> + * the end of the queue), the read is truncated and a warning is logged.
> + *
> + * Returns 0 on success, or -EINVAL if the queue is not open for reading.
> + */
> +static int pmu_queue_pop(struct pmu_desc *pmu,
> +			struct pmu_queue *queue, void *data, u32 size,
> +			u32 *bytes_read)
> +{
> +	u32 head, tail, used;
> +	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +		impl_from_pmu(pmu));
> +
> +	*bytes_read = 0;
> +
> +	/*
> +	 * The queue must be open AND opened for read.  The earlier test
> +	 * ("!opened && oflag == OFLAG_READ") accepted a pop on a queue that
> +	 * was currently open for write.
> +	 */
> +	if (!queue->opened || queue->oflag != OFLAG_READ) {
> +		nv_error(ppmu, "queue not opened for read\n");
> +		return -EINVAL;
> +	}
> +
> +	pmu_queue_head(pmu, queue, &head, QUEUE_GET);
> +	tail = queue->position;
> +
> +	if (head == tail)
> +		return 0;
> +
> +	/* bytes available: up to head, or up to the end when head wrapped */
> +	if (head > tail)
> +		used = head - tail;
> +	else
> +		used = queue->offset + queue->size - tail;
> +
> +	if (size > used) {
> +		nv_warn(ppmu, "queue size smaller than request read\n");
> +		size = used;
> +	}
> +
> +	pmu_queue_read(pmu, tail, data, size);
> +	queue->position += ALIGN(size, QUEUE_ALIGNMENT);
> +	*bytes_read = size;
> +	return 0;
> +}
> +
> +/*
> + * Move the queue position back to the start of the queue (queue->offset).
> + * When the queue is open for write, a header-only PMU_UNIT_REWIND command
> + * is pushed at the old position first.  Does nothing but log an error if
> + * the queue is not open.
> + */
> +static void pmu_queue_rewind(struct pmu_desc *pmu,
> +			struct pmu_queue *queue)
> +{
> +	struct pmu_cmd cmd;
> +	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +		impl_from_pmu(pmu));
> +
> +	if (!queue->opened) {
> +		nv_error(ppmu, "queue not opened\n");
> +		goto out;
> +	}
> +
> +	if (queue->oflag == OFLAG_WRITE) {
> +		/*
> +		 * Zero the header so that the fields not set below are not
> +		 * uninitialized stack bytes when copied into the queue.
> +		 */
> +		memset(&cmd.hdr, 0, sizeof(cmd.hdr));
> +		cmd.hdr.unit_id = PMU_UNIT_REWIND;
> +		cmd.hdr.size = PMU_CMD_HDR_SIZE;
> +		if (pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size))
> +			goto out;
> +		nv_debug(ppmu, "queue %d rewinded\n", queue->id);
> +	}
> +
> +	queue->position = queue->offset;
> +out:
> +	nv_debug(ppmu, "exit %s\n", __func__);
> +}
> +
> +/*
> + * Lock @queue and open it for reading, starting at the current tail.
> + * Returns 0 on success or the error from pmu_queue_lock().
> + * Opening a queue that is already open is a BUG.
> + */
> +static int pmu_queue_open_read(struct pmu_desc *pmu,
> +			struct pmu_queue *queue)
> +{
> +	int ret = pmu_queue_lock(pmu, queue);
> +
> +	if (ret != 0)
> +		return ret;
> +
> +	BUG_ON(queue->opened);
> +
> +	/* the read cursor starts at the current tail */
> +	pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET);
> +	queue->opened = true;
> +	queue->oflag = OFLAG_READ;
> +
> +	return 0;
> +}
> +
> +/*
> + * Lock @queue and open it for writing @size bytes, starting at the current
> + * head.  Fails with -EAGAIN (queue unlocked again) when there is not enough
> + * free space.  If the space check asked for a rewind, the queue is rewound
> + * right after being opened.
> + */
> +static int pmu_queue_open_write(struct pmu_desc *pmu,
> +			struct pmu_queue *queue, u32 size)
> +{
> +	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +		impl_from_pmu(pmu));
> +	bool must_rewind = false;
> +	int ret;
> +
> +	ret = pmu_queue_lock(pmu, queue);
> +	if (ret != 0)
> +		return ret;
> +
> +	BUG_ON(queue->opened);
> +
> +	if (pmu_queue_has_room(pmu, queue, size, &must_rewind)) {
> +		pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET);
> +		queue->opened = true;
> +		queue->oflag = OFLAG_WRITE;
> +
> +		if (must_rewind)
> +			pmu_queue_rewind(pmu, queue);
> +
> +		return 0;
> +	}
> +
> +	nv_error(ppmu, "queue full");
> +	pmu_queue_unlock(pmu, queue);
> +	return -EAGAIN;
> +}
> +
> +/*
> + * Close and unlock @queue.  With @commit set, the working position is
> + * written back as the new tail (read mode) or head (write mode).
> + * Closing a queue that is not open is a no-op.  Always returns 0.
> + */
> +static int pmu_queue_close(struct pmu_desc *pmu,
> +			struct pmu_queue *queue, bool commit)
> +{
> +	if (queue->opened) {
> +		if (commit && queue->oflag == OFLAG_READ)
> +			pmu_queue_tail(pmu, queue,
> +					&queue->position, QUEUE_SET);
> +		else if (commit)
> +			pmu_queue_head(pmu, queue,
> +					&queue->position, QUEUE_SET);
> +
> +		queue->opened = false;
> +		pmu_queue_unlock(pmu, queue);
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Poll until *@var equals @val or @timeout milliseconds have elapsed.
> + *
> + * While waiting, register 0x0010a008 is read and, if non-zero,
> + * gk20a_pmu_isr() is called directly.  The sleep between polls doubles
> + * each round up to GK20A_IDLE_CHECK_MAX.
> + *
> + * Returns 0 once the condition holds, -ETIMEDOUT otherwise.
> + */
> +int pmu_wait_message_cond(struct pmu_desc *pmu, u32 timeout,
> +			u32 *var, u32 val)
> +{
> +	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +		impl_from_pmu(pmu));
> +	unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
> +	unsigned long delay = GK20A_IDLE_CHECK_DEFAULT;
> +
> +	do {
> +		if (*var == val)
> +			return 0;
> +
> +		if (nv_rd32(ppmu, 0x0010a008))
> +			gk20a_pmu_isr(ppmu);
> +
> +		usleep_range(delay, delay * 2);
> +		delay = min_t(u32, delay << 1, GK20A_IDLE_CHECK_MAX);
> +	} while (time_before(jiffies, end_jiffies));
> +
> +	/*
> +	 * The condition may have become true during the last sleep; check
> +	 * once more so that case is not reported as a timeout.
> +	 */
> +	if (*var == val)
> +		return 0;
> +
> +	return -ETIMEDOUT;
> +}
> +
> +/*
> + * Dump a set of PMU falcon registers through nv_debug().
> + *
> + * Most values are plain reads.  Several are obtained by first writing a
> + * selector to register 0x0010a200 and then reading 0x0010a20c, so this
> + * function writes to the device and the order of those pairs matters.
> + */
> +void pmu_dump_falcon_stats(struct pmu_desc *pmu)
> +{
> + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> + impl_from_pmu(pmu));
> + int i;
> +
> + nv_debug(ppmu, "pmu_falcon_os_r : %d\n",
> + nv_rd32(ppmu, 0x0010a080));
> + nv_debug(ppmu, "pmu_falcon_cpuctl_r : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a100));
> + nv_debug(ppmu, "pmu_falcon_idlestate_r : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a04c));
> + nv_debug(ppmu, "pmu_falcon_mailbox0_r : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a040));
> + nv_debug(ppmu, "pmu_falcon_mailbox1_r : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a044));
> + nv_debug(ppmu, "pmu_falcon_irqstat_r : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a008));
> + nv_debug(ppmu, "pmu_falcon_irqmode_r : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a00c));
> + nv_debug(ppmu, "pmu_falcon_irqmask_r : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a018));
> + nv_debug(ppmu, "pmu_falcon_irqdest_r : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a01c));
> +
> + /* 12 consecutive 32-bit registers starting at 0x0010a450 */
> + for (i = 0; i < 0x0000000c; i++)
> + nv_debug(ppmu, "pmu_pmu_mailbox_r(%d) : 0x%x\n",
> + i, nv_rd32(ppmu, 0x0010a450 + i*4));
> +
> + /* 4 consecutive 32-bit registers starting at 0x0010a5c0 */
> + for (i = 0; i < 0x00000004; i++)
> + nv_debug(ppmu, "pmu_pmu_debug_r(%d) : 0x%x\n",
> + i, nv_rd32(ppmu, 0x0010a5c0 + i*4));
> +
> + /* select entry i (bits 8..12) with opcode 0xe, then read it back */
> + for (i = 0; i < 6/*NV_Ppmu_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) {
> + nv_wr32(ppmu, 0x0010a200,
> + 0xe |
> + (i & 0x1f) << 8);
> + nv_debug(ppmu, "pmu_rstat (%d) : 0x%x\n",
> + i, nv_rd32(ppmu, 0x0010a20c));
> + }
> +
> + /* the remaining bar0 registers are only dumped on a non-zero status */
> + i = nv_rd32(ppmu, 0x0010a7b0);
> + nv_debug(ppmu, "pmu_pmu_bar0_error_status_r : 0x%x\n", i);
> + if (i != 0) {
> + nv_debug(ppmu, "pmu_pmu_bar0_addr_r : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a7a0));
> + nv_debug(ppmu, "pmu_pmu_bar0_data_r : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a7a4));
> + nv_debug(ppmu, "pmu_pmu_bar0_timeout_r : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a7a8));
> + nv_debug(ppmu, "pmu_pmu_bar0_ctl_r : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a7ac));
> + }
> +
> + i = nv_rd32(ppmu, 0x0010a988);
> + nv_debug(ppmu, "pmu_pmu_bar0_fecs_error_r : 0x%x\n", i);
> +
> + /* the address register is only dumped when bit 31 of the status is set */
> + i = nv_rd32(ppmu, 0x0010a16c);
> + nv_debug(ppmu, "pmu_falcon_exterrstat_r : 0x%x\n", i);
> + if (((i >> 31) & 0x1)) {
> + nv_debug(ppmu, "pmu_falcon_exterraddr_r : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a168));
> + /*nv_debug(ppmu, "pmc_enable : 0x%x\n",
> + nv_rd32(pmc, 0x00000200));*/
> + }
> +
> + nv_debug(ppmu, "pmu_falcon_engctl_r : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a0a4));
> + nv_debug(ppmu, "pmu_falcon_curctx_r : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a050));
> + nv_debug(ppmu, "pmu_falcon_nxtctx_r : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a054));
> +
> + /* select a PMU_FALCON_REG_* index with opcode 0x8, then read it back */
> + nv_wr32(ppmu, 0x0010a200,
> + 0x8 |
> + ((PMU_FALCON_REG_IMB & 0x1f) << 8));
> + nv_debug(ppmu, "PMU_FALCON_REG_IMB : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a20c));
> +
> + nv_wr32(ppmu, 0x0010a200,
> + 0x8 |
> + ((PMU_FALCON_REG_DMB & 0x1f) << 8));
> + nv_debug(ppmu, "PMU_FALCON_REG_DMB : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a20c));
> +
> + nv_wr32(ppmu, 0x0010a200,
> + 0x8 |
> + ((PMU_FALCON_REG_CSW & 0x1f) << 8));
> + nv_debug(ppmu, "PMU_FALCON_REG_CSW : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a20c));
> +
> + nv_wr32(ppmu, 0x0010a200,
> + 0x8 |
> + ((PMU_FALCON_REG_CTX & 0x1f) << 8));
> + nv_debug(ppmu, "PMU_FALCON_REG_CTX : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a20c));
> +
> + nv_wr32(ppmu, 0x0010a200,
> + 0x8 |
> + ((PMU_FALCON_REG_EXCI & 0x1f) << 8));
> + nv_debug(ppmu, "PMU_FALCON_REG_EXCI : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a20c));
> +
> + /* PC and SP are sampled four times in a row */
> + for (i = 0; i < 4; i++) {
> + nv_wr32(ppmu, 0x0010a200,
> + 0x8 |
> + ((PMU_FALCON_REG_PC & 0x1f) << 8));
> + nv_debug(ppmu, "PMU_FALCON_REG_PC : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a20c));
> +
> + nv_wr32(ppmu, 0x0010a200,
> + 0x8 |
> + ((PMU_FALCON_REG_SP & 0x1f) << 8));
> + nv_debug(ppmu, "PMU_FALCON_REG_SP : 0x%x\n",
> + nv_rd32(ppmu, 0x0010a20c));
> + }
> +
> + /* PMU may crash due to FECS crash. Dump FECS status */
> + /*gk20a_fecs_dump_falcon_stats(g);*/
> +}
> +
> +/*
> + * Sanity-check a command before it is posted to the PMU.
> + *
> + * Checks the queue id, the command header size (at least one header, at
> + * most half the queue), the optional reply buffer size, the unit id and,
> + * when @payload is given, that its in/out descriptors are consistent and
> + * fit inside the command.
> + *
> + * Returns true when the command is acceptable; otherwise logs the
> + * offending parameters and returns false.  @payload may be NULL.
> + */
> +static bool pmu_validate_cmd(struct pmu_desc *pmu, struct pmu_cmd *cmd,
> +			struct pmu_msg *msg, struct pmu_payload *payload,
> +			u32 queue_id)
> +{
> +	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +		impl_from_pmu(pmu));
> +	struct pmu_queue *queue;
> +	u32 in_size, out_size;
> +
> +	nv_debug(ppmu, "pmu validate cmd\n");
> +	pmu_dump_falcon_stats(pmu);
> +
> +	if (!PMU_IS_SW_COMMAND_QUEUE(queue_id))
> +		goto invalid_cmd;
> +
> +	queue = &pmu->queue[queue_id];
> +	if (cmd->hdr.size < PMU_CMD_HDR_SIZE)
> +		goto invalid_cmd;
> +
> +	if (cmd->hdr.size > (queue->size >> 1))
> +		goto invalid_cmd;
> +
> +	if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE)
> +		goto invalid_cmd;
> +
> +	if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id))
> +		goto invalid_cmd;
> +
> +	if (payload == NULL)
> +		return true;
> +
> +	if (payload->in.buf == NULL && payload->out.buf == NULL)
> +		goto invalid_cmd;
> +
> +	if ((payload->in.buf != NULL && payload->in.size == 0) ||
> +	    (payload->out.buf != NULL && payload->out.size == 0))
> +		goto invalid_cmd;
> +
> +	/* each allocation descriptor must fit inside the command */
> +	in_size = PMU_CMD_HDR_SIZE;
> +	if (payload->in.buf) {
> +		in_size += payload->in.offset;
> +		in_size += sizeof(struct pmu_allocation_gk20a);
> +	}
> +
> +	out_size = PMU_CMD_HDR_SIZE;
> +	if (payload->out.buf) {
> +		out_size += payload->out.offset;
> +		out_size += sizeof(struct pmu_allocation_gk20a);
> +	}
> +
> +	if (in_size > cmd->hdr.size || out_size > cmd->hdr.size)
> +		goto invalid_cmd;
> +
> +	if ((payload->in.offset != 0 && payload->in.buf == NULL) ||
> +	    (payload->out.offset != 0 && payload->out.buf == NULL))
> +		goto invalid_cmd;
> +
> +	return true;
> +
> +invalid_cmd:
> +	/*
> +	 * This label is reached both with and without a payload, so every
> +	 * payload field is guarded; dereferencing it unconditionally would
> +	 * oops for payload-less commands.  msg_size reports the reply
> +	 * buffer's hdr.size (it used to print unit_id by mistake).
> +	 */
> +	nv_error(ppmu, "invalid pmu cmd :\n"
> +		"queue_id=%d,\n"
> +		"cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n"
> +		"payload in=%p, in_size=%d, in_offset=%d,\n"
> +		"payload out=%p, out_size=%d, out_offset=%d",
> +		queue_id, cmd->hdr.size, cmd->hdr.unit_id,
> +		msg, msg ? msg->hdr.size : ~0,
> +		payload ? (void *)&payload->in : NULL,
> +		payload ? payload->in.size : 0,
> +		payload ? payload->in.offset : 0,
> +		payload ? (void *)&payload->out : NULL,
> +		payload ? payload->out.size : 0,
> +		payload ? payload->out.offset : 0);
> +
> +	return false;
> +}
> +
> +/*
> + * Copy @cmd into command queue @queue_id.
> + *
> + * While the queue reports -EAGAIN (full), the open is retried every 1-2 ms
> + * until @timeout milliseconds have passed.  The new head is committed only
> + * if the command was actually written.
> + *
> + * Returns 0 on success or a negative errno.
> + */
> +static int pmu_write_cmd(struct pmu_desc *pmu, struct pmu_cmd *cmd,
> +			u32 queue_id, unsigned long timeout)
> +{
> +	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +		impl_from_pmu(pmu));
> +	struct pmu_queue *queue;
> +	unsigned long end_jiffies = jiffies +
> +		msecs_to_jiffies(timeout);
> +	int err;
> +
> +	nv_debug(ppmu, "pmu write cmd\n");
> +
> +	queue = &pmu->queue[queue_id];
> +
> +	do {
> +		err = pmu_queue_open_write(pmu, queue, cmd->hdr.size);
> +		if (err == -EAGAIN && time_before(jiffies, end_jiffies))
> +			usleep_range(1000, 2000);
> +		else
> +			break;
> +	} while (1);
> +
> +	if (err)
> +		goto clean_up;
> +
> +	err = pmu_queue_push(pmu, queue, cmd, cmd->hdr.size);
> +	if (err) {
> +		/* nothing was written: close without moving the head */
> +		pmu_queue_close(pmu, queue, false);
> +		goto clean_up;
> +	}
> +
> +	err = pmu_queue_close(pmu, queue, true);
> +
> +clean_up:
> +	if (err)
> +		nv_error(ppmu,
> +			"fail to write cmd to queue %d", queue_id);
> +	else
> +		nv_debug(ppmu, "cmd writing done");
> +
> +	return err;
> +}
> +
> +/*
> + * Post a command to the PMU.
> + *
> + * @cmd:      command to send; its header seq_id/ctrl_flags are filled here
> + * @msg:      optional buffer that receives the reply
> + * @payload:  optional in/out payload staged through PMU DMEM
> + * @queue_id: command queue to write to
> + * @callback: optional completion callback, called with @cb_param
> + * @seq_desc: receives the descriptor assigned to this request
> + * @timeout:  milliseconds to wait for room in the queue
> + *
> + * Returns 0 once the command is in the queue, or a negative errno.  On any
> + * failure the sequence and the DMEM allocated here are released again.
> + */
> +int gk20a_pmu_cmd_post(struct nvkm_pmu *ppmu, struct pmu_cmd *cmd,
> +		struct pmu_msg *msg, struct pmu_payload *payload,
> +		u32 queue_id, pmu_callback callback, void *cb_param,
> +		u32 *seq_desc, unsigned long timeout)
> +{
> +	struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +	struct pmu_desc *pmu = &impl->pmudata;
> +	struct pmu_sequence *seq;
> +	struct pmu_allocation_gk20a *in = NULL, *out = NULL;
> +	/* track what was really allocated so cleanup frees exactly that */
> +	bool in_allocated = false, out_allocated = false;
> +	int err;
> +
> +	BUG_ON(!cmd);
> +	BUG_ON(!seq_desc);
> +	BUG_ON(!pmu->pmu_ready);
> +	nv_debug(ppmu, "Post CMD\n");
> +	if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id))
> +		return -EINVAL;
> +
> +	err = pmu_seq_acquire(pmu, &seq);
> +	if (err)
> +		return err;
> +
> +	cmd->hdr.seq_id = seq->id;
> +
> +	cmd->hdr.ctrl_flags = 0;
> +	cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS;
> +	cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR;
> +
> +	seq->callback = callback;
> +	seq->cb_params = cb_param;
> +	seq->msg = msg;
> +	seq->out_payload = NULL;
> +	seq->desc = pmu->next_seq_desc++;
> +
> +	if (payload)
> +		seq->out_payload = payload->out.buf;
> +
> +	*seq_desc = seq->desc;
> +
> +	if (payload && payload->in.offset != 0) {
> +		in = (struct pmu_allocation_gk20a *)((u8 *)&cmd->cmd +
> +			payload->in.offset);
> +
> +		/* a shared in/out buffer needs room for the larger one */
> +		if (payload->in.buf != payload->out.buf)
> +			in->alloc.dmem.size = (u16)payload->in.size;
> +		else
> +			in->alloc.dmem.size =
> +				(u16)max(payload->in.size, payload->out.size);
> +
> +		err = pmu->dmem.alloc(&pmu->dmem,
> +				(void *)&in->alloc.dmem.offset,
> +				in->alloc.dmem.size,
> +				PMU_DMEM_ALLOC_ALIGNMENT);
> +		if (err)
> +			goto clean_up;
> +		in_allocated = true;
> +
> +		pmu_copy_to_dmem(pmu, (in->alloc.dmem.offset),
> +			payload->in.buf, payload->in.size, 0);
> +		seq->in_gk20a.alloc.dmem.size = in->alloc.dmem.size;
> +		seq->in_gk20a.alloc.dmem.offset = in->alloc.dmem.offset;
> +	}
> +
> +	if (payload && payload->out.offset != 0) {
> +		out = (struct pmu_allocation_gk20a *)((u8 *)&cmd->cmd +
> +			payload->out.offset);
> +		out->alloc.dmem.size = (u16)payload->out.size;
> +
> +		if (payload->out.buf != payload->in.buf) {
> +			err = pmu->dmem.alloc(&pmu->dmem,
> +				(void *)&out->alloc.dmem.offset,
> +				out->alloc.dmem.size,
> +				PMU_DMEM_ALLOC_ALIGNMENT);
> +			if (err)
> +				goto clean_up;
> +			out_allocated = true;
> +		} else {
> +			/* shared buffer: reuse the input allocation */
> +			BUG_ON(in == NULL);
> +			out->alloc.dmem.offset = in->alloc.dmem.offset;
> +		}
> +
> +		seq->out_gk20a.alloc.dmem.size = out->alloc.dmem.size;
> +		seq->out_gk20a.alloc.dmem.offset = out->alloc.dmem.offset;
> +	}
> +
> +	seq->state = PMU_SEQ_STATE_USED;
> +	err = pmu_write_cmd(pmu, cmd, queue_id, timeout);
> +	if (err) {
> +		/*
> +		 * The command never reached the PMU, so no reply will ever
> +		 * release this sequence.  Report the failure to the caller
> +		 * instead of returning success.
> +		 */
> +		seq->state = PMU_SEQ_STATE_PENDING;
> +		goto clean_up;
> +	}
> +
> +	nv_debug(ppmu, "cmd posted\n");
> +
> +	return 0;
> +
> +clean_up:
> +	nv_debug(ppmu, "cmd post failed\n");
> +	/*
> +	 * Free only what was allocated above: a failed allocation has
> +	 * nothing to free, and a shared in/out buffer is freed once.
> +	 */
> +	if (in_allocated)
> +		pmu->dmem.free(&pmu->dmem,
> +			in->alloc.dmem.offset,
> +			in->alloc.dmem.size,
> +			PMU_DMEM_ALLOC_ALIGNMENT);
> +	if (out_allocated)
> +		pmu->dmem.free(&pmu->dmem,
> +			out->alloc.dmem.offset,
> +			out->alloc.dmem.size,
> +			PMU_DMEM_ALLOC_ALIGNMENT);
> +
> +	/* do not leave stale DMEM bookkeeping on the released sequence */
> +	seq->in_gk20a.alloc.dmem.size = 0;
> +	seq->out_gk20a.alloc.dmem.size = 0;
> +
> +	pmu_seq_release(pmu, seq);
> +	return err;
> +}
> +
> +/*
> + * PMU falcon interrupt service routine.
> + *
> + * Does nothing unless pmu->isr_enabled is set.  Otherwise the pending bits
> + * are read from 0x0010a008, masked with (0x0010a018 & 0x0010a01c), handled
> + * per bit below and finally written to 0x0010a004.
> + * NOTE(review): pmu_enable_irq() is presumably the PMU irq mask/unmask
> + * helper; it is called with 'true' several times on some paths - confirm
> + * that the repeated calls are harmless.
> + */
> +void gk20a_pmu_isr(struct nvkm_pmu *ppmu)
> +{
> + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> + struct pmu_desc *pmu = &impl->pmudata;
> + struct nvkm_mc *pmc = nvkm_mc(ppmu);
> + struct pmu_queue *queue;
> + u32 intr, mask;
> + bool recheck = false;
> + if (!pmu->isr_enabled)
> + goto out;
> +
> + mask = nv_rd32(ppmu, 0x0010a018) &
> + nv_rd32(ppmu, 0x0010a01c);
> +
> + intr = nv_rd32(ppmu, 0x0010a008) & mask;
> +
> + nv_debug(ppmu, "received falcon interrupt: 0x%08x", intr);
> + pmu_enable_irq(ppmu, pmc, false);
> + /* also taken when no masked bit is pending, not only in state OFF */
> + if (!intr || pmu->pmu_state == PMU_STATE_OFF) {
> + nv_wr32(ppmu, 0x0010a004, intr);
> + nv_error(ppmu, "pmu state off\n");
> + pmu_enable_irq(ppmu, pmc, true);
> + goto out;
> + }
> + /* bit 4: halt - only logged and dumped */
> + if (intr & 0x10) {
> + nv_error(ppmu,
> + "pmu halt intr not implemented");
> + pmu_dump_falcon_stats(pmu);
> + }
> + /* bit 5: external error - dumped, then bit 31 of 0x0010a16c is cleared */
> + if (intr & 0x20) {
> + nv_error(ppmu,
> + "pmu exterr intr not implemented. Clearing interrupt.");
> + pmu_dump_falcon_stats(pmu);
> +
> + nv_wr32(ppmu, 0x0010a16c,
> + nv_rd32(ppmu, 0x0010a16c) &
> + ~(0x1 << 31));
> + }
> + /* bit 6: defer message processing to the isr_workq work item */
> + if (intr & 0x40) {
> + nv_debug(ppmu, "scheduling work\n");
> + schedule_work(&pmu->isr_workq);
> + pmu_enable_irq(ppmu, pmc, true);
> + recheck = true;
> + }
> +
> + if (recheck) {
> + /* message queue still has data: write 0x40 to 0x0010a000 */
> + queue = &pmu->queue[PMU_MESSAGE_QUEUE];
> + if (!pmu_queue_is_empty(pmu, queue))
> + nv_wr32(ppmu, 0x0010a000, 0x40);
> + } else {
> + pmu_enable_irq(ppmu, pmc, true);
> + }
> +
> + pmu_enable_irq(ppmu, pmc, true);
> + nv_wr32(ppmu, 0x0010a004, intr);
> +out:
> + nv_debug(ppmu, "irq handled\n");
> +}
> +
> +/*
> + * Set up the PMU's address space and copy the firmware image into it.
> + *
> + * Allocates the instance block and page directory, creates a 300 KiB
> + * virtual address range, writes the page directory address and limit into
> + * the instance block, then copies the ucode that follows the descriptor in
> + * @fw into a newly allocated buffer and maps that buffer into the VM.
> + *
> + * Returns 0 on success, -EINVAL if the descriptor in @fw does not fit the
> + * blob or the ucode buffer, or the error of the failing allocation.
> + */
> +static int
> +gk20a_pmu_init_vm(struct nvkm_pmu *ppmu, const struct firmware *fw)
> +{
> +	int ret = 0;
> +	struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +	struct pmu_desc *pmu = &impl->pmudata;
> +	const u32 *ucode_image;
> +	struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data;
> +	u64 ucode_len;
> +	u32 i, words;
> +	struct pmu_priv_vm *ppmuvm = &pmuvm;
> +	struct nvkm_device *device = nv_device(&ppmu->base);
> +	struct nvkm_vm *vm;
> +	u64 pmu_area_len = 300*1024;
> +
> +	ppmu->pmuvm = &pmuvm;
> +	ppmu->pg_buf = &pmu->pg_buf;
> +	pmu->pmu = ppmu;
> +
> +	/*
> +	 * The descriptor comes from a file: make sure the region copied
> +	 * below lies inside the blob and inside the destination buffer.
> +	 */
> +	if (fw->size < sizeof(*desc) || desc->descriptor_size > fw->size)
> +		return -EINVAL;
> +	ucode_len = (u64)desc->app_start_offset + desc->app_size;
> +	if (ucode_len > GK20A_PMU_UCODE_SIZE_MAX ||
> +	    ucode_len > fw->size - desc->descriptor_size)
> +		return -EINVAL;
> +
> +	/* mem for inst blk*/
> +	ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, 0x1000, 0, 0,
> +				&ppmuvm->mem);
> +	if (ret)
> +		goto instblk_alloc_err;
> +
> +	/* mem for pgd*/
> +	ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, 0x8000, 0, 0,
> +				&ppmuvm->pgd);
> +	if (ret)
> +		goto pgd_alloc_err;
> +
> +	/*allocate virtual memory range*/
> +	ret = nvkm_vm_new(device, 0, pmu_area_len, 0, &vm);
> +	if (ret)
> +		goto virt_alloc_err;
> +
> +	atomic_inc(&vm->engref[NVDEV_SUBDEV_PMU]);
> +	/*update VM with pgd */
> +
> +	/* NOTE(review): 'vm' is not released on the error paths below. */
> +	ret = nvkm_vm_ref(vm, &ppmuvm->vm, ppmuvm->pgd);
> +	if (ret)
> +		goto virt_alloc_err;
> +
> +	/*update pgd in inst blk */
> +	nv_wo32(ppmuvm->mem, 0x0200, lower_32_bits(ppmuvm->pgd->addr));
> +	nv_wo32(ppmuvm->mem, 0x0204, upper_32_bits(ppmuvm->pgd->addr));
> +	nv_wo32(ppmuvm->mem, 0x0208, lower_32_bits(pmu_area_len - 1));
> +	nv_wo32(ppmuvm->mem, 0x020c, upper_32_bits(pmu_area_len - 1));
> +
> +	/* allocate memory for pmu fw to be copied to*/
> +	ret = nvkm_gpuobj_new(nv_object(ppmu), NULL,
> +		   GK20A_PMU_UCODE_SIZE_MAX, 0x1000, 0, &pmu->ucode.pmubufobj);
> +	if (ret)
> +		goto fw_alloc_err;
> +
> +	/*
> +	 * The image starts right after the descriptor.  Use byte-pointer
> +	 * arithmetic: casting the pointer to u32 truncates it on 64-bit.
> +	 */
> +	ucode_image = (const u32 *)(fw->data + desc->descriptor_size);
> +	words = ucode_len >> 2;
> +	for (i = 0; i < words; i++)
> +		nv_wo32(pmu->ucode.pmubufobj, i << 2, ucode_image[i]);
> +
> +	/* map allocated memory into GMMU */
> +	ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->ucode.pmubufobj), vm,
> +				    NV_MEM_ACCESS_RW,
> +				    &pmu->ucode.pmubufvma);
> +	if (ret)
> +		goto map_err;
> +
> +	nv_debug(ppmu, "%s function end\n", __func__);
> +	return ret;
> +map_err:
> +	nvkm_gpuobj_destroy(pmu->ucode.pmubufobj);
> +virt_alloc_err:
> +fw_alloc_err:
> +	nvkm_gpuobj_destroy(ppmuvm->pgd);
> +pgd_alloc_err:
> +	nvkm_gpuobj_destroy(ppmuvm->mem);
> +instblk_alloc_err:
> +	return ret;
> +
> +}
> +
> +/*
> + * Request the PMU ucode image "nvidia/tegra124/<GK20A_PMU_UCODE_IMAGE>"
> + * for the device that owns @ppmu.  Returns request_firmware()'s result.
> + */
> +static int
> +gk20a_pmu_load_firmware(struct nvkm_pmu *ppmu, const struct firmware **pfw)
> +{
> +	char path[32];
> +	struct nvkm_device *device = nv_device(ppmu);
> +
> +	snprintf(path, sizeof(path), "nvidia/tegra124/%s",
> +		 GK20A_PMU_UCODE_IMAGE);
> +
> +	return request_firmware(pfw, path, nv_device_base(device));
> +}
> +
> +/*
> + * Log every field of the ucode descriptor found at the start of @fw via
> + * nv_debug().  Read-only; @fw->data is interpreted as struct pmu_ucode_desc.
> + */
> +static void
> +gk20a_pmu_dump_firmware_info(struct nvkm_pmu *ppmu,
> + const struct firmware *fw)
> +{
> + struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data;
> +
> + nv_debug(ppmu, "GK20A PMU firmware information\n");
> + nv_debug(ppmu, "descriptor size = %u\n", desc->descriptor_size);
> + nv_debug(ppmu, "image size = %u\n", desc->image_size);
> + nv_debug(ppmu, "app_version = 0x%08x\n", desc->app_version);
> + nv_debug(ppmu, "date = %s\n", desc->date);
> + nv_debug(ppmu, "bootloader_start_offset = 0x%08x\n",
> + desc->bootloader_start_offset);
> + nv_debug(ppmu, "bootloader_size = 0x%08x\n", desc->bootloader_size);
> + nv_debug(ppmu, "bootloader_imem_offset = 0x%08x\n",
> + desc->bootloader_imem_offset);
> + nv_debug(ppmu, "bootloader_entry_point = 0x%08x\n",
> + desc->bootloader_entry_point);
> + nv_debug(ppmu, "app_start_offset = 0x%08x\n", desc->app_start_offset);
> + nv_debug(ppmu, "app_size = 0x%08x\n", desc->app_size);
> + nv_debug(ppmu, "app_imem_offset = 0x%08x\n", desc->app_imem_offset);
> + nv_debug(ppmu, "app_imem_entry = 0x%08x\n", desc->app_imem_entry);
> + nv_debug(ppmu, "app_dmem_offset = 0x%08x\n", desc->app_dmem_offset);
> + nv_debug(ppmu, "app_resident_code_offset = 0x%08x\n",
> + desc->app_resident_code_offset);
> + nv_debug(ppmu, "app_resident_code_size = 0x%08x\n",
> + desc->app_resident_code_size);
> + nv_debug(ppmu, "app_resident_data_offset = 0x%08x\n",
> + desc->app_resident_data_offset);
> + nv_debug(ppmu, "app_resident_data_size = 0x%08x\n",
> + desc->app_resident_data_size);
> + nv_debug(ppmu, "nb_overlays = %d\n", desc->nb_overlays);
> +
> + nv_debug(ppmu, "compressed = %u\n", desc->compressed);
> +}
> +
> +/*
> + * Consume the INIT message the PMU posts after booting.
> + *
> + * The message is read from DMEM at the offset held in register 0x0010a4cc,
> + * which is then advanced past it.  From the message this sets up the
> + * queues and, if not done yet, the DMEM allocator and the GID info, and
> + * finally marks the PMU ready (PMU_STATE_INIT_RECEIVED).
> + *
> + * @msg is caller-provided scratch space for the message.
> + * Returns 0 on success or -EINVAL if the message is not a valid INIT.
> + */
> +static int pmu_process_init_msg(struct pmu_desc *pmu,
> +			struct pmu_msg *msg)
> +{
> +	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +		impl_from_pmu(pmu));
> +	struct pmu_init_msg_pmu_gk20a *init;
> +	struct pmu_sha1_gid_data gid_data;
> +	u32 i, tail = 0;
> +
> +	tail = nv_rd32(ppmu, 0x0010a4cc) & 0xffffffff;
> +
> +	pmu_copy_from_dmem(pmu, tail,
> +		(u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0);
> +
> +	if (msg->hdr.unit_id != PMU_UNIT_INIT) {
> +		nv_error(ppmu,
> +			"expecting init msg");
> +		return -EINVAL;
> +	}
> +
> +	/*
> +	 * hdr.size was written by the firmware.  A value below the header
> +	 * size would underflow the copy length, a value above the buffer
> +	 * would overflow @msg; reject both before copying the body.
> +	 */
> +	if (msg->hdr.size < PMU_MSG_HDR_SIZE ||
> +	    msg->hdr.size - PMU_MSG_HDR_SIZE > sizeof(msg->msg)) {
> +		nv_error(ppmu, "invalid init msg size %u\n",
> +			(u32)msg->hdr.size);
> +		return -EINVAL;
> +	}
> +
> +	pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE,
> +		(u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0);
> +
> +	if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) {
> +		nv_error(ppmu,
> +			"expecting init msg");
> +		return -EINVAL;
> +	}
> +
> +	/* consume the message: advance the tail register past it */
> +	tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT);
> +	nv_wr32(ppmu, 0x0010a4cc,
> +		tail & 0xffffffff);
> +
> +	init = &msg->msg.init.pmu_init_gk20a;
> +	if (!pmu->gid_info.valid) {
> +
> +		pmu_copy_from_dmem(pmu,
> +			init->sw_managed_area_offset,
> +			(u8 *)&gid_data,
> +			sizeof(struct pmu_sha1_gid_data), 0);
> +
> +		pmu->gid_info.valid =
> +			(*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE);
> +
> +		if (pmu->gid_info.valid) {
> +
> +			BUG_ON(sizeof(pmu->gid_info.gid) !=
> +				sizeof(gid_data.gid));
> +
> +			memcpy(pmu->gid_info.gid, gid_data.gid,
> +				sizeof(pmu->gid_info.gid));
> +		}
> +	}
> +
> +	for (i = 0; i < PMU_QUEUE_COUNT; i++)
> +		pmu_queue_init(pmu, i, init);
> +
> +	/* the allocator is set up once; a later INIT keeps the old one */
> +	if (!pmu->dmem.alloc)
> +		nvkm_pmu_allocator_init(&pmu->dmem, "gk20a_pmu_dmem",
> +				init->sw_managed_area_offset,
> +				init->sw_managed_area_size);
> +
> +	pmu->pmu_ready = true;
> +	pmu->pmu_state = PMU_STATE_INIT_RECEIVED;
> +
> +	return 0;
> +}
> +
> +/*
> + * Read one complete message (header plus body) from @queue into @msg.
> + *
> + * A PMU_UNIT_REWIND header makes the reader rewind to the start of the
> + * queue and read the header again from there.
> + *
> + * Returns true when a message was read and the queue tail committed.
> + * Returns false when the queue is empty (*@status == 0) or on failure
> + * (*@status is a single negative errno; the read is not committed).
> + */
> +static bool pmu_read_message(struct pmu_desc *pmu, struct pmu_queue *queue,
> +			struct pmu_msg *msg, int *status)
> +{
> +	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +		impl_from_pmu(pmu));
> +	u32 read_size, bytes_read;
> +	int err;
> +
> +	*status = 0;
> +
> +	if (pmu_queue_is_empty(pmu, queue))
> +		return false;
> +
> +	err = pmu_queue_open_read(pmu, queue);
> +	if (err) {
> +		nv_error(ppmu,
> +			"fail to open queue %d for read", queue->id);
> +		*status = err;
> +		return false;
> +	}
> +
> +	err = pmu_queue_pop(pmu, queue, &msg->hdr,
> +			PMU_MSG_HDR_SIZE, &bytes_read);
> +	if (err || bytes_read != PMU_MSG_HDR_SIZE) {
> +		nv_error(ppmu,
> +			"fail to read msg from queue %d", queue->id);
> +		/* OR-ing two errno values yields a meaningless code */
> +		*status = err ? err : -EINVAL;
> +		goto clean_up;
> +	}
> +
> +	if (msg->hdr.unit_id == PMU_UNIT_REWIND) {
> +		pmu_queue_rewind(pmu, queue);
> +		/* read again after rewind */
> +		err = pmu_queue_pop(pmu, queue, &msg->hdr,
> +				PMU_MSG_HDR_SIZE, &bytes_read);
> +		if (err || bytes_read != PMU_MSG_HDR_SIZE) {
> +			nv_error(ppmu,
> +				"fail to read msg from queue %d", queue->id);
> +			*status = err ? err : -EINVAL;
> +			goto clean_up;
> +		}
> +	}
> +
> +	if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) {
> +		nv_error(ppmu,
> +			"read invalid unit_id %d from queue %d",
> +			msg->hdr.unit_id, queue->id);
> +		*status = -EINVAL;
> +		goto clean_up;
> +	}
> +
> +	if (msg->hdr.size > PMU_MSG_HDR_SIZE) {
> +		read_size = msg->hdr.size - PMU_MSG_HDR_SIZE;
> +		/* the size comes from the PMU: never read past msg->msg */
> +		if (read_size > sizeof(msg->msg)) {
> +			nv_error(ppmu,
> +				"msg size %u from queue %d too large",
> +				(u32)msg->hdr.size, queue->id);
> +			*status = -EINVAL;
> +			goto clean_up;
> +		}
> +		err = pmu_queue_pop(pmu, queue, &msg->msg,
> +			read_size, &bytes_read);
> +		if (err || bytes_read != read_size) {
> +			nv_error(ppmu,
> +				"fail to read msg from queue %d", queue->id);
> +			/* a short read has err == 0; still report failure */
> +			*status = err ? err : -EINVAL;
> +			goto clean_up;
> +		}
> +	}
> +
> +	err = pmu_queue_close(pmu, queue, true);
> +	if (err) {
> +		nv_error(ppmu,
> +			"fail to close queue %d", queue->id);
> +		*status = err;
> +		return false;
> +	}
> +
> +	return true;
> +
> +clean_up:
> +	/* close without commit so the tail is left where it was */
> +	err = pmu_queue_close(pmu, queue, false);
> +	if (err)
> +		nv_error(ppmu,
> +			"fail to close queue %d", queue->id);
> +	return false;
> +}
> +
> +/*
> + * Complete the sequence that reply @msg belongs to.
> + *
> + * The sequence is looked up by msg->hdr.seq_id.  For a sequence in state
> + * USED the reply (and any output payload held in DMEM) is copied to the
> + * buffers recorded when the command was posted; for a CANCELLED sequence
> + * the callback is dropped.  The DMEM recorded on the sequence is freed,
> + * the callback (if any) is invoked and the sequence is released.
> + *
> + * Returns -EINVAL if the sequence is in neither of those states, else 0.
> + */
> +static int pmu_response_handle(struct pmu_desc *pmu,
> + struct pmu_msg *msg)
> +{
> + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> + impl_from_pmu(pmu));
> + struct pmu_sequence *seq;
> + int ret = 0;
> +
> + nv_debug(ppmu, "handling pmu response\n");
> + /* NOTE(review): seq_id comes from the PMU and is not range-checked */
> + seq = &pmu->seq[msg->hdr.seq_id];
> + if (seq->state != PMU_SEQ_STATE_USED &&
> + seq->state != PMU_SEQ_STATE_CANCELLED) {
> + nv_error(ppmu,
> + "msg for an unknown sequence %d", seq->id);
> + return -EINVAL;
> + }
> +
> + if (msg->hdr.unit_id == PMU_UNIT_RC &&
> + msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) {
> + nv_error(ppmu,
> + "unhandled cmd: seq %d", seq->id);
> + } else if (seq->state != PMU_SEQ_STATE_CANCELLED) {
> + if (seq->msg) {
> + /* copy only if the caller's buffer can hold the reply */
> + if (seq->msg->hdr.size >= msg->hdr.size) {
> + memcpy(seq->msg, msg, msg->hdr.size);
> + if (seq->out_gk20a.alloc.dmem.size != 0) {
> + pmu_copy_from_dmem(pmu,
> + seq->out_gk20a.alloc.dmem.offset,
> + seq->out_payload,
> + seq->out_gk20a.alloc.dmem.size, 0);
> + }
> + } else {
> + nv_error(ppmu,
> + "sequence %d msg buffer too small",
> + seq->id);
> + }
> + }
> + } else
> + seq->callback = NULL;
> + /*
> + * NOTE(review): when in and out share one DMEM allocation both records
> + * carry the same offset, so it looks like it is freed twice - confirm.
> + */
> + if (seq->in_gk20a.alloc.dmem.size != 0)
> + pmu->dmem.free(&pmu->dmem,
> + seq->in_gk20a.alloc.dmem.offset,
> + seq->in_gk20a.alloc.dmem.size,
> + PMU_DMEM_ALLOC_ALIGNMENT);
> + if (seq->out_gk20a.alloc.dmem.size != 0)
> + pmu->dmem.free(&pmu->dmem,
> + seq->out_gk20a.alloc.dmem.offset,
> + seq->out_gk20a.alloc.dmem.size,
> + PMU_DMEM_ALLOC_ALIGNMENT);
> +
> + /* 'ret' is never changed, so the callback always receives 0 */
> + if (seq->callback)
> + seq->callback(ppmu, msg, seq->cb_params, seq->desc, ret);
> +
> + pmu_seq_release(pmu, seq);
> +
> + /* TBD: notify client waiting for available dmem */
> + nv_debug(ppmu, "pmu response processed\n");
> +
> + return 0;
> +}
> +
> +int pmu_wait_message_cond(struct pmu_desc *pmu, u32 timeout,
> + u32 *var, u32 val);
> +
> +
> +/*
> + * Handle an event message sent by the PMU.  Events are only logged for
> + * now: one text for PMU_UNIT_PERFMON, another for everything else.
> + * Always returns 0.
> + */
> +static int pmu_handle_event(struct pmu_desc *pmu, struct pmu_msg *msg)
> +{
> +	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +		impl_from_pmu(pmu));
> +
> +	if (msg->hdr.unit_id == PMU_UNIT_PERFMON)
> +		nv_debug(ppmu, "init perfmon event generated\n");
> +	else
> +		nv_debug(ppmu, "default event generated\n");
> +
> +	return 0;
> +}
> +
> +/*
> + * Work handler for pmu->isr_workq (scheduled from gk20a_pmu_isr()).
> + *
> + * Under isr_mutex: before the PMU is ready the INIT message is processed;
> + * afterwards the message queue is drained, dispatching each message as an
> + * event or as the response to a posted command.  pmu_enable_irq(..., true)
> + * is called after the mutex is dropped on both paths.
> + */
> +void pmu_process_message(struct work_struct *work)
> +{
> + struct pmu_desc *pmu = container_of(work, struct pmu_desc, isr_workq);
> + struct pmu_msg msg;
> + int status;
> + struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> + impl_from_pmu(pmu));
> + struct nvkm_mc *pmc = nvkm_mc(ppmu);
> +
> + mutex_lock(&pmu->isr_mutex);
> + if (unlikely(!pmu->pmu_ready)) {
> + nv_debug(ppmu, "processing init msg\n");
> + /* NOTE(review): the return value is ignored here */
> + pmu_process_init_msg(pmu, &msg);
> + mutex_unlock(&pmu->isr_mutex);
> + pmu_enable_irq(ppmu, pmc, true);
> + goto out;
> + }
> +
> + /* 'status' is filled in by pmu_read_message() but not examined here */
> + while (pmu_read_message(pmu,
> + &pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) {
> +
> + nv_debug(ppmu, "read msg hdr:\n"
> + "unit_id = 0x%08x, size = 0x%08x,\n"
> + "ctrl_flags = 0x%08x, seq_id = 0x%08x\n",
> + msg.hdr.unit_id, msg.hdr.size,
> + msg.hdr.ctrl_flags, msg.hdr.seq_id);
> +
> + msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK;
> +
> + if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT)
> + pmu_handle_event(pmu, &msg);
> + else
> + pmu_response_handle(pmu, &msg);
> + }
> + mutex_unlock(&pmu->isr_mutex);
> + pmu_enable_irq(ppmu, pmc, true);
> +out:
> + nv_debug(ppmu, "exit %s\n", __func__);
> +}
> +
> +/*
> + * Shut the PMU down: wait for pending message work, call
> + * pmu_enable(..., false) and stop ISR handling under isr_mutex, then reset
> + * the software state flags.  Always returns 0.
> + */
> +int gk20a_pmu_destroy(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc)
> +{
> + struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> + struct pmu_desc *pmu = &impl->pmudata;
> +
> + /* make sure the pending operations are finished before we continue */
> + cancel_work_sync(&pmu->isr_workq);
> + pmu->initialized = false;
> +
> + /* isr_mutex is also held by pmu_process_message() while it runs */
> + mutex_lock(&pmu->isr_mutex);
> + pmu_enable(ppmu, pmc, false);
> + pmu->isr_enabled = false;
> + mutex_unlock(&pmu->isr_mutex);
> +
> + pmu->pmu_state = PMU_STATE_OFF;
> + pmu->pmu_ready = false;
> + pmu->zbc_ready = false;
> +
> + return 0;
> +}
> +
> +/*
> + * Store the cached load value (load_shadow, refreshed by
> + * gk20a_pmu_load_update()) in *@load.  Always returns 0.
> + */
> +int gk20a_pmu_load_norm(struct nvkm_pmu *ppmu, u32 *load)
> +{
> +	struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +
> +	*load = impl->pmudata.load_shadow;
> +	return 0;
> +}
> +
> +/*
> + * Refresh the cached load figures: read a 16-bit sample from DMEM at
> + * pmu->sample_buffer, store sample / 10 in load_shadow and fold it into
> + * load_avg with a 9:1 weighting.  Always returns 0.
> + */
> +int gk20a_pmu_load_update(struct nvkm_pmu *ppmu)
> +{
> +	struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +	struct pmu_desc *pmu = &impl->pmudata;
> +	u16 sample = 0;
> +
> +	pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&sample, 2, 0);
> +
> +	pmu->load_shadow = sample / 10;
> +	pmu->load_avg = (9 * pmu->load_avg + pmu->load_shadow) / 10;
> +
> +	return 0;
> +}
> +
> +/*
> + * Read two counters into *@busy_cycles (register 0x0010a508 + 16) and
> + * *@total_cycles (register 0x0010a508 + 32), keeping the low 31 bits of
> + * each.  The power-state guard and read barrier are still TODO below.
> + */
> +void gk20a_pmu_get_load_counters(struct nvkm_pmu *ppmu, u32 *busy_cycles,
> + u32 *total_cycles)
> +{
> + /*todo if (!g->power_on || gk20a_busy(g->dev)) {
> + *busy_cycles = 0;
> + *total_cycles = 0;
> + return;
> + }*/
> +
> + *busy_cycles = nv_rd32(ppmu, 0x0010a508 + 16) & 0x7fffffff;
> + /*todormb();*/
> + *total_cycles = nv_rd32(ppmu, 0x0010a508 + 32) & 0x7fffffff;
> + /*todogk20a_idle(g->dev);*/
> +}
> +
> +/*
> + * Write a value with only bit 31 set to the two counter registers read by
> + * gk20a_pmu_get_load_counters(): 0x0010a508 + 32 first, then + 16.
> + * NOTE(review): presumably bit 31 is the counter reset bit - confirm.
> + */
> +void gk20a_pmu_reset_load_counters(struct nvkm_pmu *ppmu)
> +{
> + u32 reg_val = 1 << 31;
> +
> + /*todoif (!g->power_on || gk20a_busy(g->dev))
> + return;*/
> +
> + nv_wr32(ppmu, 0x0010a508 + 32, reg_val);
> + /*todowmb()*/;
> + nv_wr32(ppmu, 0x0010a508 + 16, reg_val);
> + /*todogk20a_idle(g->dev);*/
> +}
> +
> +/*
> + * First hardware bring-up step: reset the PMU and enable ISR handling
> + * under isr_mutex, program the five aperture registers at 0x10a600, then
> + * bootstrap the PMU.  Returns the result of pmu_bootstrap().
> + */
> +static int gk20a_init_pmu_setup_hw1(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc)
> +{
> +	/* entries 0-1: virtual apertures, entries 2-4: physical apertures */
> +	static const u32 aperture[] = {
> +		0x0, 0x0, 0x4 | 0x0, 0x4 | 0x1, 0x4 | 0x2,
> +	};
> +	struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +	struct pmu_desc *pmu = &impl->pmudata;
> +	int idx;
> +
> +	mutex_lock(&pmu->isr_mutex);
> +	pmu_reset(ppmu, pmc);
> +	pmu->isr_enabled = true;
> +	mutex_unlock(&pmu->isr_mutex);
> +
> +	for (idx = 0; idx < ARRAY_SIZE(aperture); idx++)
> +		nv_wr32(ppmu, 0x10a600 + idx * 4, aperture[idx]);
> +
> +	/* TBD: load pmu ucode */
> +	return pmu_bootstrap(pmu);
> +}
> +
> +/*
> + * One-time software setup of the PMU state.
> + *
> + * The first call allocates the mutex and sequence tables, initializes the
> + * message work item and wait queue, allocates and maps the sequence and
> + * trace buffers, writes the initial sequence bytes and registers debugfs.
> + * Later calls (pmu->sw_ready set) only re-initialize the mutex ids and
> + * the sequence table.
> + *
> + * Returns 0 on success or a negative errno; on failure everything that
> + * was allocated by this call is released again.
> + */
> +static int gk20a_init_pmu_setup_sw(struct nvkm_pmu *ppmu)
> +{
> +	struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +	struct pmu_desc *pmu = &impl->pmudata;
> +	struct pmu_priv_vm *ppmuvm = &pmuvm;
> +	int i, ret;
> +
> +	if (pmu->sw_ready) {
> +		for (i = 0; i < pmu->mutex_cnt; i++) {
> +			pmu->mutex[i].id    = i;
> +			pmu->mutex[i].index = i;
> +		}
> +		pmu_seq_init(pmu);
> +
> +		nv_debug(ppmu, "skipping init\n");
> +		return 0;
> +	}
> +
> +	/* no infoRom script from vbios? */
> +
> +	/* TBD: sysmon subtask */
> +
> +	pmu->mutex_cnt = 0x00000010;
> +	pmu->mutex = kcalloc(pmu->mutex_cnt, sizeof(struct pmu_mutex),
> +			     GFP_KERNEL);
> +	if (!pmu->mutex) {
> +		nv_error(ppmu, "not enough space ENOMEM\n");
> +		return -ENOMEM;
> +	}
> +
> +	for (i = 0; i < pmu->mutex_cnt; i++) {
> +		pmu->mutex[i].id    = i;
> +		pmu->mutex[i].index = i;
> +	}
> +
> +	pmu->seq = kcalloc(PMU_MAX_NUM_SEQUENCES,
> +			   sizeof(struct pmu_sequence), GFP_KERNEL);
> +	if (!pmu->seq) {
> +		ret = -ENOMEM;
> +		nv_error(ppmu, "not enough space ENOMEM\n");
> +		goto err_free_mutex;
> +	}
> +
> +	pmu_seq_init(pmu);
> +
> +	INIT_WORK(&pmu->isr_workq, pmu_process_message);
> +	init_waitqueue_head(&ppmu->init_wq);
> +	ppmu->gr_initialised = false;
> +
> +	/* allocate memory for pmu fw area */
> +	ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, GK20A_PMU_SEQ_BUF_SIZE,
> +			      0x1000, 0, &pmu->seq_buf.pmubufobj);
> +	if (ret)
> +		goto err_free_seq;
> +	ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, GK20A_PMU_TRACE_BUFSIZE,
> +			      0, 0, &pmu->trace_buf.pmubufobj);
> +	if (ret)
> +		goto err_free_seq_buf;
> +	/* map allocated memory into GMMU */
> +	ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->seq_buf.pmubufobj),
> +				 ppmuvm->vm,
> +				 NV_MEM_ACCESS_RW,
> +				 &pmu->seq_buf.pmubufvma);
> +	if (ret)
> +		goto err_free_trace_buf;
> +	ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->trace_buf.pmubufobj),
> +				 ppmuvm->vm,
> +				 NV_MEM_ACCESS_RW,
> +				 &pmu->trace_buf.pmubufvma);
> +	if (ret)
> +		goto err_unmap_seq_buf;
> +
> +	/* TBD: remove this if ZBC save/restore is handled by PMU
> +	 * send an empty ZBC sequence for now.
> +	 *
> +	 * The sequence is the byte string 16 00 01 00 00 00 00 00.  It is
> +	 * written as aligned 32-bit words; the earlier code issued 32-bit
> +	 * writes at byte offsets 0..7, which only produced the same bytes
> +	 * through overlapping unaligned accesses.
> +	 */
> +	nv_wo32(pmu->seq_buf.pmubufobj, 0, 0x00010016);
> +	nv_wo32(pmu->seq_buf.pmubufobj, 4, 0x00000000);
> +	nv_wo32(pmu->seq_buf.pmubufobj, 8, 0x00000000);
> +
> +	pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE;
> +	ret = gk20a_pmu_debugfs_init(ppmu);
> +	if (ret)
> +		goto err_unmap_trace_buf;
> +
> +	pmu->sw_ready = true;
> +	return 0;
> +
> +	/* unwind in reverse order of acquisition */
> +err_unmap_trace_buf:
> +	nvkm_gpuobj_unmap(&pmu->trace_buf.pmubufvma);
> +err_unmap_seq_buf:
> +	nvkm_gpuobj_unmap(&pmu->seq_buf.pmubufvma);
> +err_free_trace_buf:
> +	nvkm_gpuobj_destroy(pmu->trace_buf.pmubufobj);
> +	pmu->trace_buf.pmubufobj = NULL;
> +err_free_seq_buf:
> +	nvkm_gpuobj_destroy(pmu->seq_buf.pmubufobj);
> +	pmu->seq_buf.pmubufobj = NULL;
> +err_free_seq:
> +	kfree(pmu->seq);
> +	pmu->seq = NULL;
> +err_free_mutex:
> +	kfree(pmu->mutex);
> +	pmu->mutex = NULL;
> +	return ret;
> +}
> +
> +/*
> + * pgob hook for GK20A.  Currently a no-op: the whole body is commented
> + * out, so @enable is ignored.
> + */
> +static void
> +gk20a_pmu_pgob(struct nvkm_pmu *ppmu, bool enable)
> +{
> + /*
> + nv_mask(ppmu, 0x000200, 0x00001000, 0x00000000);
> + nv_rd32(ppmu, 0x000200);
> + nv_mask(ppmu, 0x000200, 0x08000000, 0x08000000);
> +
> + msleep(50);
> +
> + nv_mask(ppmu, 0x000200, 0x08000000, 0x00000000);
> + nv_mask(ppmu, 0x000200, 0x00001000, 0x00001000);
> + nv_rd32(ppmu, 0x000200);
> + */
> +}
> +
> +/* Subdev interrupt hook: look up the PMU for @subdev and run the PMU ISR. */
> +static void gk20a_pmu_intr(struct nvkm_subdev *subdev)
> +{
> + gk20a_pmu_isr(nvkm_pmu(subdev));
> +}
> +
> +/* Destroy the DMEM allocator embedded in @pmu (pmu->dmem). */
> +void gk20a_remove_pmu_support(struct pmu_desc *pmu)
> +{
> + nvkm_pmu_allocator_destroy(&pmu->dmem);
> +}
> +
> +/*
> + * Message hook for GK20A: ignores every argument and always returns -EPERM.
> + */
> +int gk20a_message(struct nvkm_pmu *ppmu, u32 reply[2],
> + u32 process, u32 message, u32 data0, u32 data1)
> +{
> + return -EPERM;
> +}
> +
> +/*
> + * Create the GK20A PMU subdev object and install its callbacks.
> + *
> + * @parent, @engine, @oclass, @length and @pobject are forwarded unchanged to
> + * nvkm_subdev_create_(); on success *pobject holds the new object.
> + *
> + * Returns 0 on success, or a negative value if subdev creation or the IRQ
> + * lookup fails.
> + */
> +int
> +gk20a_pmu_create_(struct nvkm_object *parent,
> + struct nvkm_object *engine,
> + struct nvkm_oclass *oclass, int length, void **pobject)
> +{
> + struct nvkm_pmu *ppmu;
> + struct nvkm_device *device = nv_device(parent);
> + int ret;
> +
> + ret = nvkm_subdev_create_(parent, engine, oclass, 0, "PPMU",
> + "pmu", length, pobject);
> + if (ret)
> + return ret;
> +
> + /* Only read *pobject once creation is known to have succeeded. */
> + ppmu = *pobject;
> +
> + /*
> + * The IRQ number itself is not needed here, but a failed lookup must
> + * not be silently discarded.
> + */
> + ret = nv_device_get_irq(device, true);
> + if (ret < 0)
> + return ret;
> +
> + ppmu->message = gk20a_message;
> + ppmu->pgob = gk20a_pmu_pgob;
> + ppmu->pmu_mutex_acquire = pmu_mutex_acquire;
> + ppmu->pmu_mutex_release = pmu_mutex_release;
> + ppmu->pmu_load_norm = gk20a_pmu_load_norm;
> + ppmu->pmu_load_update = gk20a_pmu_load_update;
> + ppmu->pmu_reset_load_counters = gk20a_pmu_reset_load_counters;
> + ppmu->pmu_get_load_counters = gk20a_pmu_get_load_counters;
> +
> + return 0;
> +}
> +
> +
> +
> diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.h b/drm/nouveau/nvkm/subdev/pmu/gk20a.h
> new file mode 100644
> index 000000000000..a084d6d518b4
> --- /dev/null
> +++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.h
> @@ -0,0 +1,369 @@
> +#ifndef __NVKM_pmu_GK20A_H__
> +#define __NVKM_pmu_GK20A_H__
> +
> +/*
> + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + */
> +void pmu_setup_hw(struct pmu_desc *pmu);
> +void gk20a_remove_pmu_support(struct pmu_desc *pmu);
> +#define gk20a_pmu_create(p, e, o, d) \
> + gk20a_pmu_create_((p), (e), (o), sizeof(**d), (void **)d)
> +
> +int gk20a_pmu_create_(struct nvkm_object *, struct nvkm_object *,
> + struct nvkm_oclass *, int, void **);
> +/* defined by pmu hw spec */
> +#define GK20A_PMU_VA_SIZE (512 * 1024 * 1024)
> +#define GK20A_PMU_UCODE_SIZE_MAX (256 * 1024)
> +#define GK20A_PMU_SEQ_BUF_SIZE 4096
> +/* idle timeout */
> +#define GK20A_IDLE_CHECK_DEFAULT 100 /* usec */
> +#define GK20A_IDLE_CHECK_MAX 5000 /* usec */
> +
> +/* so far gk20a has two engines: gr and ce2(gr_copy) */
> +enum {
> + ENGINE_GR_GK20A = 0,
> + ENGINE_CE2_GK20A = 1,
> + ENGINE_INVAL_GK20A
> +};
> +
> +/*
> + * Mask with bits 1..i set and bit 0 clear, limited to the low 16 bits.
> + * Built from 0U because left-shifting the signed value ~0 (-1) is undefined.
> + */
> +#define ZBC_MASK(i) (~(~0U << ((i) + 1)) & 0xfffe)
> +
> +#define APP_VERSION_GK20A 17997577
> +
> +enum {
> + GK20A_PMU_DMAIDX_UCODE = 0,
> + GK20A_PMU_DMAIDX_VIRT = 1,
> + GK20A_PMU_DMAIDX_PHYS_VID = 2,
> + GK20A_PMU_DMAIDX_PHYS_SYS_COH = 3,
> + GK20A_PMU_DMAIDX_PHYS_SYS_NCOH = 4,
> + GK20A_PMU_DMAIDX_RSVD = 5,
> + GK20A_PMU_DMAIDX_PELPG = 6,
> + GK20A_PMU_DMAIDX_END = 7
> +};
> +
> +struct pmu_mem_gk20a {
> + u32 dma_base;
> + u8 dma_offset;
> + u8 dma_idx;
> + u16 fb_size;
> +};
> +
> +struct pmu_dmem {
> + u16 size;
> + u32 offset;
> +};
> +
> +struct pmu_cmdline_args_gk20a {
> + u32 cpu_freq_hz; /* Frequency of the clock driving PMU */
> + u32 falc_trace_size; /* falctrace buffer size (bytes) */
> + u32 falc_trace_dma_base; /* 256-byte block address */
> + u32 falc_trace_dma_idx; /* dmaIdx for DMA operations */
> + u8 secure_mode;
> + struct pmu_mem_gk20a gc6_ctx; /* dmem offset of gc6 context */
> +};
> +
> +#define GK20A_PMU_TRACE_BUFSIZE 0x4000 /* 16K */
> +#define GK20A_PMU_DMEM_BLKSIZE2 8
> +
> +#define GK20A_PMU_UCODE_NB_MAX_OVERLAY 32
> +#define GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH 64
> +
> +struct pmu_ucode_desc {
> + u32 descriptor_size;
> + u32 image_size;
> + u32 tools_version;
> + u32 app_version;
> + char date[GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH];
> + u32 bootloader_start_offset;
> + u32 bootloader_size;
> + u32 bootloader_imem_offset;
> + u32 bootloader_entry_point;
> + u32 app_start_offset;
> + u32 app_size;
> + u32 app_imem_offset;
> + u32 app_imem_entry;
> + u32 app_dmem_offset;
> + u32 app_resident_code_offset; /* Offset from appStartOffset */
> +/* Exact size of the resident code
> + * ( potentially contains CRC inside at the end ) */
> + u32 app_resident_code_size;
> + u32 app_resident_data_offset; /* Offset from appStartOffset */
> +/* Exact size of the resident data
> + * ( potentially contains CRC inside at the end ) */
> + u32 app_resident_data_size;
> + u32 nb_overlays;
> + struct {u32 start; u32 size; } load_ovl[GK20A_PMU_UCODE_NB_MAX_OVERLAY];
> + u32 compressed;
> +};
> +
> +#define PMU_UNIT_REWIND (0x00)
> +#define PMU_UNIT_PG (0x03)
> +#define PMU_UNIT_INIT (0x07)
> +#define PMU_UNIT_PERFMON (0x12)
> +#define PMU_UNIT_THERM (0x1B)
> +#define PMU_UNIT_RC (0x1F)
> +#define PMU_UNIT_NULL (0x20)
> +#define PMU_UNIT_END (0x23)
> +
> +#define PMU_UNIT_TEST_START (0xFE)
> +#define PMU_UNIT_END_SIM (0xFF)
> +#define PMU_UNIT_TEST_END (0xFF)
> +
> +#define PMU_UNIT_ID_IS_VALID(id) \
> + (((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START))
> +
> +#define PMU_DMEM_ALLOC_ALIGNMENT (32)
> +#define PMU_DMEM_ALIGNMENT (4)
> +
> +#define PMU_CMD_FLAGS_PMU_MASK (0xF0)
> +
> +#define PMU_CMD_FLAGS_STATUS BIT(0)
> +#define PMU_CMD_FLAGS_INTR BIT(1)
> +#define PMU_CMD_FLAGS_EVENT BIT(2)
> +#define PMU_CMD_FLAGS_WATERMARK BIT(3)
> +
> +struct pmu_hdr {
> + u8 unit_id;
> + u8 size;
> + u8 ctrl_flags;
> + u8 seq_id;
> +};
> +#define PMU_MSG_HDR_SIZE sizeof(struct pmu_hdr)
> +#define PMU_CMD_HDR_SIZE sizeof(struct pmu_hdr)
> +
> +
> +struct pmu_allocation_gk20a {
> + struct {
> + struct pmu_dmem dmem;
> + struct pmu_mem_gk20a fb;
> + } alloc;
> +};
> +
> +enum {
> + PMU_INIT_MSG_TYPE_PMU_INIT = 0,
> +};
> +
> +/*
> + * Payload carried in struct pmu_init_msg (union member pmu_init_gk20a).
> + *
> + * NOTE(review): PMU_QUEUE_COUNT is only #defined further down in this header
> + * (and in priv.h), so a definition must already be visible when this struct
> + * is parsed, e.g. by including priv.h first -- confirm the include order.
> + */
> +struct pmu_init_msg_pmu_gk20a {
> + u8 msg_type;
> + u8 pad;
> + u16 os_debug_entry_point;
> +
> + struct {
> + u16 size;
> + u16 offset;
> + u8 index;
> + u8 pad;
> + } queue_info[PMU_QUEUE_COUNT];
> +
> + u16 sw_managed_area_offset;
> + u16 sw_managed_area_size;
> +};
> +
> +struct pmu_init_msg {
> + union {
> + u8 msg_type;
> + struct pmu_init_msg_pmu_gk20a pmu_init_gk20a;
> + };
> +};
> +
> +
> +enum {
> + PMU_RC_MSG_TYPE_UNHANDLED_CMD = 0,
> +};
> +
> +struct pmu_rc_msg_unhandled_cmd {
> + u8 msg_type;
> + u8 unit_id;
> +};
> +
> +struct pmu_rc_msg {
> + u8 msg_type;
> + struct pmu_rc_msg_unhandled_cmd unhandled_cmd;
> +};
> +
> +/* PERFMON */
> +#define PMU_DOMAIN_GROUP_PSTATE 0
> +#define PMU_DOMAIN_GROUP_GPC2CLK 1
> +#define PMU_DOMAIN_GROUP_NUM 2
> +struct pmu_perfmon_counter_gk20a {
> + u8 index;
> + u8 flags;
> + u8 group_id;
> + u8 valid;
> + u16 upper_threshold; /* units of 0.01% */
> + u16 lower_threshold; /* units of 0.01% */
> +};
> +struct pmu_zbc_cmd {
> + u8 cmd_type;
> + u8 pad;
> + u16 entry_mask;
> +};
> +
> +/* PERFMON MSG */
> +enum {
> + PMU_PERFMON_MSG_ID_INCREASE_EVENT = 0,
> + PMU_PERFMON_MSG_ID_DECREASE_EVENT = 1,
> + PMU_PERFMON_MSG_ID_INIT_EVENT = 2,
> + PMU_PERFMON_MSG_ID_ACK = 3
> +};
> +
> +struct pmu_perfmon_msg_generic {
> + u8 msg_type;
> + u8 state_id;
> + u8 group_id;
> + u8 data;
> +};
> +
> +struct pmu_perfmon_msg {
> + union {
> + u8 msg_type;
> + struct pmu_perfmon_msg_generic gen;
> + };
> +};
> +
> +
> +struct pmu_cmd {
> + struct pmu_hdr hdr;
> + union {
> + struct pmu_zbc_cmd zbc;
> + } cmd;
> +};
> +
> +struct pmu_msg {
> + struct pmu_hdr hdr;
> + union {
> + struct pmu_init_msg init;
> + struct pmu_perfmon_msg perfmon;
> + struct pmu_rc_msg rc;
> + } msg;
> +};
> +
> +/* write by sw, read by pmu, protected by sw mutex lock */
> +#define PMU_COMMAND_QUEUE_HPQ 0
> +/* write by sw, read by pmu, protected by sw mutex lock */
> +#define PMU_COMMAND_QUEUE_LPQ 1
> +/* write by pmu, read by sw, accessed by interrupt handler, no lock */
> +#define PMU_MESSAGE_QUEUE 4
> +#define PMU_QUEUE_COUNT 5
> +
> +enum {
> + PMU_MUTEX_ID_RSVD1 = 0,
> + PMU_MUTEX_ID_GPUSER,
> + PMU_MUTEX_ID_GPMUTEX,
> + PMU_MUTEX_ID_I2C,
> + PMU_MUTEX_ID_RMLOCK,
> + PMU_MUTEX_ID_MSGBOX,
> + PMU_MUTEX_ID_FIFO,
> + PMU_MUTEX_ID_PG,
> + PMU_MUTEX_ID_GR,
> + PMU_MUTEX_ID_CLK,
> + PMU_MUTEX_ID_RSVD6,
> + PMU_MUTEX_ID_RSVD7,
> + PMU_MUTEX_ID_RSVD8,
> + PMU_MUTEX_ID_RSVD9,
> + PMU_MUTEX_ID_INVALID
> +};
> +
> +#define PMU_IS_COMMAND_QUEUE(id) \
> + ((id) < PMU_MESSAGE_QUEUE)
> +
> +#define PMU_IS_SW_COMMAND_QUEUE(id) \
> + (((id) == PMU_COMMAND_QUEUE_HPQ) || \
> + ((id) == PMU_COMMAND_QUEUE_LPQ))
> +
> +#define PMU_IS_MESSAGE_QUEUE(id) \
> + ((id) == PMU_MESSAGE_QUEUE)
> +
> +enum {
> + OFLAG_READ = 0,
> + OFLAG_WRITE
> +};
> +
> +#define QUEUE_SET (true)
> + /*todo find how to get cpu_pa*/
> +#define QUEUE_GET (false)
> +
> +#define QUEUE_ALIGNMENT (4)
> +
> +#define PMU_PGENG_GR_BUFFER_IDX_INIT (0)
> +#define PMU_PGENG_GR_BUFFER_IDX_ZBC (1)
> +#define PMU_PGENG_GR_BUFFER_IDX_FECS (2)
> +
> +/* Same indices as the GK20A_PMU_DMAIDX_* enum earlier in this header. */
> +enum {
> + PMU_DMAIDX_UCODE = 0,
> + PMU_DMAIDX_VIRT = 1,
> + PMU_DMAIDX_PHYS_VID = 2,
> + PMU_DMAIDX_PHYS_SYS_COH = 3,
> + PMU_DMAIDX_PHYS_SYS_NCOH = 4,
> + PMU_DMAIDX_RSVD = 5,
> + PMU_DMAIDX_PELPG = 6,
> + PMU_DMAIDX_END = 7
> +};
> +
> +#define PMU_MUTEX_ID_IS_VALID(id) \
> + ((id) < PMU_MUTEX_ID_INVALID)
> +
> +#define PMU_INVALID_MUTEX_OWNER_ID (0)
> +
> +struct pmu_mutex {
> + u32 id;
> + u32 index;
> + u32 ref_cnt;
> +};
> +
> +
> +#define PMU_INVALID_SEQ_DESC (~0)
> +
> +enum {
> + PMU_SEQ_STATE_FREE = 0,
> + PMU_SEQ_STATE_PENDING,
> + PMU_SEQ_STATE_USED,
> + PMU_SEQ_STATE_CANCELLED
> +};
> +
> +struct pmu_payload {
> + struct {
> + void *buf;
> + u32 offset;
> + u32 size;
> + } in, out;
> +};
> +
> +typedef void (*pmu_callback)(struct nvkm_pmu *, struct pmu_msg *, void *,
> +u32, u32);
> +
> +struct pmu_sequence {
> + u8 id;
> + u32 state;
> + u32 desc;
> + struct pmu_msg *msg;
> + struct pmu_allocation_gk20a in_gk20a;
> + struct pmu_allocation_gk20a out_gk20a;
> + u8 *out_payload;
> + pmu_callback callback;
> + void *cb_params;
> +};
> +struct pmu_gk20a_data {
> + struct pmu_perfmon_counter_gk20a perfmon_counter_gk20a;
> + u32 perfmon_state_id[PMU_DOMAIN_GROUP_NUM];
> +};
> +
> +#endif /* __NVKM_pmu_GK20A_H__ */
> diff --git a/drm/nouveau/nvkm/subdev/pmu/priv.h b/drm/nouveau/nvkm/subdev/pmu/priv.h
> index 998410563bfd..c4686e418582 100644
> --- a/drm/nouveau/nvkm/subdev/pmu/priv.h
> +++ b/drm/nouveau/nvkm/subdev/pmu/priv.h
> @@ -2,7 +2,91 @@
> #define __NVKM_PMU_PRIV_H__
> #include <subdev/pmu.h>
> #include <subdev/pmu/fuc/os.h>
> +#include <core/object.h>
> +#include <core/device.h>
> +#include <core/parent.h>
> +#include <core/mm.h>
> +#include <linux/rwsem.h>
> +#include <linux/slab.h>
> +#include <subdev/mmu.h>
> +#include <core/gpuobj.h>
>
> +/* Upper 32 bits of the 64-bit value @n. */
> +static inline u32 u64_hi32(u64 n)
> +{
> + return (u32)(n >> 32);
> +}
> +
> +/* Lower 32 bits of the 64-bit value @n. */
> +static inline u32 u64_lo32(u64 n)
> +{
> + return (u32)n;
> +}
> +
> +/* #define ALLOCATOR_DEBUG */
> +
> +/* main struct */
> +struct nvkm_pmu_allocator {
> +
> + char name[32]; /* name for allocator */
> +/*struct rb_root rb_root;*/ /* rb tree root for blocks */
> +
> + u32 base; /* min value of this linear space */
> + u32 limit; /* max value = limit - 1 */
> +
> + unsigned long *bitmap; /* bitmap */
> +
> + struct gk20a_alloc_block *block_first; /* first block in list */
> + struct gk20a_alloc_block *block_recent; /* last visited block */
> +
> + u32 first_free_addr; /* first free addr, non-contiguous
> + allocation preferred start,
> + in order to pick up small holes */
> + u32 last_free_addr; /* last free addr, contiguous
> + allocation preferred start */
> + u32 cached_hole_size; /* max free hole size up to
> + last_free_addr */
> + u32 block_count; /* number of blocks */
> +
> + struct rw_semaphore rw_sema; /* lock */
> + struct kmem_cache *block_cache; /* slab cache */
> +
> + /* if enabled, constrain to [base, limit) */
> + struct {
> + bool enable;
> + u32 base;
> + u32 limit;
> + } constraint;
> +
> + int (*alloc)(struct nvkm_pmu_allocator *allocator,
> + u32 *addr, u32 len, u32 align);
> + int (*free)(struct nvkm_pmu_allocator *allocator,
> + u32 addr, u32 len, u32 align);
> +
> +};
> +
> +int nvkm_pmu_allocator_init(struct nvkm_pmu_allocator *allocator,
> + const char *name, u32 base, u32 size);
> +void nvkm_pmu_allocator_destroy(struct nvkm_pmu_allocator *allocator);
> +
> +int nvkm_pmu_allocator_block_alloc(struct nvkm_pmu_allocator *allocator,
> + u32 *addr, u32 len, u32 align);
> +
> +int nvkm_pmu_allocator_block_free(struct nvkm_pmu_allocator *allocator,
> + u32 addr, u32 len, u32 align);
> +
> +/*
> + * allocator_dbg(allocator, format, ...) - debug print tagged with the
> + * allocator's name and the calling function.
> + *
> + * Both variants take the same parameters, so a call site builds the same way
> + * whether or not ALLOCATOR_DEBUG is defined. Without it the macro is an empty
> + * statement and its arguments are not evaluated.
> + */
> +#if defined(ALLOCATOR_DEBUG)
> +
> +#define allocator_dbg(allocator, format, arg...) \
> +do { \
> + pr_debug("nvkm_pmu_allocator (%s) %s: " format "\n", \
> + (allocator)->name, __func__, ##arg); \
> +} while (0)
> +
> +#else /* ALLOCATOR_DEBUG */
> +
> +#define allocator_dbg(allocator, format, arg...) \
> +do { \
> +} while (0)
> +
> +#endif /* ALLOCATOR_DEBUG */
> #define nvkm_pmu_create(p, e, o, d) \
> nvkm_pmu_create_((p), (e), (o), sizeof(**d), (void **)d)
> #define nvkm_pmu_destroy(p) \
> @@ -26,6 +110,179 @@ int _nvkm_pmu_ctor(struct nvkm_object *, struct nvkm_object *,
> int _nvkm_pmu_init(struct nvkm_object *);
> int _nvkm_pmu_fini(struct nvkm_object *, bool);
> void nvkm_pmu_pgob(struct nvkm_pmu *pmu, bool enable);
> +#define PMU_PG_IDLE_THRESHOLD 15000
> +#define PMU_PG_POST_POWERUP_IDLE_THRESHOLD 1000000
> +
> +/* state transition :
> + OFF => [OFF_ON_PENDING optional] => ON_PENDING => ON => OFF
> + ON => OFF is always synchronized */
> +#define PMU_ELPG_STAT_OFF 0 /* elpg is off */
> +#define PMU_ELPG_STAT_ON 1 /* elpg is on */
> +/* elpg is off, ALLOW cmd has been sent, wait for ack */
> +#define PMU_ELPG_STAT_ON_PENDING 2
> +/* elpg is on, DISALLOW cmd has been sent, wait for ack */
> +#define PMU_ELPG_STAT_OFF_PENDING 3
> +/* elpg is off, caller has requested on, but ALLOW
> +cmd hasn't been sent due to ENABLE_ALLOW delay */
> +#define PMU_ELPG_STAT_OFF_ON_PENDING 4
> +
> +/* Falcon Register index */
> +#define PMU_FALCON_REG_R0 (0)
> +#define PMU_FALCON_REG_R1 (1)
> +#define PMU_FALCON_REG_R2 (2)
> +#define PMU_FALCON_REG_R3 (3)
> +#define PMU_FALCON_REG_R4 (4)
> +#define PMU_FALCON_REG_R5 (5)
> +#define PMU_FALCON_REG_R6 (6)
> +#define PMU_FALCON_REG_R7 (7)
> +#define PMU_FALCON_REG_R8 (8)
> +#define PMU_FALCON_REG_R9 (9)
> +#define PMU_FALCON_REG_R10 (10)
> +#define PMU_FALCON_REG_R11 (11)
> +#define PMU_FALCON_REG_R12 (12)
> +#define PMU_FALCON_REG_R13 (13)
> +#define PMU_FALCON_REG_R14 (14)
> +#define PMU_FALCON_REG_R15 (15)
> +#define PMU_FALCON_REG_IV0 (16)
> +#define PMU_FALCON_REG_IV1 (17)
> +#define PMU_FALCON_REG_UNDEFINED (18)
> +#define PMU_FALCON_REG_EV (19)
> +#define PMU_FALCON_REG_SP (20)
> +#define PMU_FALCON_REG_PC (21)
> +#define PMU_FALCON_REG_IMB (22)
> +#define PMU_FALCON_REG_DMB (23)
> +#define PMU_FALCON_REG_CSW (24)
> +#define PMU_FALCON_REG_CCR (25)
> +#define PMU_FALCON_REG_SEC (26)
> +#define PMU_FALCON_REG_CTX (27)
> +#define PMU_FALCON_REG_EXCI (28)
> +#define PMU_FALCON_REG_RSVD0 (29)
> +#define PMU_FALCON_REG_RSVD1 (30)
> +#define PMU_FALCON_REG_RSVD2 (31)
> +#define PMU_FALCON_REG_SIZE (32)
> +
> +/* Choices for pmu_state */
> +#define PMU_STATE_OFF 0 /* PMU is off */
> +#define PMU_STATE_STARTING 1 /* PMU is on, but not booted */
> +#define PMU_STATE_INIT_RECEIVED 2 /* PMU init message received */
> +#define PMU_STATE_ELPG_BOOTING 3 /* PMU is booting */
> +#define PMU_STATE_ELPG_BOOTED 4 /* ELPG is initialized */
> +#define PMU_STATE_LOADING_PG_BUF 5 /* Loading PG buf */
> +#define PMU_STATE_LOADING_ZBC 6 /* Loading ZBC buf */
> +#define PMU_STATE_STARTED 7 /* Fully initialized */
> +
> +#define PMU_QUEUE_COUNT 5
> +
> +#define PMU_MAX_NUM_SEQUENCES (256)
> +#define PMU_SEQ_BIT_SHIFT (5)
> +#define PMU_SEQ_TBL_SIZE \
> + (PMU_MAX_NUM_SEQUENCES >> PMU_SEQ_BIT_SHIFT)
> +
> +#define PMU_SHA1_GID_SIGNATURE 0xA7C66AD2
> +#define PMU_SHA1_GID_SIGNATURE_SIZE 4
> +
> +#define PMU_SHA1_GID_SIZE 16
> +
> +struct pmu_queue {
> +
> + /* used by hw, for BIOS/SMI queue */
> + u32 mutex_id;
> + u32 mutex_lock;
> + /* used by sw, for LPQ/HPQ queue */
> + struct mutex mutex;
> +
> + /* current write position */
> + u32 position;
> + /* physical dmem offset where this queue begins */
> + u32 offset;
> + /* logical queue identifier */
> + u32 id;
> + /* physical queue index */
> + u32 index;
> + /* in bytes */
> + u32 size;
> +
> + /* open-flag */
> + u32 oflag;
> + bool opened; /* opened implies locked */
> +};
> +
> +struct pmu_sha1_gid {
> + bool valid;
> + u8 gid[PMU_SHA1_GID_SIZE];
> +};
> +
> +struct pmu_sha1_gid_data {
> + u8 signature[PMU_SHA1_GID_SIGNATURE_SIZE];
> + u8 gid[PMU_SHA1_GID_SIZE];
> +};
> +
> +struct pmu_desc {
> +
> + struct pmu_ucode_desc *desc;
> + struct pmu_buf_desc ucode;
> +
> + struct pmu_buf_desc pg_buf;
> + /* TBD: remove this if ZBC seq is fixed */
> + struct pmu_buf_desc seq_buf;
> + struct pmu_buf_desc trace_buf;
> + bool buf_loaded;
> +
> + struct pmu_sha1_gid gid_info;
> +
> + struct pmu_queue queue[PMU_QUEUE_COUNT];
> +
> + struct pmu_sequence *seq;
> + unsigned long pmu_seq_tbl[PMU_SEQ_TBL_SIZE];
> + u32 next_seq_desc;
> +
> + struct pmu_mutex *mutex;
> + u32 mutex_cnt;
> +
> + struct mutex pmu_copy_lock;
> + struct mutex pmu_seq_lock;
> +
> + struct nvkm_pmu_allocator dmem;
> +
> + u32 *ucode_image;
> + bool pmu_ready;
> +
> + u32 zbc_save_done;
> +
> + u32 stat_dmem_offset;
> +
> + u32 elpg_stat;
> +
> + int pmu_state;
> +
> +#define PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC 1 /* msec */
> + struct work_struct isr_workq;
> + struct mutex elpg_mutex; /* protect elpg enable/disable */
> +/* disable -1, enable +1, <=0 elpg disabled, > 0 elpg enabled */
> + int elpg_refcnt;
> +
> + bool initialized;
> +
> + void (*remove_support)(struct pmu_desc *pmu);
> + bool sw_ready;
> + bool perfmon_ready;
> +
> + u32 sample_buffer;
> + u32 load_shadow;
> + u32 load_avg;
> +
> + struct mutex isr_mutex;
> + bool isr_enabled;
> +
> + bool zbc_ready;
> + unsigned long perfmon_events_cnt;
> + bool perfmon_sampling_enabled;
> + u8 pmu_mode;
> + u32 falcon_id;
> + u32 aelpg_param[5];
> + void *pmu_chip_data;
> + struct nvkm_pmu *pmu;
> +};
>
> struct nvkm_pmu_impl {
> struct nvkm_oclass base;
> @@ -39,5 +296,12 @@ struct nvkm_pmu_impl {
> } data;
>
> void (*pgob)(struct nvkm_pmu *, bool);
> + struct pmu_desc pmudata;
> };
> +
> +/*
> + * Return the struct nvkm_pmu back-pointer stored in @pmu (pmu->pmu).
> + * NOTE(review): despite the name, this does not return an nvkm_pmu_impl.
> + */
> +static inline struct nvkm_pmu *impl_from_pmu(struct pmu_desc *pmu)
> +{
> + return pmu->pmu;
> +}
> +
> #endif
> --
> 1.9.1
>
> _______________________________________________
> Nouveau mailing list
> Nouveau at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/nouveau
More information about the Nouveau
mailing list