[PATCH] nouveau: Load firmware for BSP/VP engines on NV84-NV96, NVA0

Ilia Mirkin imirkin at alum.mit.edu
Tue Jun 4 11:38:01 PDT 2013


On Mon, Jun 3, 2013 at 5:02 AM, Ilia Mirkin <imirkin at alum.mit.edu> wrote:
> These chipsets include the VP2 engine which is composed of a bitstream
> processor (BSP) that decodes H.264 and a video processor (VP) which can
> do iDCT/mo-comp/etc for MPEG1/2, H.264, and VC-1. Both of these are
> driven by separate xtensa chips embedded in the hardware. This patch
> provides the mechanism to load the kernel for the xtensa chips and
> provides the necessary interactions to do the rest of the work.
>
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
> ---
>
> This patch applies on top of nouveau/master (16a41bcc8).
>
> This seems to work for me. There was one boot where my userspace
> component didn't work right, but it could just as well be a bug
> there. Subsequent attempts seem to work fine. Note that I'm not
> particularly familiar with any of this stuff, so if something looks
> odd, I probably didn't know any better. I did try to faithfully
> reproduce whatever the blob did. A few questions/thoughts:
>
> 1. There's a LOT of similarity between BSP and VP setup/etc. Is it
>    worth it to create a core/xtensa.c or some such, similar to
>    falcon.c? Since it's only in two places, not that much code, and
>    there _are_ differences, I decided to keep them separate.
>
> 2. Firmware naming. Maarten suggested to use the falcon naming style,
>    which is nv$chipset_fuc$offset. However here, all the chips share
>    the same firmware. Also the offset would be 103 vs 00f, and is a
>    little arbitrary. (And fuc doesn't apply here... xt? xtensa?) I've
>    left it the way I had it: nv84_bsp and nv84_vp.
>
> 3. Firmware load time. I chose to load the fw into memory in the ctor,
>    and then copy it in during init, due to some potentially bogus
>    suspend/resume concerns. Also e.g. mplayer likes to create/destroy
>    decoders at startup a few times. The downside is that ~200KB of
>    memory is gone. Let me know if I should change it to do the
>    request_firmware in init.
>
> There's obviously a userspace piece to this, which I'm still working
> on. But right now I have it working within certain parameters
> (e.g. 1280x544 videos), and I'm relatively confident it can be
> completed without further kernel-side changes.
>
> There's also a hypothetical concern of "what if we create an open
> firmware with a different user API". Ideally there'd be some way to
> expose what kind of firmware is loaded, but I think that can be left
> for "later".

I also happened to notice that NV98, NVA1+ refer to these nv84 engines
(in drivers/gpu/drm/nouveau/core/engine/device/nv50.c). I assume that
means I should create a new nv98.c version of BSP/VP that resembles
the old versions of nv84.c, and point device/nv50.c at those for nv98
and nva1+?

>
>  drivers/gpu/drm/nouveau/core/engine/bsp/nv84.c  | 139 ++++++++++++++++++++++-
>  drivers/gpu/drm/nouveau/core/engine/fifo/nv84.c |   4 +
>  drivers/gpu/drm/nouveau/core/engine/vp/nv84.c   | 140 +++++++++++++++++++++++-
>  drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c   |   1 +
>  drivers/gpu/drm/nouveau/core/subdev/vm/nv50.c   |   2 +
>  5 files changed, 278 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/nouveau/core/engine/bsp/nv84.c b/drivers/gpu/drm/nouveau/core/engine/bsp/nv84.c
> index 1d9f614..04880d9 100644
> --- a/drivers/gpu/drm/nouveau/core/engine/bsp/nv84.c
> +++ b/drivers/gpu/drm/nouveau/core/engine/bsp/nv84.c
> @@ -29,6 +29,10 @@
>
>  struct nv84_bsp_priv {
>         struct nouveau_engine base;
> +       u32 *fw;
> +       long fw_size;
> +       struct nouveau_gpuobj *gpu_fw;
> +       void *vm_gpu_fw;
>  };
>
>  /*******************************************************************************
> @@ -37,6 +41,7 @@ struct nv84_bsp_priv {
>
>  static struct nouveau_oclass
>  nv84_bsp_sclass[] = {
> +       { 0x74b0, &nouveau_object_ofuncs },
>         {},
>  };
>
> @@ -44,11 +49,28 @@ nv84_bsp_sclass[] = {
>   * BSP context
>   ******************************************************************************/
>
> +static int
> +nv84_bsp_engctx_ctor(struct nouveau_object *parent,
> +                    struct nouveau_object *engine,
> +                    struct nouveau_oclass *oclass, void *data, u32 size,
> +                    struct nouveau_object **pobject)
> +{
> +       struct nouveau_engctx *engctx;
> +       int ret;
> +
> +       ret = nouveau_engctx_create(parent, engine, oclass, NULL,
> +                                   0x10000, 0x1000,
> +                                   NVOBJ_FLAG_ZERO_ALLOC, &engctx);
> +       *pobject = nv_object(engctx);
> +       return ret;
> +}
> +
> +
>  static struct nouveau_oclass
>  nv84_bsp_cclass = {
>         .handle = NV_ENGCTX(BSP, 0x84),
>         .ofuncs = &(struct nouveau_ofuncs) {
> -               .ctor = _nouveau_engctx_ctor,
> +               .ctor = nv84_bsp_engctx_ctor,
>                 .dtor = _nouveau_engctx_dtor,
>                 .init = _nouveau_engctx_init,
>                 .fini = _nouveau_engctx_fini,
> @@ -61,6 +83,24 @@ nv84_bsp_cclass = {
>   * BSP engine/subdev functions
>   ******************************************************************************/
>
> +static void
> +nv84_bsp_intr(struct nouveau_subdev *subdev)
> +{
> +       struct nv84_bsp_priv *priv = (void *)subdev;
> +       u32 intr, unk104, unk10c, chan;
> +
> +       unk104 = nv_rd32(priv, 0x103d04);
> +       intr = nv_rd32(priv, 0x103c20);
> +       chan = nv_rd32(priv, 0x103c28);
> +       unk10c = nv_rd32(priv, 0x103d0c);
> +       nv_wr32(priv, 0x103c20, intr);
> +       intr = nv_rd32(priv, 0x103c20);
> +       if (unk104 == 0x10001 && unk10c == 0x200 && chan && !intr) {
> +               nv_debug(priv, "Enabling BSP.FIFO_CTRL\n");
> +               nv_mask(priv, 0x103d94, 0, 0x1111); /* FIFO_CTRL */
> +       }
> +}
> +
>  static int
>  nv84_bsp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
>               struct nouveau_oclass *oclass, void *data, u32 size,
> @@ -68,6 +108,8 @@ nv84_bsp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
>  {
>         struct nv84_bsp_priv *priv;
>         int ret;
> +       const struct firmware *fw;
> +       struct nouveau_device *device = nv_device(parent);
>
>         ret = nouveau_engine_create(parent, engine, oclass, true,
>                                     "PBSP", "bsp", &priv);
> @@ -78,16 +120,105 @@ nv84_bsp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
>         nv_subdev(priv)->unit = 0x04008000;
>         nv_engine(priv)->cclass = &nv84_bsp_cclass;
>         nv_engine(priv)->sclass = nv84_bsp_sclass;
> +
> +       ret = request_firmware(&fw, "nouveau/nv84_bsp", &device->pdev->dev);
> +       if (ret) {
> +               nv_warn(priv, "Firmware for NV84 BSP unavailable.\n");
> +               return 0;
> +       }
> +
> +       nv_subdev(priv)->intr = nv84_bsp_intr;
> +
> +       priv->fw = kmemdup(fw->data, fw->size, GFP_KERNEL);
> +       priv->fw_size = fw->size;
> +       release_firmware(fw);
> +       if (!priv->fw)
> +               return -ENOMEM;
> +
>         return 0;
>  }
>
> +static void
> +nv84_bsp_dtor(struct nouveau_object *object)
> +{
> +       struct nv84_bsp_priv *priv = (void *)object;
> +       kfree(priv->fw);
> +}
> +
> +static int
> +nv84_bsp_init(struct nouveau_object *object)
> +{
> +       struct nouveau_device *device = nv_device(object);
> +       struct nv84_bsp_priv *priv = (void *)object;
> +       int i, ret;
> +       u32 tmp;
> +
> +       if (!priv->fw)
> +               return -EINVAL;
> +
> +       ret = nouveau_engine_init(&priv->base);
> +       if (ret)
> +               return ret;
> +
> +       ret = nouveau_gpuobj_new(object, NULL, 0x40000, 0x1000, 0,
> +                                &priv->gpu_fw);
> +       if (ret)
> +               return ret;
> +
> +       tmp = nv_rd32(device, 0x103c20); /* INTR */
> +       if (tmp)
> +               nv_warn(priv, "Unexpected read from XTENSA.INTR: 0x%x", tmp);
> +
> +       nv_wr32(device, 0x103d10, 0x1fffffff); /* ?? */
> +       nv_wr32(device, 0x103d08, 0x0fffffff); /* ?? */
> +
> +       nv_wr32(device, 0x103d28, 0x90044); /* ?? */
> +       nv_mask(device, 0x2090, 0xf0000000, 0x8 << 28); /* PFIFO.UNK90 */
> +       nv_wr32(device, 0x103c20, 0x3f); /* INTR */
> +       nv_wr32(device, 0x103d84, 0x3f); /* INTR_EN */
> +
> +       nv_debug(priv, "Loading firmware to address: 0x%llx\n",
> +                priv->gpu_fw->addr);
> +
> +       for (i = 0; i < priv->fw_size / 4; i++)
> +               nv_wo32(priv->gpu_fw, i * 4, priv->fw[i]);
> +
> +       nv_wr32(device, 0x103cc0, priv->gpu_fw->addr >> 8); /* REGION_BASE */
> +       nv_wr32(device, 0x103cc4, 0x1c); /* XT_REGION_SETUP */
> +       nv_wr32(device, 0x103cc8, priv->gpu_fw->size >> 8); /* REGION_LIMIT */
> +
> +       tmp = nv_rd32(device, 0x0);
> +       nv_wr32(device, 0x103de0, tmp); /* SCRATCH_H2X */
> +
> +       nv_wr32(device, 0x103ce8, 0xf); /* XT_REGION_SETUP */
> +
> +       nv_wr32(device, 0x103c20, 0x3f); /* INTR */
> +       nv_wr32(device, 0x103d84, 0x3f); /* INTR_EN */
> +
> +       return 0;
> +}
> +
> +static int
> +nv84_bsp_fini(struct nouveau_object *object, bool suspend)
> +{
> +       struct nouveau_device *device = nv_device(object);
> +       struct nv84_bsp_priv *priv = (void *)object;
> +
> +       nv_wr32(device, 0x103d84, 0); /* INTR_EN */
> +       nv_wr32(device, 0x103d94, 0); /* FIFO_CTRL */
> +
> +       nouveau_gpuobj_ref(NULL, &priv->gpu_fw);
> +
> +       return nouveau_engine_fini(&priv->base, suspend);
> +}
> +
>  struct nouveau_oclass
>  nv84_bsp_oclass = {
>         .handle = NV_ENGINE(BSP, 0x84),
>         .ofuncs = &(struct nouveau_ofuncs) {
>                 .ctor = nv84_bsp_ctor,
> -               .dtor = _nouveau_engine_dtor,
> -               .init = _nouveau_engine_init,
> -               .fini = _nouveau_engine_fini,
> +               .dtor = nv84_bsp_dtor,
> +               .init = nv84_bsp_init,
> +               .fini = nv84_bsp_fini,
>         },
>  };
> diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/nv84.c b/drivers/gpu/drm/nouveau/core/engine/fifo/nv84.c
> index 35b94bd..7f53196 100644
> --- a/drivers/gpu/drm/nouveau/core/engine/fifo/nv84.c
> +++ b/drivers/gpu/drm/nouveau/core/engine/fifo/nv84.c
> @@ -56,7 +56,9 @@ nv84_fifo_context_attach(struct nouveau_object *parent,
>         switch (nv_engidx(object->engine)) {
>         case NVDEV_ENGINE_SW   : return 0;
>         case NVDEV_ENGINE_GR   : addr = 0x0020; break;
> +       case NVDEV_ENGINE_VP   : addr = 0x0040; break;
>         case NVDEV_ENGINE_MPEG : addr = 0x0060; break;
> +       case NVDEV_ENGINE_BSP  : addr = 0x0080; break;
>         case NVDEV_ENGINE_CRYPT: addr = 0x00a0; break;
>         case NVDEV_ENGINE_COPY0: addr = 0x00c0; break;
>         default:
> @@ -89,7 +91,9 @@ nv84_fifo_context_detach(struct nouveau_object *parent, bool suspend,
>         switch (nv_engidx(object->engine)) {
>         case NVDEV_ENGINE_SW   : return 0;
>         case NVDEV_ENGINE_GR   : engn = 0; addr = 0x0020; break;
> +       case NVDEV_ENGINE_VP   : engn = 3; addr = 0x0040; break;
>         case NVDEV_ENGINE_MPEG : engn = 1; addr = 0x0060; break;
> +       case NVDEV_ENGINE_BSP  : engn = 5; addr = 0x0080; break;
>         case NVDEV_ENGINE_CRYPT: engn = 4; addr = 0x00a0; break;
>         case NVDEV_ENGINE_COPY0: engn = 2; addr = 0x00c0; break;
>         default:
> diff --git a/drivers/gpu/drm/nouveau/core/engine/vp/nv84.c b/drivers/gpu/drm/nouveau/core/engine/vp/nv84.c
> index 261cd96..c0fa8e7 100644
> --- a/drivers/gpu/drm/nouveau/core/engine/vp/nv84.c
> +++ b/drivers/gpu/drm/nouveau/core/engine/vp/nv84.c
> @@ -29,6 +29,10 @@
>
>  struct nv84_vp_priv {
>         struct nouveau_engine base;
> +       u32 *fw;
> +       long fw_size;
> +       struct nouveau_gpuobj *gpu_fw;
> +       void *vm_gpu_fw;
>  };
>
>  /*******************************************************************************
> @@ -37,6 +41,7 @@ struct nv84_vp_priv {
>
>  static struct nouveau_oclass
>  nv84_vp_sclass[] = {
> +       { 0x7476, &nouveau_object_ofuncs },
>         {},
>  };
>
> @@ -44,11 +49,27 @@ nv84_vp_sclass[] = {
>   * PVP context
>   ******************************************************************************/
>
> +static int
> +nv84_vp_engctx_ctor(struct nouveau_object *parent,
> +                   struct nouveau_object *engine,
> +                   struct nouveau_oclass *oclass, void *data, u32 size,
> +                   struct nouveau_object **pobject)
> +{
> +       struct nouveau_engctx *engctx;
> +       int ret;
> +
> +       ret = nouveau_engctx_create(parent, engine, oclass, NULL,
> +                                   0x10000, 0x1000,
> +                                   NVOBJ_FLAG_ZERO_ALLOC, &engctx);
> +       *pobject = nv_object(engctx);
> +       return ret;
> +}
> +
>  static struct nouveau_oclass
>  nv84_vp_cclass = {
>         .handle = NV_ENGCTX(VP, 0x84),
>         .ofuncs = &(struct nouveau_ofuncs) {
> -               .ctor = _nouveau_engctx_ctor,
> +               .ctor = nv84_vp_engctx_ctor,
>                 .dtor = _nouveau_engctx_dtor,
>                 .init = _nouveau_engctx_init,
>                 .fini = _nouveau_engctx_fini,
> @@ -61,6 +82,24 @@ nv84_vp_cclass = {
>   * PVP engine/subdev functions
>   ******************************************************************************/
>
> +static void
> +nv84_vp_intr(struct nouveau_subdev *subdev)
> +{
> +       struct nv84_vp_priv *priv = (void *)subdev;
> +       u32 intr, unk104, unk10c, chan;
> +
> +       unk104 = nv_rd32(priv, 0xfd04);
> +       intr = nv_rd32(priv, 0xfc20);
> +       chan = nv_rd32(priv, 0xfc28);
> +       unk10c = nv_rd32(priv, 0xfd0c);
> +       nv_wr32(priv, 0xfc20, intr);
> +       intr = nv_rd32(priv, 0xfc20);
> +       if (unk104 == 0x10001 && unk10c == 0x200 && chan && !intr) {
> +               nv_debug(priv, "Enabling VP.FIFO_CTRL\n");
> +               nv_mask(priv, 0xfd94, 0, 0x111); /* FIFO_CTRL */
> +       }
> +}
> +
>  static int
>  nv84_vp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
>              struct nouveau_oclass *oclass, void *data, u32 size,
> @@ -68,6 +107,8 @@ nv84_vp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
>  {
>         struct nv84_vp_priv *priv;
>         int ret;
> +       const struct firmware *fw;
> +       struct nouveau_device *device = nv_device(parent);
>
>         ret = nouveau_engine_create(parent, engine, oclass, true,
>                                     "PVP", "vp", &priv);
> @@ -78,16 +119,107 @@ nv84_vp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
>         nv_subdev(priv)->unit = 0x01020000;
>         nv_engine(priv)->cclass = &nv84_vp_cclass;
>         nv_engine(priv)->sclass = nv84_vp_sclass;
> +       ret = request_firmware(&fw, "nouveau/nv84_vp", &device->pdev->dev);
> +       if (ret) {
> +               nv_warn(priv, "Firmware for NV84 VP unavailable.\n");
> +               return 0;
> +       }
> +
> +       nv_subdev(priv)->intr = nv84_vp_intr;
> +
> +       priv->fw = kmemdup(fw->data, fw->size, GFP_KERNEL);
> +       priv->fw_size = fw->size;
> +       release_firmware(fw);
> +       if (!priv->fw)
> +               return -ENOMEM;
> +
>         return 0;
>  }
>
> +static void
> +nv84_vp_dtor(struct nouveau_object *object)
> +{
> +       struct nv84_vp_priv *priv = (void *)object;
> +       kfree(priv->fw);
> +}
> +
> +static int
> +nv84_vp_init(struct nouveau_object *object)
> +{
> +       struct nouveau_device *device = nv_device(object);
> +       struct nv84_vp_priv *priv = (void *)object;
> +       int i, ret;
> +       u32 tmp;
> +
> +       if (!priv->fw)
> +               return -EINVAL;
> +
> +       ret = nouveau_engine_init(&priv->base);
> +       if (ret)
> +               return ret;
> +
> +       ret = nouveau_gpuobj_new(object, NULL, 0x40000, 0x1000, 0,
> +                                &priv->gpu_fw);
> +       if (ret)
> +               return ret;
> +
> +       tmp = nv_rd32(device, 0xfc20); /* INTR */
> +       if (tmp)
> +               nv_warn(priv, "Unexpected read from XTENSA.INTR: 0x%x", tmp);
> +
> +       nv_mask(device, 0x2090, 0x0000f000, 0x8 << 12); /* PFIFO.UNK90 */
> +       nv_wr32(device, 0xfd10, 0x1fffffff); /* ?? */
> +       nv_wr32(device, 0xfd08, 0x0fffffff); /* ?? */
> +       nv_wr32(device, 0xf010, 0x30); /* ?? */
> +       nv_wr32(device, 0xfd00, 0x4); /* ?? */
> +       nv_mask(device, 0xfd98, 0x10, 0x10); /* ?? */
> +
> +       nv_wr32(device, 0xfd28, 0x9c544); /* ?? */
> +       nv_wr32(device, 0xfc20, 0x3f); /* INTR */
> +       nv_wr32(device, 0xfd84, 0x3f); /* INTR_EN */
> +
> +       nv_debug(priv, "Loading firmware to address: 0x%llx\n",
> +                priv->gpu_fw->addr);
> +
> +       for (i = 0; i < priv->fw_size / 4; i++)
> +               nv_wo32(priv->gpu_fw, i * 4, priv->fw[i]);
> +
> +       nv_wr32(device, 0xfcc0, priv->gpu_fw->addr >> 8); /* XT_REGION_BASE */
> +       nv_wr32(device, 0xfcc4, 0x1c); /* XT_REGION_SETUP */
> +       nv_wr32(device, 0xfcc8, priv->gpu_fw->size >> 8); /* REGION_LIMIT */
> +
> +       tmp = nv_rd32(device, 0x0);
> +       nv_wr32(device, 0xfde0, tmp); /* SCRATCH_H2X */
> +
> +       nv_wr32(device, 0xfce8, 0xf); /* XT_REGION_SETUP */
> +
> +       nv_wr32(device, 0xfc20, 0x3f); /* INTR */
> +       nv_wr32(device, 0xfd84, 0x3f); /* INTR_EN */
> +
> +       return 0;
> +}
> +
> +static int
> +nv84_vp_fini(struct nouveau_object *object, bool suspend)
> +{
> +       struct nouveau_device *device = nv_device(object);
> +       struct nv84_vp_priv *priv = (void *)object;
> +
> +       nv_wr32(device, 0xfd84, 0); /* INTR_EN */
> +       nv_wr32(device, 0xfd94, 0); /* FIFO_CTRL */
> +
> +       nouveau_gpuobj_ref(NULL, &priv->gpu_fw);
> +
> +       return nouveau_engine_fini(&priv->base, suspend);
> +}
> +
>  struct nouveau_oclass
>  nv84_vp_oclass = {
>         .handle = NV_ENGINE(VP, 0x84),
>         .ofuncs = &(struct nouveau_ofuncs) {
>                 .ctor = nv84_vp_ctor,
> -               .dtor = _nouveau_engine_dtor,
> -               .init = _nouveau_engine_init,
> -               .fini = _nouveau_engine_fini,
> +               .dtor = nv84_vp_dtor,
> +               .init = nv84_vp_init,
> +               .fini = nv84_vp_fini,
>         },
>  };
> diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c
> index d796924..0cb322a 100644
> --- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c
> +++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c
> @@ -35,6 +35,7 @@ nv50_mc_intr[] = {
>         { 0x00001000, NVDEV_ENGINE_GR },
>         { 0x00004000, NVDEV_ENGINE_CRYPT },     /* NV84- */
>         { 0x00008000, NVDEV_ENGINE_BSP },       /* NV84- */
> +       { 0x00020000, NVDEV_ENGINE_VP },        /* NV84- */
>         { 0x00100000, NVDEV_SUBDEV_TIMER },
>         { 0x00200000, NVDEV_SUBDEV_GPIO },
>         { 0x04000000, NVDEV_ENGINE_DISP },
> diff --git a/drivers/gpu/drm/nouveau/core/subdev/vm/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/vm/nv50.c
> index 83c62a7..f88287a 100644
> --- a/drivers/gpu/drm/nouveau/core/subdev/vm/nv50.c
> +++ b/drivers/gpu/drm/nouveau/core/subdev/vm/nv50.c
> @@ -168,8 +168,10 @@ nv50_vm_flush(struct nouveau_vm *vm)
>
>                 switch (i) {
>                 case NVDEV_ENGINE_GR   : vme = 0x00; break;
> +               case NVDEV_ENGINE_VP   : vme = 0x01; break;
>                 case NVDEV_SUBDEV_BAR  : vme = 0x06; break;
>                 case NVDEV_ENGINE_MPEG : vme = 0x08; break;
> +               case NVDEV_ENGINE_BSP  : vme = 0x09; break;
>                 case NVDEV_ENGINE_CRYPT: vme = 0x0a; break;
>                 case NVDEV_ENGINE_COPY0: vme = 0x0d; break;
>                 default:
> --
> 1.8.1.5
>


More information about the dri-devel mailing list