[Nouveau] [PATCH 1/4] kernel: nv vpe
Jimmy Rentz
jb17bsome at gmail.com
Thu Aug 5 19:40:29 PDT 2010
This patch includes all the relevant nv vpe kernel support.
This patch applies against the latest nouveau linux-2.6 tree,
though the Makefile might need adjusting.
Some notes about the decoder engine:
* It is composed of the mmio control registers, fifo and the
output surfaces.
* The fifo pushbuffer can be allocated from vram or agp.
AGP is not working right now but it should in theory.
* Output surfaces for the luma+chroma data can only be
allocated from vram.
* Since only one set of mmio control registers exists, only one client
app can use the engine at a time. I suppose it might be possible to
support context switching, but that might be too slow to be useful.
Client usage (a rough sketch in code follows the list):
* Client app calls the vpe channel create ioctl to set up the hw
and fifo pushbuffer.
* Client app creates all the output surfaces via buffer objects.
* Client app writes a set of cmds to the pushbuffer, then calls
the fire ioctl to kick off a decode of a cmd sequence.
* Client app calls the query ioctl to see when an output surface is done
rendering.
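Roughly, that flow from user space: a minimal sketch, assuming libdrm's
drmCommandWriteRead() and GEM handles for the luma/chroma buffers created
beforehand (sized width*height for luma and a quarter of that for chroma,
per the kernel's pin checks). vpe_decode_one() is a made-up helper, not
part of this patch, and error/teardown handling is elided:

#include <stdint.h>
#include <xf86drm.h>
#include "nouveau_drm.h"

/* Decode one cmd sequence into output surface 0. cmds holds ncmds
 * dwords of decoder commands; ncmds must fit in 16 bits because the
 * kernel masks each batch size with 0xFFFF. */
static int vpe_decode_one(int fd, uint32_t luma, uint32_t chroma,
			  uint32_t *cmds, uint32_t ncmds)
{
	struct drm_nouveau_vd_vpe_channel_alloc alloc = {
		.width = 720, .height = 576,
	};
	struct drm_nouveau_vd_vpe_surface surf = {
		.luma_handle = luma,
		.chroma_handle = chroma,
		.surface_index = 0,
	};
	uint32_t batch = ncmds;	/* one batch covering all dwords */
	struct drm_nouveau_vd_vpe_pushbuf_fire fire = {
		.nr_dwords = ncmds,
		.dwords = (uint64_t)(uintptr_t)cmds,
		.nr_batches = 1,
		.batches = (uint64_t)(uintptr_t)&batch,
		.nr_surfaces = 1,	/* surface[0] is the target */
		.surfaces = (uint64_t)(uintptr_t)&surf,
		.flags = NOUVEAU_VD_VPE_PUSHBUF_FIRE_FLAG_END_SEQUENCE,
	};
	int ret;

	ret = drmCommandWriteRead(fd, DRM_NOUVEAU_VD_VPE_CHANNEL_ALLOC,
				  &alloc, sizeof(alloc));
	if (ret)
		return ret;

	/* alloc.pushbuf_handle could instead be mapped for the
	 * user-space submission path. */
	return drmCommandWriteRead(fd, DRM_NOUVEAU_VD_VPE_PUSHBUF_FIRE,
				   &fire, sizeof(fire));
}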
Some notes about the kernel implementation:
* Both user and kernel submission of pushbuffers is supported.
I originally implemented the kernel submission via a copy of the
pushbuffer. The user-space pushbuffer was added later for performance
reasons. Though, you still need to call the kernel to fire since mmio
access is not allowed from user mode.
* The output surface must be pinned in memory until the rendering is done.
A sequence-type fence lets you query when a given output surface is done
decoding (see the polling sketch below), which makes it possible to free a
surface if you want. The kernel would then automatically unpin the surface
if you replace it later. Realistically, though, freeing these surfaces
wouldn't be smart for performance reasons.
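For the query side, a polling loop might look like the sketch below; the
100 microsecond sleep is an arbitrary choice, not something the patch
prescribes:

#include <unistd.h>

/* Poll until the given output surface has finished decoding. */
static int vpe_wait_surface(int fd, uint32_t surface_index)
{
	struct drm_nouveau_vd_vpe_surface_query q = {
		.surface_index = surface_index,
	};
	int ret;

	do {
		ret = drmCommandWriteRead(fd, DRM_NOUVEAU_VD_VPE_SURFACE_QUERY,
					  &q, sizeof(q));
		if (ret)
			return ret;
		if (q.is_busy)
			usleep(100);
	} while (q.is_busy);

	return 0;
}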
Signed-off-by: Jimmy Rentz <jb17bsome at gmail.com>
diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile
index 2405d5e..7a6d699 100644
--- a/drivers/gpu/drm/nouveau/Makefile
+++ b/drivers/gpu/drm/nouveau/Makefile
@@ -23,7 +23,7 @@ nouveau-y := nouveau_drv.o nouveau_state.o nouveau_channel.o nouveau_mem.o \
nv04_dac.o nv04_dfp.o nv04_tv.o nv17_tv.o nv17_tv_modes.o \
nv04_crtc.o nv04_display.o nv04_cursor.o nv04_fbcon.o \
nv10_gpio.o nv50_gpio.o \
- nv50_calc.o
+ nv50_calc.o nouveau_vd_vpe.o
nouveau-$(CONFIG_DRM_NOUVEAU_DEBUG) += nouveau_debugfs.o
nouveau-$(CONFIG_COMPAT) += nouveau_ioc32.o
diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c
index e952c3b..cfbc981 100644
--- a/drivers/gpu/drm/nouveau/nouveau_channel.c
+++ b/drivers/gpu/drm/nouveau/nouveau_channel.c
@@ -336,6 +336,15 @@ nouveau_channel_cleanup(struct drm_device *dev, struct drm_file *file_priv)
if (chan && chan->file_priv == file_priv)
nouveau_channel_free(chan);
}
+
+ if (dev_priv->vpe_channel) {
+ struct nouveau_vd_vpe_channel *vpe_channel = dev_priv->vpe_channel;
+
+ if (vpe_channel->file_priv == file_priv) {
+ NV_DEBUG(dev, "clearing VPE channel from file_priv\n");
+ nouveau_vpe_channel_free(vpe_channel);
+ }
+ }
}
int
@@ -437,6 +446,14 @@ struct drm_ioctl_desc nouveau_ioctls[] = {
DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH),
DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_AUTH),
DRM_IOCTL_DEF(DRM_NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_AUTH),
+ DRM_IOCTL_DEF(DRM_NOUVEAU_VD_VPE_CHANNEL_ALLOC,
+ nouveau_vd_vpe_ioctl_channel_alloc, DRM_AUTH),
+ DRM_IOCTL_DEF(DRM_NOUVEAU_VD_VPE_CHANNEL_FREE,
+ nouveau_vd_vpe_ioctl_channel_free, DRM_AUTH),
+ DRM_IOCTL_DEF(DRM_NOUVEAU_VD_VPE_PUSHBUF_FIRE,
+ nouveau_vd_vpe_ioctl_pushbuf_fire, DRM_AUTH),
+ DRM_IOCTL_DEF(DRM_NOUVEAU_VD_VPE_SURFACE_QUERY,
+ nouveau_vd_vpe_ioctl_surface_query, DRM_AUTH),
};
int nouveau_max_ioctl = DRM_ARRAY_SIZE(nouveau_ioctls);
diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
index 7933de4..cc3387d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
@@ -117,6 +117,117 @@ nouveau_debugfs_channel_fini(struct nouveau_channel *chan)
}
}
+static int
+nouveau_debugfs_vpe_channel_info(struct seq_file *m, void *data)
+{
+ struct drm_info_node *node = (struct drm_info_node *) m->private;
+ struct nouveau_vd_vpe_channel *chan = node->info_ent->data;
+ int i;
+ uint32_t val;
+
+ seq_printf(m, "cpu fifo state:\n");
+ seq_printf(m, " max: 0x%08x\n", chan->dma.max << 2);
+ seq_printf(m, " cur: 0x%08x\n", chan->dma.cur << 2);
+ seq_printf(m, " put: 0x%08x\n", chan->dma.put << 2);
+ seq_printf(m, " free: 0x%08x\n", chan->dma.free << 2);
+
+ seq_printf(m, "vpe fifo state:\n");
+ seq_printf(m, " config: 0x%08x\n",
+ nv_rd32(chan->dev, NV_VPE_MPEG2_USER_CONFIG));
+ seq_printf(m, " offset: 0x%08x\n",
+ nv_rd32(chan->dev, NV_VPE_MPEG2_USER_OFFSET));
+ seq_printf(m, " size: 0x%08x\n",
+ nv_rd32(chan->dev, NV_VPE_MPEG2_USER_SIZE));
+ seq_printf(m, " get: 0x%08x\n",
+ nv_rd32(chan->dev, NV_VPE_MPEG2_USER_GET));
+ seq_printf(m, " put: 0x%08x\n",
+ nv_rd32(chan->dev, NV_VPE_MPEG2_USER_PUT));
+ seq_printf(m, " get.seq: 0x%08x\n",
+ nv_rd32(chan->dev, NV_VPE_MPEG2_SEQUENCE_GET));
+ seq_printf(m, " put.seq: 0x%08x\n",
+ chan->dma.sequence);
+
+ seq_printf(m, "vpe engine status:\n");
+ seq_printf(m, " engine_config_1: 0x%08x\n",
+ nv_rd32(chan->dev, NV_VPE_MPEG2_ENGINE_CONFIG_1));
+ seq_printf(m, " engine_config_2: 0x%08x\n",
+ nv_rd32(chan->dev, NV_VPE_MPEG2_ENGINE_CONFIG_2));
+ seq_printf(m, " engine_setup_1: 0x%08x\n",
+ nv_rd32(chan->dev, NV_VPE_MPEG2_ENGINE_SETUP_1));
+ seq_printf(m, " engine_setup_2: 0x%08x\n",
+ nv_rd32(chan->dev, NV_VPE_MPEG2_ENGINE_SETUP_2));
+ seq_printf(m, " engine_reader_config: 0x%08x\n",
+ nv_rd32(chan->dev, NV_VPE_MPEG2_ENGINE_READER_CONFIG));
+ seq_printf(m, " engine_processing_status: 0x%08x\n",
+ nv_rd32(chan->dev, NV_VPE_MPEG2_ENGINE_STATUS));
+ seq_printf(m, " engine_status: 0x%08x\n",
+ nv_rd32(chan->dev, NV_VPE_MPEG2_ENGINE_CONTROL));
+
+ seq_printf(m, "vpe decode surface config:\n");
+ val = nv_rd32(chan->dev, NV_VPE_MPEG2_SURFACE_INFO);
+ seq_printf(m, " info: 0x%08X\n",
+ val);
+ val = nv_rd32(chan->dev, NV_VPE_MPEG2_CONTEXT_DIMENSIONS);
+ seq_printf(m, " dimensions: width = %d, height = %d\n",
+ (val >> 16) & 0xFFF, val & 0xFFF);
+
+ seq_printf(m, "vpe decode surface fb offsets:\n");
+ for (i = 0; i < ARRAY_SIZE(chan->surface); i++) {
+ seq_printf(m, " luma.[0x%08X] = 0x%08X\n", i,
+ nv_rd32(chan->dev, NV_VPE_MPEG2_LUMA_SURFACE_OFFSET_GET(i)));
+ seq_printf(m, " chroma.[0x%08X] = 0x%08X\n", i,
+ nv_rd32(chan->dev, NV_VPE_MPEG2_CHROMA_SURFACE_OFFSET_GET(i)));
+ }
+
+ return 0;
+}
+
+int nouveau_debugfs_vpe_channel_init(struct nouveau_vd_vpe_channel *chan)
+{
+ struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
+ struct drm_minor *minor = chan->dev->primary;
+ int ret;
+
+ if (!dev_priv->debugfs.vpe_channel_root) {
+ dev_priv->debugfs.vpe_channel_root =
+ debugfs_create_dir("vpe_channel", minor->debugfs_root);
+ if (!dev_priv->debugfs.vpe_channel_root)
+ return -ENOENT;
+ }
+
+ strcpy(chan->debugfs.name, "0");
+ chan->debugfs.info.name = chan->debugfs.name;
+ chan->debugfs.info.show = nouveau_debugfs_vpe_channel_info;
+ chan->debugfs.info.driver_features = 0;
+ chan->debugfs.info.data = chan;
+
+ ret = drm_debugfs_create_files(&chan->debugfs.info, 1,
+ dev_priv->debugfs.vpe_channel_root,
+ chan->dev->primary);
+ if (ret == 0)
+ chan->debugfs.active = true;
+ return ret;
+}
+
+void
+nouveau_debugfs_vpe_channel_fini(struct nouveau_vd_vpe_channel *chan)
+{
+ struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
+
+ if (!chan->debugfs.active)
+ return;
+
+ drm_debugfs_remove_files(&chan->debugfs.info, 1, chan->dev->primary);
+ chan->debugfs.active = false;
+
+ if (chan == dev_priv->vpe_channel) {
+ debugfs_remove(dev_priv->debugfs.vpe_channel_root);
+ dev_priv->debugfs.vpe_channel_root = NULL;
+ }
+}
+
static int
nouveau_debugfs_chipset_info(struct seq_file *m, void *data)
{
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index da62e92..150cbf9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -502,6 +502,38 @@ struct nv04_mode_state {
struct nv04_crtc_reg crtc_reg[2];
};
+struct nouveau_vd_vpe_surface {
+ struct nouveau_bo *luma_bo;
+ struct nouveau_bo *chroma_bo;
+ uint32_t dma_sequence;
+};
+
+struct nouveau_vd_vpe_channel {
+ struct drm_device *dev;
+ struct drm_file *file_priv;
+ uint32_t width;
+ uint32_t height;
+
+ /* Push buffer state */
+ struct {
+ uint32_t max;
+ uint32_t cur;
+ uint32_t put;
+ uint32_t free;
+ uint32_t sequence;
+ /* access via pushbuf_bo */
+ } dma;
+
+ struct nouveau_bo *pushbuf_bo;
+ struct nouveau_vd_vpe_surface surface[8];
+
+ struct {
+ bool active;
+ char name[32];
+ struct drm_info_list info;
+ } debugfs;
+};
+
enum nouveau_card_type {
NV_04 = 0x00,
NV_10 = 0x10,
@@ -626,10 +658,13 @@ struct drm_nouveau_private {
struct {
struct dentry *channel_root;
+ struct dentry *vpe_channel_root;
} debugfs;
struct nouveau_fbdev *nfbdev;
struct apertures_struct *apertures;
+
+ struct nouveau_vd_vpe_channel *vpe_channel;
};
static inline struct drm_nouveau_private *
@@ -667,6 +702,16 @@ nouveau_bo_ref(struct nouveau_bo *ref, struct nouveau_bo **pnvbo)
(ch) = nv->fifos[(id)]; \
} while (0)
+#define NOUVEAU_GET_VPE_CHANNEL_WITH_RETURN(id, ch) do { \
+ struct drm_nouveau_private *nv = dev->dev_private; \
+ if (!nv->vpe_channel) \
+ return -ENODEV; \
+ if (nv->vpe_channel->file_priv != (id)) { \
+ NV_ERROR(dev, "pid %d doesn't own vpe channel\n", \
+ DRM_CURRENTPID); \
+ return -EPERM; \
+ } \
+ (ch) = nv->vpe_channel; \
+} while (0)
+
/* nouveau_drv.c */
extern int nouveau_noagp;
extern int nouveau_duallink;
@@ -811,6 +856,8 @@ extern int nouveau_debugfs_init(struct drm_minor *);
extern void nouveau_debugfs_takedown(struct drm_minor *);
extern int nouveau_debugfs_channel_init(struct nouveau_channel *);
extern void nouveau_debugfs_channel_fini(struct nouveau_channel *);
+extern int nouveau_debugfs_vpe_channel_init(struct nouveau_vd_vpe_channel *);
+extern void nouveau_debugfs_vpe_channel_fini(struct nouveau_vd_vpe_channel *);
#else
static inline int
nouveau_debugfs_init(struct drm_minor *minor)
@@ -832,6 +879,17 @@ static inline void
nouveau_debugfs_channel_fini(struct nouveau_channel *chan)
{
}
+
+static inline int
+nouveau_debugfs_vpe_channel_init(struct nouveau_vd_vpe_channel *chan)
+{
+ return 0;
+}
+
+static inline void
+nouveau_debugfs_vpe_channel_fini(struct nouveau_vd_vpe_channel *chan)
+{
+}
#endif
/* nouveau_dma.c */
@@ -1161,6 +1219,17 @@ extern int nouveau_gem_ioctl_cpu_fini(struct drm_device *, void *,
extern int nouveau_gem_ioctl_info(struct drm_device *, void *,
struct drm_file *);
+/* nouveau_vd_vpe.c */
+extern void nouveau_vpe_channel_free(struct nouveau_vd_vpe_channel *);
+extern int nouveau_vd_vpe_ioctl_channel_alloc(struct drm_device *, void *,
+ struct drm_file *);
+extern int nouveau_vd_vpe_ioctl_channel_free(struct drm_device *, void *,
+ struct drm_file *);
+extern int nouveau_vd_vpe_ioctl_pushbuf_fire(struct drm_device *, void *,
+ struct drm_file *);
+extern int nouveau_vd_vpe_ioctl_surface_query(struct drm_device *, void *,
+ struct drm_file *);
+
/* nv10_gpio.c */
int nv10_gpio_get(struct drm_device *dev, enum dcb_gpio_tag tag);
int nv10_gpio_set(struct drm_device *dev, enum dcb_gpio_tag tag, int state);
diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h
index 9c1056c..3dd8308 100644
--- a/drivers/gpu/drm/nouveau/nouveau_reg.h
+++ b/drivers/gpu/drm/nouveau/nouveau_reg.h
@@ -176,6 +176,37 @@
#define NV04_PTIMER_TIME_1 0x00009410
#define NV04_PTIMER_ALARM_0 0x00009420
+/* The NV VPE MPEG2 control registers that exist on NV40, NV30 and
+ * possibly some other older boards.*/
+#define NV_VPE_MPEG2_ENGINE_CONFIG_1 0x0000B0E0
+#define NV_VPE_MPEG2_ENGINE_CONFIG_2 0x0000B0E8
+#define NV_VPE_MPEG2_ENGINE_SETUP_1 0x0000B100
+#define NV_VPE_MPEG2_ENGINE_SETUP_2 0x0000B140
+#define NV_VPE_MPEG2_ENGINE_STATUS 0x0000B200
+#define NV_VPE_MPEG2_ENGINE_READER_CONFIG 0x0000B204
+#define NV_VPE_MPEG2_USER_CONFIG 0x0000B300
+# define NV_VPE_MPEG2_USER_NOT_PRESENT 0x020F0200
+# define NV_VPE_MPEG2_USER_PRESENT 0x02001ec1
+# define NV_VPE_MPEG2_USER_VRAM (0 << 16)
+# define NV_VPE_MPEG2_USER_AGP_OR_PCI (1 << 16)
+# define NV_VPE_MPEG2_USER_AGP_OR_PCI_READY (2 << 16)
+/* Complete guess here about pcie.*/
+# define NV_VPE_MPEG2_USER_PCIE (8 << 16)
+#define NV_VPE_MPEG2_UNKNOWN_SETUP_3 0x0000B314
+#define NV_VPE_MPEG2_USER_OFFSET 0x0000B320
+#define NV_VPE_MPEG2_USER_SIZE 0x0000B324
+#define NV_VPE_MPEG2_USER_PUT 0x0000B328
+#define NV_VPE_MPEG2_USER_GET 0x0000B330
+#define NV_VPE_MPEG2_ENGINE_CONTROL 0x0000B32C
+# define NV_VPE_MPEG2_ENGINE_STOP 0
+# define NV_VPE_MPEG2_ENGINE_START 1
+#define NV_VPE_MPEG2_SEQUENCE_GET 0x0000B340
+#define NV_VPE_MPEG2_SURFACE_INFO 0x0000B378
+#define NV_VPE_MPEG2_CONTEXT_DIMENSIONS 0x0000B37C
+#define NV_VPE_MPEG2_LUMA_SURFACE_OFFSET_GET(s) (0x0000B450 + ((s) * 8))
+#define NV_VPE_MPEG2_CHROMA_SURFACE_OFFSET_GET(s) (0x0000B454 + ((s) * 8))
+#define NV_VPE_MPEG2_ENGINE_STATUS_1 0x0000B848
+
#define NV04_PGRAPH_DEBUG_0 0x00400080
#define NV04_PGRAPH_DEBUG_1 0x00400084
#define NV04_PGRAPH_DEBUG_2 0x00400088
diff --git a/drivers/gpu/drm/nouveau/nouveau_vd_vpe.c b/drivers/gpu/drm/nouveau/nouveau_vd_vpe.c
new file mode 100644
index 0000000..149f10b
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nouveau_vd_vpe.c
@@ -0,0 +1,1218 @@
+/*
+ * Copyright (C) 2010 Jimmy Rentz
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "drm.h"
+
+#include "nouveau_drv.h"
+#include "nouveau_drm.h"
+#include "nouveau_vpe_hw.h"
+
+/* VPE MPEG2 HW notes:
+ * - There is a 64-byte fetch size. That is why each set of commands must
+ * be aligned on a 64-byte boundary for firing.
+ * - One fetch of cmds seems to process in 1 microsecond on my nv4e.
+ * However, I presume this can vary based on the hw and nature of commands.
+ * - Each firing of a set of commands must be followed by a small delay.
+ * The main reason is to avoid overwhelming the hw.
+ * The delays below were determined from testing/measuring. I doubt they
+ * are perfect and they could be tweaked a bit.*/
+
+/* Channel/Surface init commands should not take long to process.*/
+#define VPE_UDELAY_FIRE_INIT 4
+
+/* Normal firing needs this type of delay.*/
+#define VPE_UDELAY_FIRE_NORMAL 35
+
+/* Need a longer delay at the end of the fifo since it takes longer.*/
+#define VPE_UDELAY_FIRE_END 100
+
+/* Set if you want to validate vpe user cmds.
+ * Otherwise, they are copied as-is.
+ * The reason this exists is that a user could set a vpe surface to
+ * point to the visible framebuffer, etc. However, the user could never
+ * make a vpe surface use a gart address since that isn't supported by
+ * the hardware.*/
+/*#define NOUVEAU_VPE_VALIDATE_USER_CMDS*/
+
+/* TODO - Export this from nouveau_gem.c*/
+/* Needed to copy userspace pushbuffers that are sent to the vpe hw.*/
+static inline void *
+_u_memcpya(uint64_t user, unsigned nmemb, unsigned size)
+{
+ void *mem;
+ void __user *userptr = (void __force __user *)(uintptr_t)user;
+
+ mem = kmalloc(nmemb * size, GFP_KERNEL);
+ if (!mem)
+ return ERR_PTR(-ENOMEM);
+
+ if (DRM_COPY_FROM_USER(mem, userptr, nmemb * size)) {
+ kfree(mem);
+ return ERR_PTR(-EFAULT);
+ }
+
+ return mem;
+}
+
+/* Internal */
+static inline void
+nouveau_vpe_cmd_write(struct nouveau_vd_vpe_channel *vpe_channel,
+ uint32_t value)
+{
+ nouveau_bo_wr32(vpe_channel->pushbuf_bo, vpe_channel->dma.cur++,
+ value);
+ vpe_channel->dma.free--;
+
+ if (vpe_channel->dma.cur == vpe_channel->dma.max) {
+ vpe_channel->dma.cur = 0;
+ vpe_channel->dma.free = vpe_channel->dma.max;
+ }
+}
+
+static inline void
+nouveau_vpe_cmd_align(struct nouveau_vd_vpe_channel *vpe_channel)
+{
+ uint32_t nop_count;
+ uint32_t cmd_sequence_count;
+ int i;
+
+ /* Alignment is needed when ending cmd sequences.*/
+ cmd_sequence_count = vpe_channel->dma.cur - vpe_channel->dma.put;
+ nop_count = ALIGN(cmd_sequence_count, NV_VPE_CMD_ALIGNMENT);
+ nop_count -= cmd_sequence_count;
+
+ for (i = 0; i < nop_count; i++)
+ nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_NOP <<
+ NV_VPE_CMD_TYPE_SHIFT);
+}
+
+static inline void
+nouveau_vpe_fire(struct nouveau_vd_vpe_channel *vpe_channel, uint64_t delay)
+{
+ struct drm_device *dev = vpe_channel->dev;
+ uint32_t put;
+
+ DRM_MEMORYBARRIER();
+
+ put = (vpe_channel->dma.cur / NV_VPE_CMD_ALIGNMENT) *
+ NV_VPE_CMD_ALIGNMENT;
+
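+ /* Read back from the pushbuf before bumping PUT; presumably this
+ * flushes the mapping so all cmds have landed in memory. */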
+ nouveau_bo_rd32(vpe_channel->pushbuf_bo, put);
+
+ nv_wr32(dev, NV_VPE_MPEG2_USER_PUT, put << 2);
+
+ vpe_channel->dma.put = put;
+
+ if (delay)
+ DRM_UDELAY(delay);
+}
+
+static uint32_t
+nouveau_vpe_channel_read_get(struct nouveau_vd_vpe_channel *vpe_channel)
+{
+ struct drm_device *dev = vpe_channel->dev;
+
+ return nv_rd32(dev, NV_VPE_MPEG2_USER_GET) >> 2;
+}
+
+static int
+nouveau_vpe_wait_until_engine_idle(struct nouveau_vd_vpe_channel *vpe_channel)
+{
+ struct drm_device *dev = vpe_channel->dev;
+
+ if (!nouveau_wait_until(dev, 10000000, NV_VPE_MPEG2_ENGINE_STATUS,
+ 0x0FFFFFFF, 0)) {
+ NV_ERROR(dev, "nouveau_vpe_wait_until_engine_idle - engine is not"
+ " idle. status = 0x%08X.\n",
+ nv_rd32(dev, NV_VPE_MPEG2_ENGINE_STATUS));
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+nouveau_vpe_channel_wait(struct nouveau_vd_vpe_channel *vpe_channel,
+ uint32_t put)
+{
+ uint32_t get;
+ uint32_t prev_get = 0;
+ bool is_beg = (put == 0) || (vpe_channel->dma.put == 0);
+ uint32_t cnt = 0;
+
+ get = prev_get = nouveau_vpe_channel_read_get(vpe_channel);
+
+ while ((!is_beg && (get < put)) ||
+ (is_beg && (get != 0))) {
+
+ /* reset counter as long as GET is still advancing, this is
+ * to avoid misdetecting a GPU lockup if the GPU happens to
+ * just be processing an operation that takes a long time
+ */
+ get = nouveau_vpe_channel_read_get(vpe_channel);
+ if (get != prev_get) {
+ prev_get = get;
+ cnt = 0;
+ }
+
+ if ((++cnt & 0xff) == 0) {
+ DRM_UDELAY(1);
+ if (cnt > 100000) {
+ NV_ERROR(vpe_channel->dev, "nouveau_vpe_channel_wait - lockup. "
+ "cur = 0x%08X, put = 0x%08X, get = 0x%08X, put.seq = %u,"
+ "get.seq = %u, ec1 = 0x%08X, ec2 = 0x%08X, es = 0x%08X.\n",
+ vpe_channel->dma.cur, put,
+ nouveau_vpe_channel_read_get(vpe_channel),
+ vpe_channel->dma.sequence,
+ nv_rd32(vpe_channel->dev, NV_VPE_MPEG2_SEQUENCE_GET),
+ nv_rd32(vpe_channel->dev, NV_VPE_MPEG2_ENGINE_CONFIG_1),
+ nv_rd32(vpe_channel->dev, NV_VPE_MPEG2_ENGINE_CONFIG_2),
+ nv_rd32(vpe_channel->dev, NV_VPE_MPEG2_ENGINE_STATUS));
+ return -EBUSY;
+ }
+ }
+ }
+
+ return 0;
+}
+
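+/* An end-of-sequence cmd carries an incrementing sequence number that
+ * the hw copies to NV_VPE_MPEG2_SEQUENCE_GET once the cmds ahead of it
+ * have been processed; the surface query ioctl compares against this
+ * to tell when a target surface is done. */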
+static void
+nouveau_vpe_cmd_end_sequence_header(struct nouveau_vd_vpe_channel *vpe_channel)
+{
+ nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_END_SEQUENCE <<
+ NV_VPE_CMD_TYPE_SHIFT | NV_VPE_CMD_SEQUENCE << 24);
+
+ nouveau_vpe_cmd_write(vpe_channel, ++vpe_channel->dma.sequence);
+}
+
+static void
+nouveau_vpe_cmd_end_sequence_trailer(struct nouveau_vd_vpe_channel *vpe_channel)
+{
+ nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_END_SEQUENCE <<
+ NV_VPE_CMD_TYPE_SHIFT);
+}
+
+static void
+nouveau_vpe_cmd_end_sequence_finish(struct nouveau_vd_vpe_channel *vpe_channel)
+{
+ nouveau_vpe_cmd_align(vpe_channel);
+ nouveau_vpe_fire(vpe_channel, VPE_UDELAY_FIRE_NORMAL);
+}
+
+#ifndef NOUVEAU_VPE_VALIDATE_USER_CMDS
+static void
+_OUT_RINGp(struct nouveau_vd_vpe_channel *chan, const void *data,
+ unsigned nr_dwords)
+{
+ bool is_iomem;
+ u32 *mem = ttm_kmap_obj_virtual(&chan->pushbuf_bo->kmap, &is_iomem);
+ mem = &mem[chan->dma.cur];
+ if (is_iomem)
+ memcpy_toio((void __force __iomem *)mem, data, nr_dwords * 4);
+ else
+ memcpy(mem, data, nr_dwords * 4);
+ chan->dma.cur += nr_dwords;
+}
+#endif
+
+static int
+nouveau_vpe_cmd_write_user_batch(struct nouveau_vd_vpe_channel *chan,
+ const void *data, unsigned nr_dwords)
+{
+#ifdef NOUVEAU_VPE_VALIDATE_USER_CMDS
+ bool is_iomem;
+ u32 *mem = ttm_kmap_obj_virtual(&chan->pushbuf_bo->kmap, &is_iomem);
+ u32 *user_data = (u32 *) data;
+ uint32_t val;
+ int i;
+ bool in_mb_db = false;
+ bool at_end_mb_db = false;
+
+ mem = &mem[chan->dma.cur];
+
+ for (i = 0; i < nr_dwords; i++) {
+ val = user_data[i];
+
+ if (in_mb_db) {
+ if (at_end_mb_db) {
+ if (val == (NV_VPE_CMD_DCT_SEPARATOR << NV_VPE_CMD_TYPE_SHIFT))
+ at_end_mb_db = false;
+ else
+ in_mb_db = false;
+ } else if (val & NV_VPE_DCT_BLOCK_TERMINATOR)
+ at_end_mb_db = true;
+ }
+ if (!in_mb_db) {
+ switch (val & 0xF0000000) {
+ case NV_VPE_CMD_DCT_SEPARATOR << NV_VPE_CMD_TYPE_SHIFT:
+ in_mb_db = true;
+ at_end_mb_db = false;
+ break;
+ case NV_VPE_CMD_DCT_CHROMA_HEADER << NV_VPE_CMD_TYPE_SHIFT:
+ case NV_VPE_CMD_DCT_LUMA_HEADER << NV_VPE_CMD_TYPE_SHIFT:
+ case NV_VPE_CMD_DCT_COORDINATE << NV_VPE_CMD_TYPE_SHIFT:
+ case NV_VPE_CMD_CHROMA_MOTION_VECTOR_HEADER <<
+ NV_VPE_CMD_TYPE_SHIFT:
+ case NV_VPE_CMD_LUMA_MOTION_VECTOR_HEADER << NV_VPE_CMD_TYPE_SHIFT:
+ case NV_VPE_CMD_MOTION_VECTOR << NV_VPE_CMD_TYPE_SHIFT:
+ case NV_VPE_CMD_NOP << NV_VPE_CMD_TYPE_SHIFT:
+ break;
+ default:
+ NV_ERROR(chan->dev, "vpe - invalid cmd 0x%08X detected. "
+ "Aborting cmd sequence.\n", val);
+ return -EINVAL;
+ }
+ }
+
+ /* Always iomem/vram for vpe.*/
+ iowrite32_native(val, (void __force __iomem *)&mem[i]);
+ }
+
+ chan->dma.cur += nr_dwords;
+#else
+ _OUT_RINGp(chan, data, nr_dwords);
+#endif
+
+ return 0;
+}
+
+static bool
+nouveau_vpe_validate_surface(struct nouveau_vd_vpe_channel *vpe_channel,
+ uint32_t handle,
+ struct nouveau_bo *target_nvbo)
+{
+ struct drm_device *dev = vpe_channel->dev;
+ struct drm_gem_object *gem;
+ struct nouveau_bo *nvbo;
+ bool result;
+
+ gem = drm_gem_object_lookup(dev, vpe_channel->file_priv, handle);
+ if (unlikely(!gem)) {
+ result = false;
+ NV_ERROR(dev, "nouveau_vpe_validate_gem_handle - "
+ "Unknown handle 0x%08X.\n", handle);
+ goto out;
+ }
+ nvbo = nouveau_gem_object(gem);
+ if (unlikely(!nvbo || (nvbo != target_nvbo))) {
+ result = false;
+ NV_ERROR(dev, "nouveau_vpe_validate_gem_handle - "
+ "Unknown bo 0x%08X.\n", handle);
+ goto out;
+ }
+
+ result = true;
+
+out:
+
+ mutex_lock(&dev->struct_mutex);
+ drm_gem_object_unreference(gem);
+ mutex_unlock(&dev->struct_mutex);
+
+ return result;
+}
+
+static int
+nouveau_vpe_pin_surface(struct nouveau_vd_vpe_channel *vpe_channel,
+ uint32_t handle, uint32_t required_size,
+ struct nouveau_bo **pnvbo)
+{
+ struct drm_device *dev = vpe_channel->dev;
+ struct drm_gem_object *gem;
+ struct nouveau_bo *nvbo;
+ uint32_t mem_type;
+ unsigned long size;
+ int ret;
+
+ gem = drm_gem_object_lookup(dev, vpe_channel->file_priv, handle);
+ if (!gem) {
+ NV_ERROR(dev, "nouveau_vpe_pin_surface - "
+ " Unknown handle 0x%08X.\n", handle);
+ return -EINVAL;
+ }
+ nvbo = nouveau_gem_object(gem);
+ if (!nvbo) {
+ ret = -EINVAL;
+ NV_ERROR(dev, "nouveau_vpe_pin_surface - "
+ "Unknown bo 0x%08X.\n", handle);
+ goto out;
+ }
+ ret = ttm_bo_reserve(&nvbo->bo, false, false, false, 0);
+ if (ret)
+ goto out;
+
+ mem_type = nvbo->bo.mem.mem_type;
+ size = nvbo->bo.mem.size;
+
+ ttm_bo_unreserve(&nvbo->bo);
+
+ if (mem_type != TTM_PL_VRAM) {
+ ret = -EINVAL;
+ NV_ERROR(dev, "nouveau_vpe_pin_surface - bo must be in vram.\n");
+ goto out;
+ }
+ if (size < required_size) {
+ ret = -EINVAL;
+ NV_ERROR(dev, "nouveau_vpe_pin_surface - bo 0x%08X has size %lu, "
+ "required %u.\n", handle,
+ size, required_size);
+ goto out;
+ }
+ ret = nouveau_bo_pin(nvbo, TTM_PL_FLAG_VRAM);
+ if (ret) {
+ NV_ERROR(dev, "nouveau_vpe_pin_surface - "
+ "Could not pin handle 0x%08X.\n", handle);
+ goto out;
+ }
+
+ *pnvbo = nvbo;
+ ret = 0;
+
+out:
+
+ mutex_lock(&dev->struct_mutex);
+ drm_gem_object_unreference(gem);
+ mutex_unlock(&dev->struct_mutex);
+
+ return ret;
+}
+
+static void
+nouveau_vpe_unpin_surface(struct nouveau_vd_vpe_channel *vpe_channel,
+ struct nouveau_bo *nvbo)
+{
+ if (nvbo && nvbo->pin_refcnt)
+ nouveau_bo_unpin(nvbo);
+}
+
+static void
+nouveau_vpe_reset_pushbuf_to_start(struct nouveau_vd_vpe_channel *vpe_channel)
+{
+ int i;
+ uint32_t nop_count;
+
+ if (vpe_channel->dma.cur) {
+ /* Just write nops till the end since alignment is a non-issue
+ * here.*/
+ nop_count = vpe_channel->dma.max - vpe_channel->dma.cur;
+
+ for (i = 0; i < nop_count; i++)
+ nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_NOP <<
+ NV_VPE_CMD_TYPE_SHIFT);
+ }
+
+ nouveau_vpe_fire(vpe_channel, VPE_UDELAY_FIRE_END);
+}
+
+static int
+nouveau_vpe_channel_pushbuf_alloc(struct nouveau_vd_vpe_channel *vpe_channel)
+{
+ struct drm_device *dev = vpe_channel->dev;
+ struct nouveau_bo *pushbuf_bo;
+ int ret;
+ uint32_t flags;
+
+ /* The pushbuffer would use TTM_PL_FLAG_TT when
+ * dev_priv->gart_info.type == NOUVEAU_GART_AGP, but agp init
+ * is broken right now it seems, so always use vram.*/
+ flags = TTM_PL_FLAG_VRAM;
+
+ ret = nouveau_gem_new(dev, NULL, NV_VPE_PUSHBUFFER_SIZE, 0,
+ flags, 0, 0x0000, false, true, &pushbuf_bo);
+ if (ret)
+ return ret;
+
+ ret = nouveau_bo_pin(pushbuf_bo, flags);
+ if (ret)
+ goto out_err;
+
+ ret = nouveau_bo_map(pushbuf_bo);
+ if (ret)
+ goto out_err;
+
+ vpe_channel->pushbuf_bo = pushbuf_bo;
+ vpe_channel->dma.max = vpe_channel->pushbuf_bo->bo.mem.size >> 2;
+ vpe_channel->dma.free = vpe_channel->dma.max;
+
+out_err:
+ if (ret) {
+ mutex_lock(&dev->struct_mutex);
+ drm_gem_object_unreference(pushbuf_bo->gem);
+ mutex_unlock(&dev->struct_mutex);
+ }
+
+ return ret;
+}
+
+static int
+nouveau_vpe_channel_hw_init(struct nouveau_vd_vpe_channel *vpe_channel)
+{
+ uint32_t value;
+ struct drm_device *dev = vpe_channel->dev;
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ uint32_t pushbuf_offset = 0;
+
+ /* Turn off the mpeg2 decoder.*/
+ nv_wr32(dev, NV_VPE_MPEG2_USER_CONFIG,
+ NV_VPE_MPEG2_USER_NOT_PRESENT);
+ nv_wr32(dev, NV_VPE_MPEG2_ENGINE_CONTROL, NV_VPE_MPEG2_ENGINE_STOP);
+ nv_wr32(dev, NV_VPE_MPEG2_USER_PUT, 0);
+ nv_wr32(dev, NV_VPE_MPEG2_USER_OFFSET, 0);
+ nv_wr32(dev, NV_VPE_MPEG2_USER_SIZE, 0);
+ nv_wr32(dev, NV_VPE_MPEG2_ENGINE_SETUP_1, 0);
+ nv_wr32(dev, NV_VPE_MPEG2_ENGINE_SETUP_2, 0);
+ nv_rd32(dev, NV_VPE_MPEG2_ENGINE_CONTROL);
+
+ /* Pause a tiny bit to let the hardware reset.
+ * This might be needed.*/
+ DRM_UDELAY(100);
+
+ nv_wr32(dev, NV_VPE_MPEG2_ENGINE_SETUP_1, 0x01010000);
+ nv_wr32(dev, NV_VPE_MPEG2_ENGINE_SETUP_2, 0x01010000);
+ nv_wr32(dev, NV_VPE_MPEG2_UNKNOWN_SETUP_3, 0x100);
+
+ /* Some type of mpeg2 engine config.
+ * It seems that the hardware automatically sets this to 0x20.
+ * However, I have an nv4a mmio trace where the nvidia driver
+ * actually writes 0x20.
+ * Also I have noticed that when the mpeg2 engine hw locks
+ * up after playing video, this register gets reset to 0x1.
+ */
+ if (nv_rd32(dev, NV_VPE_MPEG2_ENGINE_CONFIG_1) != 0x20)
+ nv_wr32(dev, NV_VPE_MPEG2_ENGINE_CONFIG_1, 0x20);
+ if (nv_rd32(dev, NV_VPE_MPEG2_ENGINE_CONFIG_2) != 0x20)
+ nv_wr32(dev, NV_VPE_MPEG2_ENGINE_CONFIG_2, 0x20);
+
+ /* Make sure the decoder is ready.
+ * So, we check each status register.
+ * Well, that is what these registers seem to be.
+ */
+ value = nv_rd32(dev, NV_VPE_MPEG2_ENGINE_STATUS);
+
+ /* Is the hw still busy? */
+ if (value & 0x1)
+ if (!nouveau_wait_until(dev, 10000000, NV_VPE_MPEG2_ENGINE_STATUS,
+ 0x0FFFFFFF, 0)) {
+ NV_ERROR(dev, "nouveau_vpe_channel_hw_init - "
+ "unknown status value of 0x%08X for engine "
+ "status reg. Must exit.\n",
+ nv_rd32(dev, NV_VPE_MPEG2_ENGINE_STATUS));
+ return -EINVAL;
+ }
+
+ /* Make sure the decoder is ready. */
+ value = nv_rd32(dev, NV_VPE_MPEG2_ENGINE_STATUS_1);
+
+ /* If we got this value then we might have a problem. */
+ if (value & 0x200) {
+ NV_ERROR(dev, "nouveau_vpe_channel_hw_init - "
+ "unknown status value of 0x%08X for engine status 1 reg. "
+ "Must exit.\n",
+ value);
+ return -EINVAL;
+ }
+
+ /* Is the status reg still busy? */
+ if (value & 0x1)
+ if (!nouveau_wait_until(dev, 10000000, NV_VPE_MPEG2_ENGINE_STATUS_1,
+ 0x0FFFFFFF, 0)) {
+ NV_ERROR(dev, "nouveau_vpe_channel_hw_init - "
+ "unknown status value of 0x%08X for engine status 1 reg. "
+ "Must exit.\n",
+ nv_rd32(dev, NV_VPE_MPEG2_ENGINE_STATUS_1));
+ return -EINVAL;
+ }
+
+ /* Reset the mpeg2 pushbuffer/user. */
+ nv_wr32(dev, NV_VPE_MPEG2_ENGINE_CONTROL, NV_VPE_MPEG2_ENGINE_STOP);
+ nv_wr32(dev, NV_VPE_MPEG2_USER_OFFSET, 0);
+ nv_wr32(dev, NV_VPE_MPEG2_USER_SIZE, 0);
+
+ /* The setup of the command buffer is different for agp and pci/pcie.
+ * NOTE: Agp is not working right now so it is disabled.*/
+ if (vpe_channel->pushbuf_bo->bo.mem.mem_type == TTM_PL_TT) {
+
+ pushbuf_offset = lower_32_bits(dev_priv->gart_info.aper_base) +
+ lower_32_bits(vpe_channel->pushbuf_bo->bo.offset);
+
+ nv_wr32(dev, NV_VPE_MPEG2_USER_CONFIG,
+ NV_VPE_MPEG2_USER_PRESENT | NV_VPE_MPEG2_USER_AGP_OR_PCI);
+ /* This needs the agp aperture in the offset.*/
+ nv_wr32(dev, NV_VPE_MPEG2_USER_OFFSET,
+ pushbuf_offset);
+ nv_wr32(dev, NV_VPE_MPEG2_USER_SIZE,
+ vpe_channel->dma.max << 2);
+ nv_wr32(dev, NV_VPE_MPEG2_ENGINE_SETUP_1, 0x01010000);
+ nv_wr32(dev, NV_VPE_MPEG2_ENGINE_SETUP_2, 0x01010000);
+ nv_wr32(dev, NV_VPE_MPEG2_USER_CONFIG,
+ NV_VPE_MPEG2_USER_PRESENT | NV_VPE_MPEG2_USER_AGP_OR_PCI |
+ NV_VPE_MPEG2_USER_AGP_OR_PCI_READY);
+ } else {
+ /* For pci, only the fb offset is used.
+ * However, have to init the pushbuffer/user using the fb size?
+ * This is not related to decoding but strictly for reading from
+ * the pushbuffer/user. It might be caching related.
+ * The nv driver uses different values but it looks fb size related.
+ * So, I will go with that for now.
+ */
+ pushbuf_offset = lower_32_bits(vpe_channel->pushbuf_bo->bo.offset);
+ nv_wr32(dev, NV_VPE_MPEG2_USER_CONFIG,
+ NV_VPE_MPEG2_USER_PRESENT | NV_VPE_MPEG2_USER_VRAM);
+ nv_wr32(dev, NV_VPE_MPEG2_USER_OFFSET, 0);
+ nv_wr32(dev, NV_VPE_MPEG2_USER_SIZE, dev_priv->fb_available_size);
+ nv_wr32(dev, NV_VPE_MPEG2_ENGINE_SETUP_1, 0x01010000);
+ nv_wr32(dev, NV_VPE_MPEG2_ENGINE_SETUP_2, 0x01010000);
+ nv_wr32(dev, NV_VPE_MPEG2_USER_CONFIG,
+ NV_VPE_MPEG2_USER_PRESENT | NV_VPE_MPEG2_USER_VRAM);
+ nv_wr32(dev, NV_VPE_MPEG2_USER_OFFSET,
+ pushbuf_offset);
+ nv_wr32(dev, NV_VPE_MPEG2_USER_SIZE,
+ vpe_channel->dma.max << 2);
+ }
+
+ /* Start up the mpeg2 engine */
+ nv_wr32(dev, NV_VPE_MPEG2_ENGINE_CONTROL, NV_VPE_MPEG2_ENGINE_STOP);
+ nv_wr32(dev, NV_VPE_MPEG2_USER_PUT, 0);
+ nv_wr32(dev, NV_VPE_MPEG2_ENGINE_CONTROL, NV_VPE_MPEG2_ENGINE_START);
+ nv_rd32(dev, NV_VPE_MPEG2_ENGINE_CONTROL);
+
+ return 0;
+}
+
+static int
+nouveau_vpe_channel_init(struct nouveau_vd_vpe_channel *vpe_channel)
+{
+ struct drm_device *dev = vpe_channel->dev;
+ int ret;
+ int i;
+ uint32_t value;
+
+ /* Reset decoder to the initial state.*/
+ nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_INIT_CHANNEL <<
+ NV_VPE_CMD_TYPE_SHIFT | NV_VPE_CMD_INIT_CHANNEL_ACCEL
+ << 24);
+ nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_INIT_CHANNEL <<
+ NV_VPE_CMD_TYPE_SHIFT);
+ /* NOTE: The surface group info value might be tiling related. */
+ nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_INIT_CHANNEL <<
+ NV_VPE_CMD_TYPE_SHIFT |
+ NV_VPE_CMD_INIT_CHANNEL_SURFACE_GROUP_INFO << 24);
+
+ nouveau_vpe_cmd_end_sequence_header(vpe_channel);
+ /* No body/trailer for the init cmd.*/
+ nouveau_vpe_cmd_end_sequence_finish(vpe_channel);
+
+ ret = nouveau_vpe_channel_wait(vpe_channel, vpe_channel->dma.put);
+ if (ret)
+ return ret;
+
+ /* Clear out all surface references.*/
+ for (i = 0; i < NV_VPE_MAX_SURFACES; i++) {
+
+ nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_INIT_SURFACE <<
+ NV_VPE_CMD_TYPE_SHIFT |
+ NV_VPE_CMD_INIT_SURFACE_LUMA(i));
+ nouveau_vpe_cmd_align(vpe_channel);
+
+ nouveau_vpe_fire(vpe_channel, VPE_UDELAY_FIRE_INIT);
+ ret = nouveau_vpe_channel_wait(vpe_channel, vpe_channel->dma.put);
+ if (ret)
+ return ret;
+
+ nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_INIT_SURFACE <<
+ NV_VPE_CMD_TYPE_SHIFT |
+ NV_VPE_CMD_INIT_SURFACE_CHROMA(i));
+ nouveau_vpe_cmd_align(vpe_channel);
+
+ nouveau_vpe_fire(vpe_channel, VPE_UDELAY_FIRE_INIT);
+ ret = nouveau_vpe_channel_wait(vpe_channel, vpe_channel->dma.put);
+ if (ret)
+ return ret;
+ }
+
+ /* Init the decoder channel.*/
+ nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_INIT_CHANNEL <<
+ NV_VPE_CMD_TYPE_SHIFT |
+ NV_VPE_CMD_INIT_CHANNEL_ACCEL << 24
+ /* If IDCT is disabled then only MC is done.*/
+ | NV_VPE_CMD_INIT_CHANNEL_ACCEL_IDCT);
+ nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_INIT_CHANNEL <<
+ NV_VPE_CMD_TYPE_SHIFT |
+ (vpe_channel->width << 12 | vpe_channel->height));
+ /* NOTE: The surface group info value might be tiling related. */
+ nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_INIT_CHANNEL <<
+ NV_VPE_CMD_TYPE_SHIFT |
+ NV_VPE_CMD_INIT_CHANNEL_SURFACE_GROUP_INFO << 24
+ | (ALIGN(vpe_channel->width, 112) / 32));
+
+ nouveau_vpe_cmd_end_sequence_header(vpe_channel);
+ /* No body/trailer for the init cmd.*/
+ nouveau_vpe_cmd_end_sequence_finish(vpe_channel);
+
+ ret = nouveau_vpe_channel_wait(vpe_channel, vpe_channel->dma.put);
+ if (ret)
+ return ret;
+
+ ret = nouveau_vpe_wait_until_engine_idle(vpe_channel);
+ if (ret)
+ return ret;
+
+ /* Make sure hardware context is setup correctly */
+
+ value = nv_rd32(dev, NV_VPE_MPEG2_SURFACE_INFO);
+ if (value != (0x10000 | (ALIGN(vpe_channel->width, 128)))) {
+ NV_ERROR(dev, "nouveau_vpe_channel_init - "
+ "channel surface setup wrong for width = %d,"
+ "height = %d, got = 0x%08X.\n",
+ vpe_channel->width, vpe_channel->height, value);
+ return -EINVAL;
+ }
+
+ value = nv_rd32(dev, NV_VPE_MPEG2_CONTEXT_DIMENSIONS);
+ if (value != (((vpe_channel->width & 0xFFF) << 16) | (vpe_channel->height & 0xFFF))) {
+ NV_ERROR(dev, "nouveau_vpe_channel_init - "
+ "channel dimensions wrong for width = %d,"
+ "height = %d, got = 0x%08X.\n",
+ vpe_channel->width, vpe_channel->height, value);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void
+nouveau_vpe_channel_shutdown(struct nouveau_vd_vpe_channel *vpe_channel)
+{
+ nouveau_vpe_cmd_end_sequence_header(vpe_channel);
+ /* No body/trailer for the init cmd.*/
+ nouveau_vpe_cmd_end_sequence_finish(vpe_channel);
+}
+
+static void
+nouveau_vpe_channel_hw_shutdown(struct nouveau_vd_vpe_channel *vpe_channel)
+{
+ struct drm_device *dev = vpe_channel->dev;
+
+ nouveau_vpe_channel_shutdown(vpe_channel);
+
+ nouveau_vpe_channel_wait(vpe_channel, vpe_channel->dma.cur);
+
+ /* Just a slight pause. This might not be needed. */
+ DRM_UDELAY(100);
+
+ /* Turn off the mpeg2 decoder.*/
+ nv_wr32(dev, NV_VPE_MPEG2_USER_CONFIG,
+ NV_VPE_MPEG2_USER_NOT_PRESENT);
+ nv_wr32(dev, NV_VPE_MPEG2_ENGINE_CONTROL, NV_VPE_MPEG2_ENGINE_STOP);
+ nv_wr32(dev, NV_VPE_MPEG2_USER_PUT, 0);
+ nv_wr32(dev, NV_VPE_MPEG2_USER_OFFSET, 0);
+ nv_wr32(dev, NV_VPE_MPEG2_USER_SIZE, 0);
+ nv_wr32(dev, NV_VPE_MPEG2_ENGINE_SETUP_1, 0);
+ nv_wr32(dev, NV_VPE_MPEG2_ENGINE_SETUP_2, 0);
+ nv_rd32(dev, NV_VPE_MPEG2_ENGINE_CONTROL);
+}
+
+static int
+nouveau_vpe_channel_alloc(struct drm_device *dev,
+ struct drm_nouveau_vd_vpe_channel_alloc *req,
+ struct drm_file *file_priv)
+{
+ struct drm_nouveau_private *dev_priv = dev->dev_private;
+ struct nouveau_vd_vpe_channel *vpe_channel;
+ int ret;
+
+ if (dev_priv->vpe_channel) {
+ NV_ERROR(dev, "vpe channel is already in use.\n");
+ return -EPERM;
+ }
+
+ if ((dev_priv->card_type != NV_40) &&
+ (dev_priv->card_type != NV_30)) {
+ NV_ERROR(dev, "vpe is not supported on NV%d.\n",
+ dev_priv->card_type);
+ return -EINVAL;
+ }
+
+ if ((req->width < NV_VPE_MIN_WIDTH) ||
+ (req->width > NV_VPE_MAX_WIDTH) ||
+ (req->height < NV_VPE_MIN_HEIGHT) ||
+ (req->height > NV_VPE_MAX_HEIGHT)) {
+ NV_ERROR(dev, "vpe does not support width = %d, height = %d\n",
+ req->width, req->height);
+ return -EINVAL;
+ }
+
+ vpe_channel = kzalloc(sizeof(*vpe_channel), GFP_KERNEL);
+ if (!vpe_channel)
+ return -ENOMEM;
+
+ req->width = ALIGN(req->width, 16);
+ req->height = ALIGN(req->height, 16);
+ vpe_channel->dev = dev;
+ vpe_channel->width = req->width;
+ vpe_channel->height = req->height;
+
+ ret = nouveau_vpe_channel_pushbuf_alloc(vpe_channel);
+ if (ret)
+ goto out_err;
+
+ ret = nouveau_vpe_channel_hw_init(vpe_channel);
+ if (ret)
+ goto out_err;
+
+ ret = nouveau_vpe_channel_init(vpe_channel);
+ if (ret)
+ goto out_err;
+
+ ret = drm_gem_handle_create(file_priv, vpe_channel->pushbuf_bo->gem,
+ &req->pushbuf_handle);
+ if (ret)
+ goto out_err;
+
+ nouveau_debugfs_vpe_channel_init(vpe_channel);
+
+ vpe_channel->file_priv = file_priv;
+ dev_priv->vpe_channel = vpe_channel;
+
+ NV_INFO(dev, "intialized vpe channel\n");
+
+out_err:
+ if (ret)
+ nouveau_vpe_channel_free(vpe_channel);
+
+ return ret;
+}
+
+void
+nouveau_vpe_channel_free(struct nouveau_vd_vpe_channel *vpe_channel)
+{
+ struct drm_device *dev;
+ struct drm_nouveau_private *dev_priv;
+ struct nouveau_vd_vpe_surface *vpe_surface;
+ int i;
+
+ if (!vpe_channel)
+ return;
+
+ dev = vpe_channel->dev;
+ dev_priv = dev->dev_private;
+
+ /* The pushbuf may not exist if channel creation failed early.*/
+ if (vpe_channel->pushbuf_bo)
+ nouveau_vpe_channel_hw_shutdown(vpe_channel);
+
+ nouveau_debugfs_vpe_channel_fini(vpe_channel);
+
+ for (i = 0; i < ARRAY_SIZE(vpe_channel->surface); i++) {
+ vpe_surface = &vpe_channel->surface[i];
+ if (vpe_surface->luma_bo)
+ nouveau_vpe_unpin_surface(vpe_channel, vpe_surface->luma_bo);
+ if (vpe_surface->chroma_bo)
+ nouveau_vpe_unpin_surface(vpe_channel, vpe_surface->chroma_bo);
+ }
+
+ if (vpe_channel->pushbuf_bo) {
+ nouveau_bo_unmap(vpe_channel->pushbuf_bo);
+ nouveau_bo_unpin(vpe_channel->pushbuf_bo);
+ mutex_lock(&vpe_channel->dev->struct_mutex);
+ drm_gem_object_unreference(vpe_channel->pushbuf_bo->gem);
+ mutex_unlock(&vpe_channel->dev->struct_mutex);
+ }
+
+ NV_INFO(vpe_channel->dev, "shutdown vpe channel\n");
+
+ dev_priv->vpe_channel = NULL;
+
+ kfree(vpe_channel);
+}
+
+static int
+nouveau_vpe_reference_surface(struct nouveau_vd_vpe_channel *vpe_channel,
+ uint32_t surface_index, uint64_t addr_offset,
+ bool is_luma)
+{
+ struct drm_device *dev = vpe_channel->dev;
+ uint32_t value;
+ int ret;
+
+ if (vpe_channel->dma.free < 8)
+ nouveau_vpe_reset_pushbuf_to_start(vpe_channel);
+
+ nouveau_vpe_cmd_write(vpe_channel, NV_VPE_CMD_INIT_SURFACE <<
+ NV_VPE_CMD_TYPE_SHIFT | (is_luma ?
+ NV_VPE_CMD_INIT_SURFACE_LUMA(surface_index) :
+ NV_VPE_CMD_INIT_SURFACE_CHROMA(surface_index))
+ | NV_VPE_CMD_INIT_SURFACE_OFFSET_DIV(lower_32_bits(addr_offset)));
+ nouveau_vpe_cmd_align(vpe_channel);
+
+ if (vpe_channel->dma.free >= NV_VPE_CMD_ALIGNMENT)
+ nouveau_vpe_fire(vpe_channel, VPE_UDELAY_FIRE_INIT);
+ else
+ nouveau_vpe_reset_pushbuf_to_start(vpe_channel);
+
+ ret = nouveau_vpe_channel_wait(vpe_channel, vpe_channel->dma.cur);
+ if (ret)
+ return ret;
+
+ ret = nouveau_vpe_wait_until_engine_idle(vpe_channel);
+ if (ret)
+ return ret;
+
+ if (is_luma) {
+ value = nv_rd32(dev, NV_VPE_MPEG2_LUMA_SURFACE_OFFSET_GET(surface_index));
+ if (lower_32_bits(addr_offset) != value) {
+ NV_ERROR(dev, "vpe - surface.luma ref is wrong. "
+ "Expected 0x%08X, Got 0x%08X.\n",
+ lower_32_bits(addr_offset), value);
+ return -EINVAL;
+ }
+ } else {
+ value = nv_rd32(dev, NV_VPE_MPEG2_CHROMA_SURFACE_OFFSET_GET(surface_index));
+ if (lower_32_bits(addr_offset) != value) {
+ NV_ERROR(dev, "vpe - surface.chroma ref is wrong. "
+ "Expected 0x%08X, Got 0x%08X.\n",
+ lower_32_bits(addr_offset), value);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int
+nouveau_vpe_channel_validate_surfaces(struct nouveau_vd_vpe_channel *vpe_channel,
+ struct drm_nouveau_vd_vpe_surface *surfaces, int nr_surfaces,
+ struct nouveau_vd_vpe_surface **target_vpe_surface)
+{
+ struct drm_device *dev = vpe_channel->dev;
+ int ret;
+ int i;
+ struct nouveau_vd_vpe_surface *vpe_surface;
+ struct drm_nouveau_vd_vpe_surface *surface;
+ uint32_t decoder_surface_size = 0;
+
+ for (i = 0, surface = surfaces; i < nr_surfaces; i++, surface++) {
+ if (unlikely(surface->surface_index >= ARRAY_SIZE(vpe_channel->surface))) {
+ NV_ERROR(dev, "nouveau_vpe_channel_validate_surfaces - "
+ "surface_index %d is invalid.\n", surface->surface_index);
+ return -EINVAL;
+ }
+
+ vpe_surface = &vpe_channel->surface[surface->surface_index];
+ if (!vpe_surface->luma_bo ||
+ !nouveau_vpe_validate_surface(vpe_channel, surface->luma_handle, vpe_surface->luma_bo)) {
+ if (!decoder_surface_size)
+ decoder_surface_size = vpe_channel->width * vpe_channel->height;
+
+ if (vpe_surface->luma_bo) {
+ nouveau_vpe_unpin_surface(vpe_channel, vpe_surface->luma_bo);
+ vpe_surface->luma_bo = NULL;
+ }
+
+ ret = nouveau_vpe_pin_surface(vpe_channel, surface->luma_handle,
+ decoder_surface_size, &vpe_surface->luma_bo);
+ if (ret) {
+ NV_ERROR(dev, "nouveau_vpe_channel_validate_surfaces - "
+ "could not pin surface_index %d, luma handle 0x%08X, "
+ "error %d.\n", surface->surface_index,
+ surface->luma_handle, ret);
+ return ret;
+ }
+
+ ret = nouveau_vpe_reference_surface(vpe_channel, surface->surface_index,
+ vpe_surface->luma_bo->bo.offset, true);
+ if (ret) {
+ NV_ERROR(dev, "nouveau_vpe_channel_validate_surfaces - "
+ "could not reference surface_index %d, luma handle 0x%08X, "
+ "error %d.\n", surface->surface_index,
+ surface->luma_handle, ret);
+ nouveau_vpe_unpin_surface(vpe_channel, vpe_surface->luma_bo);
+ vpe_surface->luma_bo = NULL;
+ return ret;
+ }
+
+ vpe_surface->dma_sequence = 0;
+ }
+ if (!vpe_surface->chroma_bo ||
+ !nouveau_vpe_validate_surface(vpe_channel, surface->chroma_handle, vpe_surface->chroma_bo)) {
+
+ if (!decoder_surface_size)
+ decoder_surface_size = vpe_channel->width * vpe_channel->height;
+
+ if (vpe_surface->chroma_bo) {
+ nouveau_vpe_unpin_surface(vpe_channel, vpe_surface->chroma_bo);
+ vpe_surface->chroma_bo = NULL;
+ }
+
+ /* The chroma surface is 1/2 the size of the luma in both the width
+ * and height.*/
+ ret = nouveau_vpe_pin_surface(vpe_channel, surface->chroma_handle,
+ decoder_surface_size / 4, &vpe_surface->chroma_bo);
+ if (ret) {
+ NV_ERROR(dev, "nouveau_vpe_channel_validate_surfaces - "
+ "could not pin surface_index %d, chroma handle 0x%08X, "
+ "error %d.\n", surface->surface_index,
+ surface->luma_handle, ret);
+ return ret;
+ }
+
+ ret = nouveau_vpe_reference_surface(vpe_channel, surface->surface_index,
+ vpe_surface->chroma_bo->bo.offset, false);
+ if (ret) {
+ NV_ERROR(dev, "nouveau_vpe_channel_validate_surfaces - "
+ "could not reference surface_index %d, "
+ "chroma handle 0x%08X, error %d.\n",
+ surface->surface_index, surface->luma_handle, ret);
+ nouveau_vpe_unpin_surface(vpe_channel, vpe_surface->chroma_bo);
+ vpe_surface->chroma_bo = NULL;
+ return ret;
+ }
+
+ vpe_surface->dma_sequence = 0;
+ }
+
+ /* First surface is considered the target.*/
+ if (i == 0)
+ *target_vpe_surface = vpe_surface;
+ }
+
+ return 0;
+}
+
+static int
+nouveau_vpe_channel_pushbuf_fire(struct nouveau_vd_vpe_channel *vpe_channel,
+ struct drm_nouveau_vd_vpe_pushbuf_fire *req)
+{
+ int ret;
+ uint32_t *pushbuf = NULL;
+ uint32_t *batches = NULL;
+ struct drm_nouveau_vd_vpe_surface *surfaces = NULL;
+ struct nouveau_vd_vpe_surface *vpe_surface = NULL;
+ int i;
+ uint32_t offset = 0;
+ uint32_t batch_size;
+ bool is_end_sequence = req->flags &
+ NOUVEAU_VD_VPE_PUSHBUF_FIRE_FLAG_END_SEQUENCE;
+ bool is_update_dma_pos = req->flags &
+ NOUVEAU_VD_VPE_PUSHBUF_FIRE_FLAG_UPDATE_DMA_POS;
+ bool do_fire_batch;
+
+ if (req->nr_surfaces) {
+ surfaces = _u_memcpya(req->surfaces, req->nr_surfaces, sizeof(*surfaces));
+ if (unlikely(IS_ERR(surfaces))) {
+ ret = PTR_ERR(surfaces);
+ goto out;
+ }
+ }
+
+ if (req->nr_dwords) {
+ pushbuf = _u_memcpya(req->dwords, req->nr_dwords, sizeof(uint32_t));
+ if (unlikely(IS_ERR(pushbuf))) {
+ ret = PTR_ERR(pushbuf);
+ goto out;
+ }
+ }
+
+ if (req->nr_batches) {
+ batches = _u_memcpya(req->batches, req->nr_batches, sizeof(uint32_t));
+ if (unlikely(IS_ERR(batches))) {
+ ret = PTR_ERR(batches);
+ goto out;
+ }
+ }
+
+ if (req->nr_surfaces) {
+ ret = nouveau_vpe_channel_validate_surfaces(vpe_channel,
+ surfaces, req->nr_surfaces,
+ &vpe_surface);
+ if (unlikely(ret))
+ goto out;
+ }
+
+ if (is_update_dma_pos) {
+ if (req->dma_cur >= vpe_channel->dma.max) {
+ ret = -EINVAL;
+ goto out;
+ }
+ vpe_channel->dma.cur = req->dma_cur;
+ vpe_channel->dma.free = vpe_channel->dma.max - vpe_channel->dma.cur;
+ if (!is_end_sequence)
+ nouveau_vpe_fire(vpe_channel, VPE_UDELAY_FIRE_NORMAL);
+ }
+
+ for (i = 0; i < req->nr_batches; i++) {
+ batch_size = batches[i];
+
+ do_fire_batch = !(batch_size &
+ NOUVEAU_VD_VPE_PUSHBUF_FIRE_BATCH_DO_NOT_FIRE);
+
+ batch_size &= 0xFFFF;
+
+ if (unlikely(!batch_size)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (unlikely((batch_size + offset) > req->nr_dwords)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (batch_size > vpe_channel->dma.free)
+ nouveau_vpe_reset_pushbuf_to_start(vpe_channel);
+
+ ret = nouveau_vpe_cmd_write_user_batch(vpe_channel,
+ (const void *)(pushbuf + offset), batch_size);
+ if (ret)
+ goto out;
+
+ offset += batch_size;
+ vpe_channel->dma.free -= batch_size;
+
+ if (!vpe_channel->dma.free) {
+ vpe_channel->dma.cur = 0;
+ vpe_channel->dma.free = vpe_channel->dma.max;
+ nouveau_vpe_fire(vpe_channel, VPE_UDELAY_FIRE_END);
+ }
+
+ if (do_fire_batch)
+ nouveau_vpe_fire(vpe_channel, VPE_UDELAY_FIRE_NORMAL);
+ }
+
+ if (req->nr_dwords) {
+ if (vpe_channel->dma.free < NV_VPE_MAX_MB)
+ nouveau_vpe_reset_pushbuf_to_start(vpe_channel);
+ }
+
+ if (is_end_sequence) {
+ if (vpe_channel->dma.free < NV_VPE_CMD_ALIGNMENT)
+ nouveau_vpe_reset_pushbuf_to_start(vpe_channel);
+ nouveau_vpe_cmd_end_sequence_header(vpe_channel);
+ nouveau_vpe_cmd_end_sequence_trailer(vpe_channel);
+ nouveau_vpe_cmd_end_sequence_finish(vpe_channel);
+
+ if (vpe_surface)
+ vpe_surface->dma_sequence = vpe_channel->dma.sequence;
+ }
+
+ req->dma_free = vpe_channel->dma.free;
+ req->dma_cur = vpe_channel->dma.cur;
+ ret = 0;
+out:
+ if (!IS_ERR(surfaces) && surfaces)
+ kfree(surfaces);
+ if (!IS_ERR(batches) && batches)
+ kfree(batches);
+ if (!IS_ERR(pushbuf) && pushbuf)
+ kfree(pushbuf);
+
+ return ret;
+}
+
+static int
+nouveau_vpe_surface_query(struct nouveau_vd_vpe_channel *vpe_channel,
+ struct drm_nouveau_vd_vpe_surface_query *req)
+{
+ struct drm_device *dev = vpe_channel->dev;
+ struct nouveau_vd_vpe_surface *vpe_surface;
+ uint32_t i;
+ uint32_t value;
+
+ if (unlikely(req->surface_index >= ARRAY_SIZE(vpe_channel->surface))) {
+ NV_ERROR(dev, "nouveau_vpe_surface_query - invalid surface index %d.\n",
+ req->surface_index);
+ return -EINVAL;
+ }
+
+ req->is_busy = 0;
+
+ vpe_surface = &vpe_channel->surface[req->surface_index];
+
+ /* This is set when a cmd sequence is done for the target surface.*/
+ if (vpe_surface->dma_sequence) {
+ /* Read the current sequence and see if any surfaces have
+ * finished rendering.*/
+ value = nv_rd32(dev, NV_VPE_MPEG2_SEQUENCE_GET);
+ for (i = 0; i < ARRAY_SIZE(vpe_channel->surface); i++) {
+ if (vpe_channel->surface[i].luma_bo ||
+ vpe_channel->surface[i].chroma_bo) {
+ if (value >= vpe_channel->surface[i].dma_sequence)
+ vpe_channel->surface[i].dma_sequence = 0;
+ else if (i == req->surface_index)
+ req->is_busy = 1;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/* IOCtls.*/
+
+int
+nouveau_vd_vpe_ioctl_channel_alloc(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_nouveau_vd_vpe_channel_alloc *req = data;
+
+ return nouveau_vpe_channel_alloc(dev, req, file_priv);
+}
+
+int
+nouveau_vd_vpe_ioctl_channel_free(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct nouveau_vd_vpe_channel *vpe_channel;
+
+ NOUVEAU_GET_VPE_CHANNEL_WITH_RETURN(file_priv, vpe_channel);
+
+ nouveau_vpe_channel_free(vpe_channel);
+
+ return 0;
+}
+
+int
+nouveau_vd_vpe_ioctl_pushbuf_fire(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct nouveau_vd_vpe_channel *vpe_channel;
+ struct drm_nouveau_vd_vpe_pushbuf_fire *req = data;
+
+ NOUVEAU_GET_VPE_CHANNEL_WITH_RETURN(file_priv, vpe_channel);
+
+ return nouveau_vpe_channel_pushbuf_fire(vpe_channel, req);
+}
+
+int
+nouveau_vd_vpe_ioctl_surface_query(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct nouveau_vd_vpe_channel *vpe_channel;
+ struct drm_nouveau_vd_vpe_surface_query *req = data;
+
+ NOUVEAU_GET_VPE_CHANNEL_WITH_RETURN(file_priv, vpe_channel);
+
+ return nouveau_vpe_surface_query(vpe_channel, req);
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_vpe_hw.h b/drivers/gpu/drm/nouveau/nouveau_vpe_hw.h
new file mode 100644
index 0000000..8e3dfb9
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nouveau_vpe_hw.h
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2010 Jimmy Rentz
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __NOUVEAU_VPE_HW_H__
+#define __NOUVEAU_VPE_HW_H__
+
+/* VPE is the video decoder engine that is found in nv30, nv40 and some
+ * older hardware (geforce 4 and higher I believe).
+ * It contains an mpeg2 decoder with the following properties:
+ * (-) Decodes at the idct level. However, I believe older cards only
+ * support mc level.
+ * (-) 32x64 to 2032x2032 profiles.
+ * (-) 4:2:0 chroma sampling.
+ * (-) Only one set of registers so only one user unless some type of
+ * context/channel switching is added.*/
+
+#define NV_VPE_MAX_CHANNELS 1
+#define NV_VPE_MAX_SURFACES 8
+#define NV_VPE_MIN_WIDTH 32
+#define NV_VPE_MIN_HEIGHT 64
+#define NV_VPE_MAX_WIDTH 2032
+#define NV_VPE_MAX_HEIGHT 2032
+#define NV_VPE_PUSHBUFFER_SIZE (1 * 1024 * 1024)
+#define NV_VPE_CMD_ALIGNMENT 16
+
+#define NV_VPE_MAX_MB_BATCH 16
+#define NV_VPE_MAX_MB_HEADER 20
+#define NV_VPE_MAX_MB_DCT (33 * 6)
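+/* Worst-case dwords for a single macroblock (header plus dct data);
+ * presumably why the kernel keeps at least this much pushbuf space
+ * free before copying user cmds. */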
+#define NV_VPE_MAX_MB (NV_VPE_MAX_MB_HEADER + NV_VPE_MAX_MB_DCT)
+
+#define NV_VPE_CMD_TYPE_SHIFT 28
+
+/* All cmd info.*/
+#define NV_VPE_CMD_NOP 0x1
+
+#define NV_VPE_CMD_INIT_SURFACE 0x2
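+ /* Surface offsets are programmed divided by 32 (see OFFSET_DIV
+ * below), so they presumably must be 32-byte aligned. */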
+ #define NV_VPE_CMD_INIT_SURFACE_LUMA(index) (((index) * 2) << 24)
+ #define NV_VPE_CMD_INIT_SURFACE_CHROMA(index) ((((index) * 2) + 1) << 24)
+ #define NV_VPE_CMD_INIT_SURFACE_OFFSET_DIV(offset) ((offset) >> 5)
+
+#define NV_VPE_CMD_INIT_CHANNEL 0x3
+ /* ((width rounded up to 112) / 32) */
+ #define NV_VPE_CMD_INIT_CHANNEL_SURFACE_GROUP_INFO 0x1
+ #define NV_VPE_CMD_INIT_CHANNEL_ACCEL 0x2
+ /* (0x1 to turn on idct operations). */
+ #define NV_VPE_CMD_INIT_CHANNEL_ACCEL_IDCT 0x1
+
+#define NV_VPE_CMD_DCT_SEPARATOR 0x6
+#define NV_VPE_CMD_END_SEQUENCE 0x7
+ #define NV_VPE_CMD_SEQUENCE 0x1
+
+/* DCT Blocks */
+#define NV_VPE_CMD_DCT_CHROMA_HEADER 0x8
+#define NV_VPE_CMD_DCT_LUMA_HEADER 0x9
+ /* The block pattern is used for chroma and luma blocks */
+ #define NV_VPE_CMD_DCT_BLOCK_PATTERN(p) ((p) << 24)
+ /* Not sure what this is for. This is always set in the dct block header */
+ #define NV_VPE_CMD_DCT_BLOCK_UNKNOWN 0x10000
+ /* Target surface index. Is 0 based. */
+ #define NV_VPE_CMD_DCT_BLOCK_TARGET_SURFACE(s) ((s) << 20)
+ /* If picture element is frame */
+ #define NV_VPE_CMD_PICT_FRAME 0x80000
+ /* If field based encoding and a luma block */
+ #define NV_VPE_CMD_PICT_FRAME_FIELD 0x800000
+ /* If picture element or field encoding is bottom field */
+ #define NV_VD_VPE_CMD_BOTTOM_FIELD 0x20000
+ /* If macroblock x coordinate is even */
+ #define NV_VD_VPE_CMD_EVEN_X_COORD 0x8000
+
+/* Used to terminate a set of dct data blocks.*/
+#define NV_VPE_DCT_BLOCK_TERMINATOR 0x1
+
+/* Used to designate dct data blocks that are all zero.*/
+#define NV_VPE_DCT_BLOCK_NULL (0x80040000 | NV_VPE_DCT_BLOCK_TERMINATOR)
+
+/* Coordinates of dct */
+#define NV_VPE_CMD_DCT_COORDINATE 0xA
+ #define NV_VPE_DCT_POINTS_LUMA(x, y, p) ((((y) * 16 * (p)) << 12) | ((x) * 16))
+ #define NV_VPE_DCT_POINTS_CHROMA(x, y, p) ((((y) * 8 * (p)) << 12) | ((x) * 16))
+
+/* Motion Vectors */
+#define NV_VPE_CMD_LUMA_MOTION_VECTOR_HEADER 0xD
+#define NV_VPE_CMD_CHROMA_MOTION_VECTOR_HEADER 0xC
+#define NV_VPE_CMD_MOTION_VECTOR 0xE
+
+ /* Motion Vector Header */
+
+ /* Set if 2 motion vectors exist for this header.
+ * Otherwise, it is cleared and only 1 exists.*/
+ #define NV_VPE_CMD_MC_MV_COUNT_2 (0x1 << 16)
+
+ /* [Field Picture or Field Motion Only]
+ * motion_vertical_field_select is set here.
+ * This means that the bottom field is selected for the given vertical
+ * vector. However, dual-prime blocks do not follow this rule.
+ * It is treated specially for them.*/
+ #define NV_VPE_CMD_BOTTOM_FIELD_VERTICAL_MOTION_SELECT_FIRST (0x1 << 17)
+
+ /* [Frame Picture and Frame Motion Type only] */
+ #define NV_VPE_CMD_FRAME_PICT_FRAME_MOTION (0x1 << 19)
+
+ /* MC prediction surface index. Is 0 based. */
+ #define NV_VPE_CMD_PREDICTION_SURFACE(s) ((s) << 20)
+
+ /* Set if this is a second motion vector. Otherwise, the first one is
+ * assumed.*/
+ #define NV_VPE_CMD_MOTION_VECTOR_TYPE_SECOND (0x1 << 23)
+
+ /* [Frame Picture and Frame Motion Type OR Field Picture only]*/
+ #define NV_VPE_CMD_FRAME_FRAME_PICT_OR_FIELD (0x1 << 24)
+
+ /* If Vertical Motion Vector is odd then set. This is before any
+ * operations are done. */
+ #define NV_VPE_CMD_ODD_VERTICAL_MOTION_VECTOR (0x1 << 25)
+
+ /* If Horizontal Motion Vector is odd then set. This is before any
+ * operations are done. */
+ #define NV_VPE_CMD_ODD_HORIZONTAL_MOTION_VECTOR (0x1 << 26)
+
+ /* If set then the motion vectors are backward. Otherwise,
+ * they are forward.*/
+ #define NV_VPE_CMD_MOTION_VECTOR_BACKWARD (0x1 << 27)
+
+ /* Motion Vectors. This is the equation used for each motion vector.
+ * d is only used as a second vector displacement in a couple of cases.
+ */
+ #define NV_VPE_MOTION_VECTOR_VERTICAL(y, c, v, q, d) (((y * c) + (v / q) + d) << 12)
+ #define NV_VPE_MOTION_VECTOR_HORIZONTAL(x, c, v, q, d) ((x * c) + (v / q) + d)
+
+#endif
diff --git a/include/drm/nouveau_drm.h b/include/drm/nouveau_drm.h
index fe917de..c597c0a 100644
--- a/include/drm/nouveau_drm.h
+++ b/include/drm/nouveau_drm.h
@@ -184,6 +184,52 @@ enum nouveau_bus_type {
struct drm_nouveau_sarea {
};
+/* VPE Supports mpeg2 only.*/
+struct drm_nouveau_vd_vpe_channel_alloc {
+ uint32_t width;
+ uint32_t height;
+ /* Used for user pushbuf access.
+ * mmio access is not allowed so you still need to fire as normal.*/
+ uint32_t pushbuf_handle;
+};
+
+struct drm_nouveau_vd_vpe_channel_free {
+};
+
+#define NOUVEAU_VD_VPE_PUSHBUF_FIRE_FLAG_END_SEQUENCE 0x00000001
+#define NOUVEAU_VD_VPE_PUSHBUF_FIRE_FLAG_UPDATE_DMA_POS 0x00000002
+/* structure for surface.*/
+struct drm_nouveau_vd_vpe_surface {
+ uint32_t luma_handle;
+ uint32_t chroma_handle;
+ uint32_t surface_index;
+};
+
+/* This flag lets you turn off firing for a specific batch.
+ * This is needed in some cases to avoid locking up the decoder.*/
+#define NOUVEAU_VD_VPE_PUSHBUF_FIRE_BATCH_DO_NOT_FIRE 0x10000000
+struct drm_nouveau_vd_vpe_pushbuf_fire {
+ /* [in] */
+ uint32_t nr_dwords;
+ uint64_t dwords;
+ uint32_t nr_batches;
+ uint64_t batches;
+ /* Surface[0] is always the target.*/
+ uint32_t nr_surfaces;
+ uint64_t surfaces;
+ uint32_t flags;
+ /* Needed when writing to the hw pushbuf from user space.
+ * This also will perform a fire.*/
+ uint32_t dma_cur;
+ /* [out] */
+ uint32_t dma_free;
+};
+
+struct drm_nouveau_vd_vpe_surface_query {
+ uint32_t surface_index;
+ uint32_t is_busy;
+};
+
#define DRM_NOUVEAU_GETPARAM 0x00
#define DRM_NOUVEAU_SETPARAM 0x01
#define DRM_NOUVEAU_CHANNEL_ALLOC 0x02
@@ -196,5 +242,9 @@ struct drm_nouveau_sarea {
#define DRM_NOUVEAU_GEM_CPU_PREP 0x42
#define DRM_NOUVEAU_GEM_CPU_FINI 0x43
#define DRM_NOUVEAU_GEM_INFO 0x44
+#define DRM_NOUVEAU_VD_VPE_CHANNEL_ALLOC 0x49
+#define DRM_NOUVEAU_VD_VPE_CHANNEL_FREE 0x50
+#define DRM_NOUVEAU_VD_VPE_PUSHBUF_FIRE 0x51
+#define DRM_NOUVEAU_VD_VPE_SURFACE_QUERY 0x52
#endif /* __NOUVEAU_DRM_H__ */