Mesa (nvfx-next-6): nvfx: new 2D: Rewrite and unify miptree and transfer code (v3)

Luca Barbieri lb at kemper.freedesktop.org
Sun Apr 18 14:25:32 UTC 2010


Module: Mesa
Branch: nvfx-next-6
Commit: c6b3d5ff54a5d56f71a4be5dc401cd12d233099c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=c6b3d5ff54a5d56f71a4be5dc401cd12d233099c

Author: Luca Barbieri <luca at luca-barbieri.com>
Date:   Fri Jan 15 10:11:11 2010 +0100

nvfx: new 2D: Rewrite and unify miptree and transfer code (v3)

Changes:
- Fixed some nv##_miptree that were not converted to nv04_miptree.
- Disable swizzling on non-RGBA 2D textures, since the current 2D
  code is mostly broken in those cases. A later patch will fix this.
  Thanks to Andrew Randrianasulu who reported this.
- Fix compressed texture transfers and hack around the current 2D
  code's inability to copy compressed textures by using direct access.
  Thanks to Andrew Randrianasulu who reported this.
- Unify 2D surface functions too

This patch rewrites all the miptree layout and transfer code in the
nvfx driver.

The current code is broken in several ways:
1. 3D textures are laid out first by face, then by level, which is
incorrect
2. Cube maps should have 128-byte aligned faces
3. Swizzled textures have a strange alignment test that seems
unnecessary
4. We store the image_offsets for each face/slice but they can be
easily computed instead
5. "Swizzling" is not supported for compressed formats. They can be
"swizzled" but swizzling only means that there are no gaps (pitch is
level-dependent) and the layout is still linear
6. Swizzling is not supported for non-RGBA formats. All formats (except
possibly depth) can be swizzled according to my testing.

The miptree layout is rewritten based on my empirical testing, which I
posted in the "miptree findings" mail.
The image_offset array is removed, since it can be calculated with a
simple multiplication; the only array in the miptree structure is now
the one for mipmap level starts, which it seems cannot be easily
computed in constant time.

Also, we now directly store a nouveau_bo instead of a pipe_buffer in
the miptree structure, like nv50 does.

Support for render temporaries is removed, and will be re-added in a
later patch.

Note that the current temporary code is broken, because it does not
copy the temporary back on render cache flushes.

---

 src/gallium/drivers/nvfx/Makefile          |    1 -
 src/gallium/drivers/nvfx/nv04_surface_2d.h |   22 +-
 src/gallium/drivers/nvfx/nv30_fragtex.c    |    7 +-
 src/gallium/drivers/nvfx/nv40_fragtex.c    |   12 +-
 src/gallium/drivers/nvfx/nvfx_context.c    |    2 +-
 src/gallium/drivers/nvfx/nvfx_miptree.c    |  348 ++++++++----------
 src/gallium/drivers/nvfx/nvfx_resource.h   |   12 +-
 src/gallium/drivers/nvfx/nvfx_screen.c     |    6 +-
 src/gallium/drivers/nvfx/nvfx_screen.h     |    6 +-
 src/gallium/drivers/nvfx/nvfx_state.h      |    1 -
 src/gallium/drivers/nvfx/nvfx_state_fb.c   |   12 +-
 src/gallium/drivers/nvfx/nvfx_surface.c    |  532 +++++++++++++++++++++++++---
 src/gallium/drivers/nvfx/nvfx_transfer.c   |   52 ++--
 13 files changed, 691 insertions(+), 322 deletions(-)

diff --git a/src/gallium/drivers/nvfx/Makefile b/src/gallium/drivers/nvfx/Makefile
index c1d57ca..fc68618 100644
--- a/src/gallium/drivers/nvfx/Makefile
+++ b/src/gallium/drivers/nvfx/Makefile
@@ -4,7 +4,6 @@ include $(TOP)/configs/current
 LIBNAME = nvfx
 
 C_SOURCES = \
-	nv04_surface_2d.c \
 	nvfx_buffer.c \
 	nvfx_context.c \
 	nvfx_clear.c \
diff --git a/src/gallium/drivers/nvfx/nv04_surface_2d.h b/src/gallium/drivers/nvfx/nv04_surface_2d.h
index b2b237b..0b849f0 100644
--- a/src/gallium/drivers/nvfx/nv04_surface_2d.h
+++ b/src/gallium/drivers/nvfx/nv04_surface_2d.h
@@ -1,13 +1,12 @@
 #ifndef __NV04_SURFACE_2D_H__
 #define __NV04_SURFACE_2D_H__
 
-struct nv04_surface {
+struct nvfx_surface {
 	struct pipe_surface base;
 	unsigned pitch;
-	struct nv04_surface* backing;
 };
 
-struct nv04_surface_2d {
+struct nvfx_surface_2d {
 	struct nouveau_notifier *ntfy;
 	struct nouveau_grobj *surf2d;
 	struct nouveau_grobj *swzsurf;
@@ -15,25 +14,8 @@ struct nv04_surface_2d {
 	struct nouveau_grobj *rect;
 	struct nouveau_grobj *blit;
 	struct nouveau_grobj *sifm;
-
-	struct nouveau_bo *(*buf)(struct pipe_surface *);
-
-	void (*copy)(struct nv04_surface_2d *, struct pipe_surface *dst,
-		     int dx, int dy, struct pipe_surface *src, int sx, int sy,
-		     int w, int h);
-	void (*fill)(struct nv04_surface_2d *, struct pipe_surface *dst,
-		     int dx, int dy, int w, int h, unsigned value);
 };
 
-struct nv04_surface_2d *
-nv04_surface_2d_init(struct nouveau_screen *screen);
-
-void
-nv04_surface_2d_takedown(struct nv04_surface_2d **);
-
-struct nv04_surface*
-nv04_surface_wrap_for_render(struct pipe_screen *pscreen, struct nv04_surface_2d* eng2d, struct nv04_surface* ns);
-
 #define NVFX_RESOURCE_FLAG_LINEAR (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)
 
 #endif
diff --git a/src/gallium/drivers/nvfx/nv30_fragtex.c b/src/gallium/drivers/nvfx/nv30_fragtex.c
index dec073a..4828a04 100644
--- a/src/gallium/drivers/nvfx/nv30_fragtex.c
+++ b/src/gallium/drivers/nvfx/nv30_fragtex.c
@@ -92,9 +92,9 @@ void
 nv30_fragtex_set(struct nvfx_context *nvfx, int unit)
 {
 	struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit];
-	struct nvfx_miptree *nv30mt = (struct nvfx_miptree *)nvfx->fragment_sampler_views[unit]->texture;
-	struct pipe_resource *pt = &nv30mt->base.base;
-	struct nouveau_bo *bo = nv30mt->base.bo;
+	struct nvfx_miptree *mt = (struct nvfx_miptree *)nvfx->fragment_sampler_views[unit]->texture;
+	struct pipe_resource *pt = &mt->base.base;
+	struct nouveau_bo *bo = mt->base.bo;
 	struct nv30_texture_format *tf;
 	struct nouveau_channel* chan = nvfx->screen->base.channel;
 	uint32_t txf, txs;
@@ -129,6 +129,7 @@ nv30_fragtex_set(struct nvfx_context *nvfx, int unit)
 		return;
 	}
 
+	/* FIXME: specify whether textures are swizzled or not somehow */
 	txs = tf->swizzle;
 
 	MARK_RING(chan, 9, 2);
diff --git a/src/gallium/drivers/nvfx/nv40_fragtex.c b/src/gallium/drivers/nvfx/nv40_fragtex.c
index 0068b1b..289070e 100644
--- a/src/gallium/drivers/nvfx/nv40_fragtex.c
+++ b/src/gallium/drivers/nvfx/nv40_fragtex.c
@@ -111,9 +111,9 @@ nv40_fragtex_set(struct nvfx_context *nvfx, int unit)
 {
 	struct nouveau_channel* chan = nvfx->screen->base.channel;
 	struct nvfx_sampler_state *ps = nvfx->tex_sampler[unit];
-	struct nvfx_miptree *nv40mt = (struct nvfx_miptree *)nvfx->fragment_sampler_views[unit]->texture;
-	struct nouveau_bo *bo = nv40mt->base.bo;
-	struct pipe_resource *pt = &nv40mt->base.base;
+	struct nvfx_miptree *mt = (struct nvfx_miptree *)nvfx->fragment_sampler_views[unit]->texture;
+	struct nouveau_bo *bo = mt->base.bo;
+	struct pipe_resource *pt = &mt->base.base;
 	struct nv40_texture_format *tf;
 
 	uint32_t txf, txs, txp;
@@ -148,10 +148,10 @@ nv40_fragtex_set(struct nvfx_context *nvfx, int unit)
 		return;
 	}
 
-	if (!(pt->flags & NVFX_RESOURCE_FLAG_LINEAR)) {
+	if (!mt->linear_pitch)
 		txp = 0;
-	} else {
-		txp  = nv40mt->level[0].pitch;
+	else {
+		txp  = mt->linear_pitch;
 		txf |= NV40TCL_TEX_FORMAT_LINEAR;
 	}
 
diff --git a/src/gallium/drivers/nvfx/nvfx_context.c b/src/gallium/drivers/nvfx/nvfx_context.c
index 1faa0af..d4a3ac1 100644
--- a/src/gallium/drivers/nvfx/nvfx_context.c
+++ b/src/gallium/drivers/nvfx/nvfx_context.c
@@ -65,7 +65,7 @@ nvfx_create(struct pipe_screen *pscreen, void *priv)
 	nvfx->is_nv4x = screen->is_nv4x;
 
 	nvfx_init_query_functions(nvfx);
-	nvfx_init_surface_functions(nvfx);
+	nvfx_init_surface_functions(&nvfx->pipe);
 	nvfx_init_state_functions(nvfx);
 	nvfx_init_resource_functions(&nvfx->pipe);
 
diff --git a/src/gallium/drivers/nvfx/nvfx_dummy.c b/src/gallium/drivers/nvfx/nvfx_dummy.c
new file mode 100644
index 0000000..e69de29
diff --git a/src/gallium/drivers/nvfx/nvfx_miptree.c b/src/gallium/drivers/nvfx/nvfx_miptree.c
index 97b2e5e..1aa9abf 100644
--- a/src/gallium/drivers/nvfx/nvfx_miptree.c
+++ b/src/gallium/drivers/nvfx/nvfx_miptree.c
@@ -2,69 +2,104 @@
 #include "pipe/p_defines.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
+#include "util/u_memory.h"
 #include "util/u_math.h"
 
-#include "nvfx_context.h"
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_screen.h"
+#include "nv04_surface_2d.h"
 #include "nvfx_resource.h"
 #include "nvfx_transfer.h"
-#include "nv04_surface_2d.h"
-
-/* Currently using separate implementations for buffers and textures,
- * even though gallium has a unified abstraction of these objects.
- * Eventually these should be combined, and mechanisms like transfers
- * be adapted to work for both buffer and texture uploads.
- */
+#include "state_tracker/drm_api.h"
 
 static void
-nvfx_miptree_layout(struct nvfx_miptree *mt)
+nvfx_miptree_choose_format(struct nvfx_miptree *mt)
 {
 	struct pipe_resource *pt = &mt->base.base;
-	uint width = pt->width0;
-	uint offset = 0;
-	int nr_faces, l, f;
-	uint wide_pitch = pt->bind & (PIPE_BIND_SAMPLER_VIEW |
-				      PIPE_BIND_DEPTH_STENCIL |
-				      PIPE_BIND_RENDER_TARGET |
-				      PIPE_BIND_DISPLAY_TARGET |
-				      PIPE_BIND_SCANOUT);
-
-	if (pt->target == PIPE_TEXTURE_CUBE) {
-		nr_faces = 6;
-	} else
-	if (pt->target == PIPE_TEXTURE_3D) {
-		nr_faces = pt->depth0;
-	} else {
-		nr_faces = 1;
-	}
+	unsigned uniform_pitch = 0;
+	static int no_swizzle = -1;
+	if(no_swizzle < 0)
+		no_swizzle = debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE);
 
-	for (l = 0; l <= pt->last_level; l++) {
-		if (wide_pitch && (pt->flags & NVFX_RESOURCE_FLAG_LINEAR))
-			mt->level[l].pitch = align(util_format_get_stride(pt->format, pt->width0), 64);
-		else
-			mt->level[l].pitch = util_format_get_stride(pt->format, width);
+	/* Non-uniform pitch textures must be POT */
+	if (pt->width0 & (pt->width0 - 1) ||
+	    pt->height0 & (pt->height0 - 1) ||
+	    pt->depth0 & (pt->depth0 - 1)
+	    )
+		uniform_pitch = 1;
+
+	/* All texture formats except compressed ones can be swizzled
+	 * Unsure about depth, let's prevent swizzling for now
+	 */
+	if (
+		(pt->bind & (PIPE_BIND_SCANOUT | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_DEPTH_STENCIL))
+		|| (pt->_usage & PIPE_USAGE_DYNAMIC)
+		|| util_format_is_depth_or_stencil(pt->format)
+		|| util_format_is_s3tc(pt->format)
+		// disable swizzled textures on NV04-NV20 as our current drivers don't fully support that
+		// TODO: hardware should support them, fix the drivers and reenable
+		|| nouveau_screen(pt->screen)->device->chipset < 0x30
+		|| no_swizzle
+
+		// disable swizzling for non-RGBA 2D because our current 2D code can't handle anything
+		// else correctly, and even that is semi-broken
+		|| pt->target != PIPE_TEXTURE_2D
+		|| (pt->format != PIPE_FORMAT_B8G8R8A8_UNORM && pt->format != PIPE_FORMAT_B8G8R8X8_UNORM)
+	)
+		mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
+
+	/* non compressed formats with uniform pitch must be linear, and vice versa */
+	if(!util_format_is_s3tc(pt->format)
+		&& (uniform_pitch || mt->base.base.flags & NVFX_RESOURCE_FLAG_LINEAR))
+	{
+		mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
+		uniform_pitch = 1;
+	}
 
-		mt->level[l].image_offset =
-			CALLOC(nr_faces, sizeof(unsigned));
+	if(uniform_pitch)
+	{
+		mt->linear_pitch = util_format_get_stride(pt->format, pt->width0);
 
-		width  = u_minify(width, 1);
+		// TODO: this is only a constraint for rendering and not sampling, apparently
+		// we may also want this unconditionally
+		if(pt->bind & (PIPE_BIND_SAMPLER_VIEW |
+			PIPE_BIND_DEPTH_STENCIL |
+			PIPE_BIND_RENDER_TARGET |
+			PIPE_BIND_DISPLAY_TARGET |
+			PIPE_BIND_SCANOUT))
+			mt->linear_pitch = align(mt->linear_pitch, 64);
 	}
+	else
+		mt->linear_pitch = 0;
+}
+
+static unsigned
+nvfx_miptree_layout(struct nvfx_miptree *mt)
+{
+	struct pipe_resource* pt = &mt->base.base;
+        uint offset = 0;
+	for (unsigned l = 0; l <= pt->last_level; l++)
+	{
+		unsigned size;
+		mt->level_offset[l] = offset;
 
-	for (f = 0; f < nr_faces; f++) {
-		for (l = 0; l < pt->last_level; l++) {
-			mt->level[l].image_offset[f] = offset;
+		if(mt->linear_pitch)
+			size = mt->linear_pitch;
+		else
+			size = util_format_get_stride(pt->format, u_minify(pt->width0, l));
+		size = util_format_get_2d_size(pt->format, size, u_minify(pt->height0, l));
 
-			if (!(pt->flags & NVFX_RESOURCE_FLAG_LINEAR) &&
-			    u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1)
-				offset += align(mt->level[l].pitch * u_minify(pt->height0, l), 64);
-			else
-				offset += mt->level[l].pitch * u_minify(pt->height0, l);
-		}
+		if(pt->target == PIPE_TEXTURE_3D)
+			size *= u_minify(pt->depth0, l);
 
-		mt->level[l].image_offset[f] = offset;
-		offset += mt->level[l].pitch * u_minify(pt->height0, l);
+		offset += size;
 	}
 
-	mt->total_size = offset;
+	offset = align(offset, 128);
+	mt->face_size = offset;
+	if(mt->base.base.target == PIPE_TEXTURE_CUBE)
+		offset += 5 * mt->face_size;
+	return offset;
 }
 
 static boolean
@@ -79,7 +114,7 @@ nvfx_miptree_get_handle(struct pipe_screen *pscreen,
 
 	return nouveau_screen_bo_get_handle(pscreen,
 					    mt->base.bo,
-					    mt->level[0].pitch,
+					    mt->linear_pitch,
 					    whandle);
 }
 
@@ -88,15 +123,7 @@ static void
 nvfx_miptree_destroy(struct pipe_screen *screen, struct pipe_resource *pt)
 {
 	struct nvfx_miptree *mt = (struct nvfx_miptree *)pt;
-	int l;
-
 	nouveau_screen_bo_release(screen, mt->base.bo);
-
-	for (l = 0; l <= pt->last_level; l++) {
-		if (mt->level[l].image_offset)
-			FREE(mt->level[l].image_offset);
-	}
-
 	FREE(mt);
 }
 
@@ -116,76 +143,52 @@ struct u_resource_vtbl nvfx_miptree_vtbl =
    u_default_transfer_inline_write    /* transfer_inline_write */
 };
 
+static struct nvfx_miptree*
+nvfx_miptree_create_skeleton(struct pipe_screen *pscreen, const struct pipe_resource *pt)
+{
+        struct nvfx_miptree *mt;
+
+        if(pt->width0 > 4096 || pt->height0 > 4096)
+                return NULL;
+
+        mt = CALLOC_STRUCT(nvfx_miptree);
+        if (!mt)
+                return NULL;
+
+        mt->base.base = *pt;
+        mt->base.vtbl = &nvfx_miptree_vtbl;
+        pipe_reference_init(&mt->base.base.reference, 1);
+        mt->base.base.screen = pscreen;
+
+        // set this to the actual capabilities, we use it to decide whether to use the 3D engine for copies
+        // TODO: is this the correct way to use Gallium?
+        mt->base.base.bind = pt->bind | PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_DEPTH_STENCIL;
+
+        // on our current driver (and the driver too), format support does not depend on geometry, so don't bother computing it
+        // TODO: may want to revisit this
+        if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, PIPE_BIND_RENDER_TARGET, 0))
+                mt->base.base.bind &=~ PIPE_BIND_RENDER_TARGET;
+        if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, PIPE_BIND_SAMPLER_VIEW, 0))
+                mt->base.base.bind &=~ PIPE_BIND_SAMPLER_VIEW;
+        if(!pscreen->is_format_supported(pscreen, pt->format, pt->target, PIPE_BIND_DEPTH_STENCIL, 0))
+                mt->base.base.bind &=~ PIPE_BIND_DEPTH_STENCIL;
+
+        return mt;
+}
 
 
 struct pipe_resource *
 nvfx_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *pt)
 {
-	struct nvfx_miptree *mt;
-	static int no_swizzle = -1;
-	if(no_swizzle < 0)
-		no_swizzle = debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE);
-
-	mt = CALLOC_STRUCT(nvfx_miptree);
-	if (!mt)
-		return NULL;
-
-	mt->base.base = *pt;
-	mt->base.vtbl = &nvfx_miptree_vtbl;
-	pipe_reference_init(&mt->base.base.reference, 1);
-	mt->base.base.screen = pscreen;
-
-	/* Swizzled textures must be POT */
-	if (pt->width0 & (pt->width0 - 1) ||
-	    pt->height0 & (pt->height0 - 1))
-		mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
-	else
-	if (pt->bind & (PIPE_BIND_SCANOUT |
-			PIPE_BIND_DISPLAY_TARGET |
-			PIPE_BIND_DEPTH_STENCIL))
-		mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
-	else
-	if (pt->_usage == PIPE_USAGE_DYNAMIC)
-		mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
-	else {
-		switch (pt->format) {
-		case PIPE_FORMAT_B5G6R5_UNORM:
-		case PIPE_FORMAT_L8A8_UNORM:
-		case PIPE_FORMAT_A8_UNORM:
-		case PIPE_FORMAT_L8_UNORM:
-		case PIPE_FORMAT_I8_UNORM:
-			/* TODO: we can actually swizzle these formats on nv40, we
-				are just preserving the pre-unification behavior.
-				The whole 2D code is going to be rewritten anyway. */
-			if(nvfx_screen(pscreen)->is_nv4x) {
-				mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
-				break;
-			}
-		/* TODO: Figure out which formats can be swizzled */
-		case PIPE_FORMAT_B8G8R8A8_UNORM:
-		case PIPE_FORMAT_B8G8R8X8_UNORM:
-		case PIPE_FORMAT_R16_SNORM:
-		{
-			if (no_swizzle)
-				mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
-			break;
-		}
-		default:
-			mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
-		}
-	}
-
-	/* apparently we can't render to swizzled surfaces smaller than 64 bytes, so make them linear.
-	 * If the user did not ask for a render target, they can still render to it, but it will cost them an extra copy.
-	 * This also happens for small mipmaps of large textures. */
-	if (pt->bind & PIPE_BIND_RENDER_TARGET &&
-	    util_format_get_stride(pt->format, pt->width0) < 64)
-		mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
+	struct nvfx_miptree* mt = nvfx_miptree_create_skeleton(pscreen, pt);
+	nvfx_miptree_choose_format(mt);
 
-	nvfx_miptree_layout(mt);
+        unsigned flags = NOUVEAU_BO_MAP | NOUVEAU_BO_GART;
+        unsigned size = nvfx_miptree_layout(mt);
 
-	mt->base.bo = nouveau_screen_bo_new(pscreen, 256,
-            pt->_usage, pt->bind, mt->total_size);
+        if (!(pt->_usage & PIPE_USAGE_DYNAMIC))
+                flags |= NOUVEAU_BO_VRAM;
+	nouveau_bo_new(nouveau_screen(pscreen)->device, flags, 256, size, &mt->base.bo);
 	if (!mt->base.bo) {
 		FREE(mt);
 		return NULL;
@@ -193,53 +196,28 @@ nvfx_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *pt)
 	return &mt->base.base;
 }
 
-
-
-
+// TODO: redo this, just calling miptree_layout
 struct pipe_resource *
-nvfx_miptree_from_handle(struct pipe_screen *pscreen,
-			 const struct pipe_resource *template,
-			 struct winsys_handle *whandle)
+nvfx_miptree_from_handle(struct pipe_screen *pscreen, const struct pipe_resource *template, struct winsys_handle *whandle)
 {
-	struct nvfx_miptree *mt;
-	unsigned stride;
-
-	/* Only supports 2D, non-mipmapped textures for the moment */
-	if (template->target != PIPE_TEXTURE_2D ||
-	    template->last_level != 0 ||
-	    template->depth0 != 1)
-		return NULL;
-
-	mt = CALLOC_STRUCT(nvfx_miptree);
-	if (!mt)
-		return NULL;
-
-	mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride);
-	if (mt->base.bo == NULL) {
-		FREE(mt);
-		return NULL;
-	}
-
-	mt->base.base = *template;
-	mt->base.vtbl = &nvfx_miptree_vtbl;
-	pipe_reference_init(&mt->base.base.reference, 1);
-	mt->base.base.screen = pscreen;
-	mt->level[0].pitch = stride;
-	mt->level[0].image_offset = CALLOC(1, sizeof(unsigned));
-
-	/* Assume whoever created this buffer expects it to be linear for now */
-	mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
-
-	/* XXX: Need to adjust bo refcount??
-	 */
-	/* nouveau_bo_ref(bo, &mt->base.bo); */
-	return &mt->base.base;
+        struct nvfx_miptree* mt = nvfx_miptree_create_skeleton(pscreen, template);
+        if(whandle->stride) {
+		mt->linear_pitch = whandle->stride;
+		mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
+        } else
+		nvfx_miptree_choose_format(mt);
+
+        nvfx_miptree_layout(mt);
+
+        unsigned stride;
+        mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride);
+        if (mt->base.bo == NULL) {
+                FREE(mt);
+                return NULL;
+        }
+        return &mt->base.base;
 }
 
-
-
-
-
 /* Surface helpers, not strictly required to implement the resource vtbl:
  */
 struct pipe_surface *
@@ -248,9 +226,9 @@ nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt,
 			 unsigned flags)
 {
 	struct nvfx_miptree *mt = (struct nvfx_miptree *)pt;
-	struct nv04_surface *ns;
+	struct nvfx_surface *ns;
 
-	ns = CALLOC_STRUCT(nv04_surface);
+	ns = CALLOC_STRUCT(nvfx_surface);
 	if (!ns)
 		return NULL;
 	pipe_resource_reference(&ns->base.texture, pt);
@@ -262,33 +240,18 @@ nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt,
 	ns->base.face = face;
 	ns->base.level = level;
 	ns->base.zslice = zslice;
-	ns->pitch = mt->level[level].pitch;
-
-	if (pt->target == PIPE_TEXTURE_CUBE) {
-		ns->base.offset = mt->level[level].image_offset[face];
-	} else
-	if (pt->target == PIPE_TEXTURE_3D) {
-		ns->base.offset = mt->level[level].image_offset[zslice];
-	} else {
-		ns->base.offset = mt->level[level].image_offset[0];
-	}
 
-	/* create a linear temporary that we can render into if
-	 * necessary.
-	 *
-	 * Note that ns->pitch is always a multiple of 64 for linear
-	 * surfaces and swizzled surfaces are POT, so ns->pitch & 63
-	 * is equivalent to (ns->pitch < 64 && swizzled)
-	 */
-
-	if ((ns->pitch & 63) && 
-	    (ns->base.usage & PIPE_BIND_RENDER_TARGET))
-	{
-		struct nv04_surface_2d* eng2d  =
-			((struct nvfx_screen*)pscreen)->eng2d;
+	if(mt->linear_pitch)
+		ns->pitch = mt->linear_pitch;
+	else
+		ns->pitch = util_format_get_stride(ns->base.format, ns->base.width);
 
-		ns = nv04_surface_wrap_for_render(pscreen, eng2d, ns);
-	}
+	ns->base.offset = mt->level_offset[level];
+	if (pt->target == PIPE_TEXTURE_CUBE)
+		ns->base.offset += mt->face_size * face;
+	else if (pt->target == PIPE_TEXTURE_3D && mt->linear_pitch)
+		ns->base.offset += zslice
+		* util_format_get_2d_size(ns->base.format, (mt->linear_pitch ? mt->linear_pitch : util_format_get_stride(ns->base.format, ns->base.width)),  ns->base.height);
 
 	return &ns->base;
 }
@@ -296,15 +259,6 @@ nvfx_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_resource *pt,
 void
 nvfx_miptree_surface_del(struct pipe_surface *ps)
 {
-	struct nv04_surface* ns = (struct nv04_surface*)ps;
-	if(ns->backing)
-	{
-		struct nvfx_screen* screen = (struct nvfx_screen*)ps->texture->screen;
-		if(ns->backing->base.usage & PIPE_BIND_BLIT_DESTINATION)
-			screen->eng2d->copy(screen->eng2d, &ns->backing->base, 0, 0, ps, 0, 0, ns->base.width, ns->base.height);
-		nvfx_miptree_surface_del(&ns->backing->base);
-	}
-
 	pipe_resource_reference(&ps->texture, NULL);
 	FREE(ps);
 }
diff --git a/src/gallium/drivers/nvfx/nvfx_resource.h b/src/gallium/drivers/nvfx/nvfx_resource.h
index a68c14c..73d4019 100644
--- a/src/gallium/drivers/nvfx/nvfx_resource.h
+++ b/src/gallium/drivers/nvfx/nvfx_resource.h
@@ -23,15 +23,11 @@ struct nvfx_resource {
 #define NVFX_MAX_TEXTURE_LEVELS  16
 
 struct nvfx_miptree {
-	struct nvfx_resource base;
-	uint total_size;
+        struct nvfx_resource base;
 
-	struct {
-		uint pitch;
-		uint *image_offset;
-	} level[NVFX_MAX_TEXTURE_LEVELS];
-
-	unsigned image_nr;
+        unsigned linear_pitch; /* for linear textures, 0 for swizzled and compressed textures with level-dependent minimal pitch */
+        unsigned face_size; /* 128-byte aligned face/total size */
+        unsigned level_offset[NVFX_MAX_TEXTURE_LEVELS];
 };
 
 static INLINE 
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.c b/src/gallium/drivers/nvfx/nvfx_screen.c
index 04b456d..25d7520 100644
--- a/src/gallium/drivers/nvfx/nvfx_screen.c
+++ b/src/gallium/drivers/nvfx/nvfx_screen.c
@@ -172,7 +172,6 @@ nvfx_screen_surface_format_supported(struct pipe_screen *pscreen,
 	return FALSE;
 }
 
-
 static void
 nvfx_screen_destroy(struct pipe_screen *pscreen)
 {
@@ -184,7 +183,7 @@ nvfx_screen_destroy(struct pipe_screen *pscreen)
 	nouveau_notifier_free(&screen->query);
 	nouveau_notifier_free(&screen->sync);
 	nouveau_grobj_free(&screen->eng3d);
-	nv04_surface_2d_takedown(&screen->eng2d);
+	nvfx_screen_surface_takedown(pscreen);
 
 	nouveau_screen_fini(&screen->base);
 
@@ -389,8 +388,7 @@ nvfx_screen_create(struct pipe_winsys *ws, struct nouveau_device *dev)
 	}
 
 	/* 2D engine setup */
-	screen->eng2d = nv04_surface_2d_init(&screen->base);
-	screen->eng2d->buf = nvfx_surface_buffer;
+	nvfx_screen_surface_init(pscreen);
 
 	/* Notifier for sync purposes */
 	ret = nouveau_notifier_alloc(chan, 0xbeef0301, 1, &screen->sync);
diff --git a/src/gallium/drivers/nvfx/nvfx_screen.h b/src/gallium/drivers/nvfx/nvfx_screen.h
index 127d891..6377194 100644
--- a/src/gallium/drivers/nvfx/nvfx_screen.h
+++ b/src/gallium/drivers/nvfx/nvfx_screen.h
@@ -19,7 +19,6 @@ struct nvfx_screen {
 	unsigned index_buffer_reloc_flags;
 
 	/* HW graphics objects */
-	struct nv04_surface_2d *eng2d;
 	struct nouveau_grobj *eng3d;
 	struct nouveau_notifier *sync;
 
@@ -31,6 +30,8 @@ struct nvfx_screen {
 	/* Vtxprog resources */
 	struct nouveau_resource *vp_exec_heap;
 	struct nouveau_resource *vp_data_heap;
+
+	struct nvfx_surface_2d eng2d;
 };
 
 static INLINE struct nvfx_screen *
@@ -39,4 +40,7 @@ nvfx_screen(struct pipe_screen *screen)
 	return (struct nvfx_screen *)screen;
 }
 
+int nvfx_screen_surface_init(struct pipe_screen *pscreen);
+void nvfx_screen_surface_takedown(struct pipe_screen *pscreen);
+
 #endif
diff --git a/src/gallium/drivers/nvfx/nvfx_state.h b/src/gallium/drivers/nvfx/nvfx_state.h
index 9ceb257..2f09d77 100644
--- a/src/gallium/drivers/nvfx/nvfx_state.h
+++ b/src/gallium/drivers/nvfx/nvfx_state.h
@@ -73,5 +73,4 @@ struct nvfx_fragment_program {
 	struct nvfx_fragment_program_bo* fpbo;
 };
 
-
 #endif
diff --git a/src/gallium/drivers/nvfx/nvfx_state_fb.c b/src/gallium/drivers/nvfx/nvfx_state_fb.c
index 8c21598..e111d11 100644
--- a/src/gallium/drivers/nvfx/nvfx_state_fb.c
+++ b/src/gallium/drivers/nvfx/nvfx_state_fb.c
@@ -2,8 +2,6 @@
 #include "nvfx_resource.h"
 #include "nouveau/nouveau_util.h"
 
-
-
 void
 nvfx_state_framebuffer_validate(struct nvfx_context *nvfx)
 {
@@ -29,9 +27,9 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx)
 			colour_format = fb->cbufs[i]->format;
 
 		rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i);
-		nvfx->hw_rt[i].bo = nvfx_surface_buffer(fb->cbufs[i]);
+		nvfx->hw_rt[i].bo = ((struct nvfx_miptree*)fb->cbufs[i]->texture)->base.bo;
 		nvfx->hw_rt[i].offset = fb->cbufs[i]->offset;
-		nvfx->hw_rt[i].pitch = ((struct nv04_surface *)fb->cbufs[i])->pitch;
+		nvfx->hw_rt[i].pitch = ((struct nvfx_surface *)fb->cbufs[i])->pitch;
 	}
 	for(; i < 4; ++i)
 		nvfx->hw_rt[i].bo = 0;
@@ -42,9 +40,9 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx)
 
 	if (fb->zsbuf) {
 		zeta_format = fb->zsbuf->format;
-		nvfx->hw_zeta.bo = nvfx_surface_buffer(fb->zsbuf);
+		nvfx->hw_zeta.bo = ((struct nvfx_miptree*)fb->zsbuf->texture)->base.bo;
 		nvfx->hw_zeta.offset = fb->zsbuf->offset;
-		nvfx->hw_zeta.pitch = ((struct nv04_surface *)fb->zsbuf)->pitch;
+		nvfx->hw_zeta.pitch = ((struct nvfx_surface *)fb->zsbuf)->pitch;
 	}
 	else
 		nvfx->hw_zeta.bo = 0;
@@ -67,7 +65,7 @@ nvfx_state_framebuffer_validate(struct nvfx_context *nvfx)
 		depth_only = 1;
 
 		/* Render to depth buffer only */
-		if (!(fb->zsbuf->texture->_usage & NVFX_RESOURCE_FLAG_LINEAR)) {
+		if (!(fb->zsbuf->texture->flags & NVFX_RESOURCE_FLAG_LINEAR)) {
 			assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
 
 			rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED |
diff --git a/src/gallium/drivers/nvfx/nvfx_surface.c b/src/gallium/drivers/nvfx/nvfx_surface.c
index 2e11565..21ab48b 100644
--- a/src/gallium/drivers/nvfx/nvfx_surface.c
+++ b/src/gallium/drivers/nvfx/nvfx_surface.c
@@ -1,60 +1,506 @@
+#include <errno.h>
+#include "pipe/p_context.h"
+#include "pipe/p_format.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
 
-/**************************************************************************
- *
- * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- **************************************************************************/
-
+#include "nouveau/nouveau_winsys.h"
+#include "nouveau/nouveau_util.h"
+#include "nouveau/nouveau_screen.h"
 #include "nvfx_context.h"
-#include "pipe/p_defines.h"
-#include "util/u_inlines.h"
+#include "nvfx_screen.h"
+#include "nv04_surface_2d.h"
+#include "nvfx_resource.h"
+/* Translate a pipe format into an NV04_CONTEXT_SURFACES_2D_FORMAT_* code; returns -1 if the format has no mapping. */
+static INLINE int
+nvfx_surface_format(enum pipe_format format)
+{
+	switch (format) {
+	case PIPE_FORMAT_A8_UNORM: /* single-channel 8-bit formats all map to Y8 */
+	case PIPE_FORMAT_L8_UNORM:
+	case PIPE_FORMAT_I8_UNORM:
+		return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8;
+	case PIPE_FORMAT_R16_SNORM: /* every 16-bit format listed here maps to R5G6B5 */
+	case PIPE_FORMAT_B5G6R5_UNORM:
+	case PIPE_FORMAT_Z16_UNORM:
+	case PIPE_FORMAT_L8A8_UNORM:
+		return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5;
+	case PIPE_FORMAT_B8G8R8X8_UNORM:
+	case PIPE_FORMAT_B8G8R8A8_UNORM:
+		return NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8;
+	case PIPE_FORMAT_S8_USCALED_Z24_UNORM: /* 32-bit depth/stencil formats map to Y32 */
+	case PIPE_FORMAT_X8Z24_UNORM:
+		return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32;
+	default:
+		return -1;
+	}
+}
+/* Translate a pipe format into an NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_* code; returns -1 if the format has no mapping. */
+static INLINE int
+nv04_rect_format(enum pipe_format format)
+{
+	switch (format) {
+	case PIPE_FORMAT_A8_UNORM: /* NOTE(review): L8/I8 are accepted by nvfx_surface_format but have no entry here */
+		return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
+	case PIPE_FORMAT_B5G6R5_UNORM:
+	case PIPE_FORMAT_L8A8_UNORM:
+	case PIPE_FORMAT_Z16_UNORM:
+		return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5;
+	case PIPE_FORMAT_B8G8R8X8_UNORM:
+	case PIPE_FORMAT_B8G8R8A8_UNORM:
+	case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+	case PIPE_FORMAT_X8Z24_UNORM:
+		return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8;
+	default:
+		return -1;
+	}
+}
+/* Translate a pipe format into an NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_* code; returns -1 if the format has no mapping. */
+static INLINE int
+nv04_scaled_image_format(enum pipe_format format)
+{
+	switch (format) {
+	case PIPE_FORMAT_A8_UNORM:
+	case PIPE_FORMAT_L8_UNORM:
+	case PIPE_FORMAT_I8_UNORM:
+		return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_Y8;
+	case PIPE_FORMAT_B5G5R5A1_UNORM:
+		return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5;
+	case PIPE_FORMAT_B8G8R8A8_UNORM:
+		return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8;
+	case PIPE_FORMAT_B8G8R8X8_UNORM:
+		return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8;
+	case PIPE_FORMAT_B5G6R5_UNORM:
+	case PIPE_FORMAT_R16_SNORM:
+	case PIPE_FORMAT_L8A8_UNORM:
+		return NV03_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5;
+	default: /* depth formats have no entry here, so they yield -1 */
+		return -1;
+	}
+}
+/* Interleave the low 12 bits of x and y: bit i of x lands on bit 2*i, bit i of y on bit 2*i + 1. */
+static INLINE unsigned
+nv04_swizzle_bits_square(unsigned x, unsigned y)
+{
+	unsigned u = (x & 0x001) << 0 | /* x bits go to the even positions */
+	             (x & 0x002) << 1 |
+	             (x & 0x004) << 2 |
+	             (x & 0x008) << 3 |
+	             (x & 0x010) << 4 |
+	             (x & 0x020) << 5 |
+	             (x & 0x040) << 6 |
+	             (x & 0x080) << 7 |
+	             (x & 0x100) << 8 |
+	             (x & 0x200) << 9 |
+	             (x & 0x400) << 10 |
+	             (x & 0x800) << 11;
+
+	unsigned v = (y & 0x001) << 1 | /* y bits go to the odd positions */
+	             (y & 0x002) << 2 |
+	             (y & 0x004) << 3 |
+	             (y & 0x008) << 4 |
+	             (y & 0x010) << 5 |
+	             (y & 0x020) << 6 |
+	             (y & 0x040) << 7 |
+	             (y & 0x080) << 8 |
+	             (y & 0x100) << 9 |
+	             (y & 0x200) << 10 |
+	             (y & 0x400) << 11 |
+	             (y & 0x800) << 12;
+	return v | u;
+}
+/* Swizzled index of (x, y) within a w x h texture.
+ * Rectangular swizzled textures are linear concatenations of swizzled square tiles of side MIN2(w, h). */
+static INLINE unsigned
+nv04_swizzle_bits(unsigned x, unsigned y, unsigned w, unsigned h)
+{
+	unsigned s = MIN2(w, h);
+	unsigned m = s - 1; /* used as a bit mask, so this presumably expects power-of-two sizes - not checked here */
+	return (((x | y) & ~m) * s) | nv04_swizzle_bits_square(x & m, y & m);
+}
+/* Copy a w x h rectangle from linear surface src at (sx, sy) into swizzled surface dst at (dx, dy) using SIFM; always returns 0. */
+static int
+nvfx_surface_copy_swizzle(struct nvfx_surface_2d *ctx,
+			  struct pipe_surface *dst, int dx, int dy,
+			  struct pipe_surface *src, int sx, int sy,
+			  int w, int h)
+{
+	struct nouveau_channel *chan = ctx->swzsurf->channel;
+	struct nouveau_grobj *swzsurf = ctx->swzsurf;
+	struct nouveau_grobj *sifm = ctx->sifm;
+	struct nouveau_bo *src_bo = ((struct nvfx_miptree*)src->texture)->base.bo;
+	struct nouveau_bo *dst_bo = ((struct nvfx_miptree*)dst->texture)->base.bo;
+	const unsigned src_pitch = ((struct nvfx_surface *)src)->pitch;
+	/* Max width & height may not be the same on all HW, but must be POT */
+	const unsigned max_w = 1024;
+	const unsigned max_h = 1024;
+	const unsigned sub_w = w > max_w ? max_w : w;
+	const unsigned sub_h = h > max_h ? max_h : h;
+	unsigned x;
+	unsigned y;
+
+	/* Swizzled surfaces must be POT */
+	assert(util_is_pot(dst->width) && util_is_pot(dst->height));
+
+	/* If area is too large to copy in one shot we must copy it in POT chunks to meet alignment requirements */
+	assert(sub_w == w || util_is_pot(sub_w));
+	assert(sub_h == h || util_is_pot(sub_h));
+	/* Reserve 8 words of setup + 17 per chunk (2 + 2 relocs); the chunk count bound relies on fixed sub_w/sub_h strides */
+	MARK_RING (chan, 8 + ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*17, 2 +
+			 ((w+sub_w)/sub_w)*((h+sub_h)/sub_h)*2);
+
+	BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1);
+	OUT_RELOCo(chan, dst_bo,
+	                 NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+	BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_FORMAT, 1);
+	OUT_RING  (chan, nvfx_surface_format(dst->format) |
+	                 log2i(dst->width) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT |
+	                 log2i(dst->height) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT);
+
+	BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1);
+	OUT_RELOCo(chan, src_bo,
+	                 NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+	BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE, 1);
+	OUT_RING  (chan, swzsurf->handle);
+	/* Step by the fixed chunk size; only the extent of the last chunk in a row/column is clamped (cur_w/cur_h) */
+	for (y = 0; y < h; y += sub_h) {
+	  const unsigned cur_h = MIN2(sub_h, h - y);
+
+	  for (x = 0; x < w; x += sub_w) {
+	    const unsigned cur_w = MIN2(sub_w, w - x);
+
+	    assert(!(dst->offset & 63));
+
+	    BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1);
+	    OUT_RELOCl(chan, dst_bo, dst->offset,
+                             NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+	    BEGIN_RING(chan, sifm, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9);
+	    OUT_RING  (chan, NV05_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE);
+	    OUT_RING  (chan, nv04_scaled_image_format(src->format));
+	    OUT_RING  (chan, NV03_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY);
+	    OUT_RING  (chan, (x + dx) | ((y + dy) << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_POINT_Y_SHIFT));
+	    OUT_RING  (chan, cur_h << NV03_SCALED_IMAGE_FROM_MEMORY_CLIP_SIZE_H_SHIFT | cur_w);
+	    OUT_RING  (chan, (x + dx) | ((y + dy) << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_POINT_Y_SHIFT));
+	    OUT_RING  (chan, cur_h << NV03_SCALED_IMAGE_FROM_MEMORY_OUT_SIZE_H_SHIFT | cur_w);
+	    OUT_RING  (chan, 1 << 20);
+	    OUT_RING  (chan, 1 << 20);
+
+	    BEGIN_RING(chan, sifm, NV03_SCALED_IMAGE_FROM_MEMORY_SIZE, 4);
+	    OUT_RING  (chan, cur_h << NV03_SCALED_IMAGE_FROM_MEMORY_SIZE_H_SHIFT | cur_w);
+	    OUT_RING  (chan, src_pitch |
+			     NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER |
+			     NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE);
+	    OUT_RELOCl(chan, src_bo, src->offset + (sy+y) * src_pitch + (sx+x) * util_format_get_blocksize(src->texture->format),
+                             NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+	    OUT_RING  (chan, 0);
+	  }
+	}
+
+	return 0;
+}
+/* Copy a w x h rectangle from src at (sx, sy) to dst at (dx, dy) with the M2MF object, at most 2047 lines per batch; always returns 0. */
+static int
+nvfx_surface_copy_m2mf(struct nvfx_surface_2d *ctx,
+		       struct pipe_surface *dst, int dx, int dy,
+		       struct pipe_surface *src, int sx, int sy, int w, int h)
+{
+	struct nouveau_channel *chan = ctx->m2mf->channel;
+	struct nouveau_grobj *m2mf = ctx->m2mf;
+	struct nouveau_bo *src_bo = ((struct nvfx_miptree*)src->texture)->base.bo;
+	struct nouveau_bo *dst_bo = ((struct nvfx_miptree*)dst->texture)->base.bo;
+	unsigned src_pitch = ((struct nvfx_surface *)src)->pitch;
+	unsigned dst_pitch = ((struct nvfx_surface *)dst)->pitch;
+	unsigned dst_offset = dst->offset + dy * dst_pitch +
+	                      dx * util_format_get_blocksize(dst->texture->format);
+	unsigned src_offset = src->offset + sy * src_pitch +
+	                      sx * util_format_get_blocksize(src->texture->format);
+	/* Reserve 3 words for the DMA object setup and 9 per batch (presumably 1 header + 8 data), with 2 relocations each */
+	MARK_RING (chan, 3 + ((h / 2047) + 1) * 9, 2 + ((h / 2047) + 1) * 2);
+	BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2);
+	OUT_RELOCo(chan, src_bo,
+		   NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+	OUT_RELOCo(chan, dst_bo,
+		   NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+	while (h) {
+		int count = (h > 2047) ? 2047 : h; /* lines submitted in this batch */
+
+		BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
+		OUT_RELOCl(chan, src_bo, src_offset,
+			   NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+		OUT_RELOCl(chan, dst_bo, dst_offset,
+			   NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+		OUT_RING  (chan, src_pitch);
+		OUT_RING  (chan, dst_pitch);
+		OUT_RING  (chan, w * util_format_get_blocksize(src->texture->format)); /* bytes copied per line */
+		OUT_RING  (chan, count);
+		OUT_RING  (chan, 0x0101); /* presumably 1-byte input/output increments - TODO confirm against the M2MF FORMAT method */
+		OUT_RING  (chan, 0);
+
+		h -= count;
+		src_offset += src_pitch * count; /* advance both cursors past the lines just queued */
+		dst_offset += dst_pitch * count;
+	}
+
+	return 0;
+}
+/* Copy a w x h rectangle with the IMAGE_BLIT object; returns 1 if dst->format has no 2D surface format, 0 otherwise. */
+static int
+nvfx_surface_copy_blit(struct nvfx_surface_2d *ctx, struct pipe_surface *dst,
+		       int dx, int dy, struct pipe_surface *src, int sx, int sy,
+		       int w, int h)
+{
+	struct nouveau_channel *chan = ctx->surf2d->channel;
+	struct nouveau_grobj *surf2d = ctx->surf2d;
+	struct nouveau_grobj *blit = ctx->blit;
+	struct nouveau_bo *src_bo = ((struct nvfx_miptree*)src->texture)->base.bo;
+	struct nouveau_bo *dst_bo = ((struct nvfx_miptree*)dst->texture)->base.bo;
+	unsigned src_pitch = ((struct nvfx_surface *)src)->pitch;
+	unsigned dst_pitch = ((struct nvfx_surface *)dst)->pitch;
+	int format;
+
+	format = nvfx_surface_format(dst->format);
+	if (format < 0)
+		return 1; /* nothing has been emitted yet, so the caller may fall back to another path */
+
+	MARK_RING (chan, 12, 4);
+	BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
+	OUT_RELOCo(chan, src_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); /* VRAM only: no NOUVEAU_BO_GART, unlike the M2MF path */
+	OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+	BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4);
+	OUT_RING  (chan, format);
+	OUT_RING  (chan, (dst_pitch << 16) | src_pitch); /* both pitches packed into one word, destination in the high half */
+	OUT_RELOCl(chan, src_bo, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD);
+	OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+	BEGIN_RING(chan, blit, 0x0300, 3);
+	OUT_RING  (chan, (sy << 16) | sx); /* source point, y in the high half */
+	OUT_RING  (chan, (dy << 16) | dx); /* destination point */
+	OUT_RING  (chan, ( h << 16) |  w); /* size */
+
+	return 0;
+}
 
 static void
-nvfx_surface_copy(struct pipe_context *pipe,
-		  struct pipe_surface *dest, unsigned destx, unsigned desty,
-		  struct pipe_surface *src, unsigned srcx, unsigned srcy,
-		  unsigned width, unsigned height)
+nvfx_surface_copy(struct pipe_context* pipe, struct pipe_surface *dst,
+		  unsigned dx, unsigned dy, struct pipe_surface *src, unsigned sx, unsigned sy,
+		  unsigned w, unsigned h)
 {
-	struct nvfx_context *nvfx = nvfx_context(pipe);
-	struct nv04_surface_2d *eng2d = nvfx->screen->eng2d;
+	struct nvfx_surface_2d *ctx = &nvfx_screen(pipe->screen)->eng2d;
+	//unsigned src_pitch = ((struct nvfx_surface *)src)->pitch;
+	//unsigned dst_pitch = ((struct nvfx_surface *)dst)->pitch;
+	int src_linear = src->texture->flags & NVFX_RESOURCE_FLAG_LINEAR;
+	int dst_linear = dst->texture->flags & NVFX_RESOURCE_FLAG_LINEAR;
+	/* Copies a w x h rectangle from src at (sx, sy) to dst at (dx, dy); no format conversion is done */
+	assert(src->format == dst->format);
+
+	/* Linear -> swizzled copies wider and taller than one texel go through the SIFM swizzling path */
+        if (src_linear && !dst_linear && w > 1 && h > 1) {
+           nvfx_surface_copy_swizzle(ctx, dst, dx, dy, src, sx, sy, w, h);
+           return;
+        }
+
+        /* Every other copy uses M2MF instead of the blitter, since M2MF always works.
+         * Any possible performance drop is likely to be insignificant
+         * and dwarfed anyway by the current buffer management problems.
+	 *
+	 * TODO: consider switching back to the blitter (the disabled call below)
+         */
 
-	eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height);
+	if(0)
+		nvfx_surface_copy_blit(ctx, dst, dx, dy, src, sx, sy, w, h);
+
+        nvfx_surface_copy_m2mf(ctx, dst, dx, dy, src, sx, sy, w, h);
 }
 
 static void
-nvfx_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest,
-		  unsigned destx, unsigned desty, unsigned width,
-		  unsigned height, unsigned value)
+nvfx_surface_fill(struct pipe_context* pipe, struct pipe_surface *dst,
+		  unsigned dx, unsigned dy, unsigned w, unsigned h, unsigned value)
 {
-	struct nvfx_context *nvfx = nvfx_context(pipe);
-	struct nv04_surface_2d *eng2d = nvfx->screen->eng2d;
+	struct nvfx_surface_2d *ctx = &nvfx_screen(pipe->screen)->eng2d;
+	struct nouveau_channel *chan = ctx->surf2d->channel;
+	struct nouveau_grobj *surf2d = ctx->surf2d;
+	struct nouveau_grobj *rect = ctx->rect;
+	struct nouveau_bo *dst_bo = ((struct nvfx_miptree*)dst->texture)->base.bo;
+	unsigned dst_pitch = ((struct nvfx_surface *)dst)->pitch;
+	int cs2d_format, gdirect_format;
+	/* Fills a w x h rectangle at (dx, dy) with value; unsupported formats are only caught by the asserts below */
+	cs2d_format = nvfx_surface_format(dst->format);
+	assert(cs2d_format >= 0);
+
+	gdirect_format = nv04_rect_format(dst->format);
+	assert(gdirect_format >= 0);
+
+	MARK_RING (chan, 16, 4);
+	BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
+	OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); /* dst is bound in both slots of this method pair */
+	OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+	BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4);
+	OUT_RING  (chan, cs2d_format);
+	OUT_RING  (chan, (dst_pitch << 16) | dst_pitch);
+	OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+	OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR);
+
+	BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1);
+	OUT_RING  (chan, gdirect_format);
+	BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1);
+	OUT_RING  (chan, value);
+	BEGIN_RING(chan, rect,
+		   NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(0), 2);
+	OUT_RING  (chan, (dx << 16) | dy); /* NOTE(review): x is in the high half here, the reverse of the blit object's (y << 16) | x - presumably the rectangle method's layout; confirm */
+	OUT_RING  (chan, ( w << 16) |  h);
+}
 
-	eng2d->fill(eng2d, dest, destx, desty, width, height, value);
+static void
+nvfx_surface_2d_takedown(struct nvfx_surface_2d* ctx)
+{ /* Release the notifier and the six 2D engine objects held in ctx. */
+	nouveau_notifier_free(&ctx->ntfy); /* also reached from nvfx_screen_surface_init's error paths with some members never allocated, so these frees presumably accept unset handles - TODO confirm */
+	nouveau_grobj_free(&ctx->m2mf);
+	nouveau_grobj_free(&ctx->surf2d);
+	nouveau_grobj_free(&ctx->swzsurf);
+	nouveau_grobj_free(&ctx->rect);
+	nouveau_grobj_free(&ctx->blit);
+	nouveau_grobj_free(&ctx->sifm);
 }
+/* Screen-level wrapper: tear down the 2D engine state embedded in the nvfx screen. */
 void
-nvfx_init_surface_functions(struct nvfx_context *nvfx)
+nvfx_screen_surface_takedown(struct pipe_screen *pscreen)
+{
+	struct nvfx_surface_2d *ctx = &nvfx_screen(pscreen)->eng2d;
+	nvfx_surface_2d_takedown(ctx);
+}
+/* Allocate the notifier and the m2mf, surf2d, blit, rect, swzsurf and sifm objects on the screen's channel and emit their one-time setup; returns 0, or the failing allocation's error code after tearing everything down. */
+int
+nvfx_screen_surface_init(struct pipe_screen *pscreen)
+{
+	struct nvfx_surface_2d *ctx = &nvfx_screen(pscreen)->eng2d;
+	struct nouveau_channel *chan = nouveau_screen(pscreen)->channel;
+	unsigned handle = 0x88000000, class; /* object handles are handed out sequentially from this base */
+	int ret;
+
+	ret = nouveau_notifier_alloc(chan, handle++, 1, &ctx->ntfy);
+	if (ret) {
+		nvfx_surface_2d_takedown(ctx);
+		return ret;
+	}
+
+	ret = nouveau_grobj_alloc(chan, handle++, 0x0039, &ctx->m2mf);
+	if (ret) {
+		nvfx_surface_2d_takedown(ctx);
+		return ret;
+	}
+
+	BEGIN_RING(chan, ctx->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1);
+	OUT_RING  (chan, ctx->ntfy->handle);
+
+	if (chan->device->chipset < 0x10)
+		class = NV04_CONTEXT_SURFACES_2D;
+	else
+		class = NV10_CONTEXT_SURFACES_2D;
+
+	ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->surf2d);
+	if (ret) {
+		nvfx_surface_2d_takedown(ctx);
+		return ret;
+	}
+
+	BEGIN_RING(chan, ctx->surf2d,
+			 NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2);
+	OUT_RING  (chan, chan->vram->handle); /* default both DMA slots to VRAM; copy/fill re-emit them per operation */
+	OUT_RING  (chan, chan->vram->handle);
+
+	if (chan->device->chipset < 0x10)
+		class = NV04_IMAGE_BLIT;
+	else
+		class = NV12_IMAGE_BLIT;
+
+	ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->blit);
+	if (ret) {
+		nvfx_surface_2d_takedown(ctx);
+		return ret;
+	}
+
+	BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_DMA_NOTIFY, 1);
+	OUT_RING  (chan, ctx->ntfy->handle);
+	BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_SURFACE, 1);
+	OUT_RING  (chan, ctx->surf2d->handle);
+	BEGIN_RING(chan, ctx->blit, NV01_IMAGE_BLIT_OPERATION, 1);
+	OUT_RING  (chan, NV01_IMAGE_BLIT_OPERATION_SRCCOPY);
+
+	ret = nouveau_grobj_alloc(chan, handle++, NV04_GDI_RECTANGLE_TEXT,
+				  &ctx->rect);
+	if (ret) {
+		nvfx_surface_2d_takedown(ctx);
+		return ret;
+	}
+
+	BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1);
+	OUT_RING  (chan, ctx->ntfy->handle);
+	BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1);
+	OUT_RING  (chan, ctx->surf2d->handle);
+	BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1);
+	OUT_RING  (chan, NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY);
+	BEGIN_RING(chan, ctx->rect,
+			 NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1);
+	OUT_RING  (chan, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE);
+
+	switch (chan->device->chipset & 0xf0) { /* pick the swizzled-surface class by chipset family */
+	case 0x00:
+	case 0x10:
+		class = NV04_SWIZZLED_SURFACE;
+		break;
+	case 0x20:
+		class = NV20_SWIZZLED_SURFACE;
+		break;
+	case 0x30:
+		class = NV30_SWIZZLED_SURFACE;
+		break;
+	case 0x40:
+	case 0x60:
+		class = NV40_SWIZZLED_SURFACE;
+		break;
+	default:
+		/* Famous last words: this really can't happen.. */
+		assert(0); /* NOTE(review): if asserts are compiled out, class still holds the IMAGE_BLIT class chosen above */
+		break;
+	}
+
+	ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->swzsurf);
+	if (ret) {
+		nvfx_surface_2d_takedown(ctx);
+		return ret;
+	}
+
+	switch (chan->device->chipset & 0xf0) { /* pick the scaled-image-from-memory class by chipset family */
+	case 0x10:
+	case 0x20:
+		class = NV10_SCALED_IMAGE_FROM_MEMORY;
+		break;
+	case 0x30:
+		class = NV30_SCALED_IMAGE_FROM_MEMORY;
+		break;
+	case 0x40:
+	case 0x60:
+		class = NV40_SCALED_IMAGE_FROM_MEMORY;
+		break;
+	default: /* family 0x00 and any unlisted family fall back to the NV04 class */
+		class = NV04_SCALED_IMAGE_FROM_MEMORY;
+		break;
+	}
+
+	ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->sifm);
+	if (ret) {
+		nvfx_surface_2d_takedown(ctx);
+		return ret;
+	}
+
+	return 0;
+}
+
+void nvfx_init_surface_functions(struct nvfx_context *nvfx)
 {
 	nvfx->pipe.surface_copy = nvfx_surface_copy;
 	nvfx->pipe.surface_fill = nvfx_surface_fill;
diff --git a/src/gallium/drivers/nvfx/nvfx_transfer.c b/src/gallium/drivers/nvfx/nvfx_transfer.c
index a776ab5..ec0e6d2 100644
--- a/src/gallium/drivers/nvfx/nvfx_transfer.c
+++ b/src/gallium/drivers/nvfx/nvfx_transfer.c
@@ -60,11 +60,7 @@ nvfx_miptree_transfer_new(struct pipe_context *pipe,
 	struct nvfx_miptree *mt = (struct nvfx_miptree *)pt;
 	struct nvfx_transfer *tx;
 	struct pipe_resource tx_tex_template, *tx_tex;
-	static int no_transfer = -1;
 	unsigned bind = nvfx_transfer_bind_flags(usage);
-	if(no_transfer < 0)
-		no_transfer = debug_get_bool_option("NOUVEAU_NO_TRANSFER", FALSE);
-
 
 	tx = CALLOC_STRUCT(nvfx_transfer);
 	if (!tx)
@@ -76,13 +72,14 @@ nvfx_miptree_transfer_new(struct pipe_context *pipe,
 
 	pipe_resource_reference(&tx->base.resource, pt);
 	tx->base.sr = sr;
+	tx->base.stride = mt->linear_pitch;
+	if(!tx->base.stride)
+		tx->base.stride = util_format_get_stride(pt->format, u_minify(pt->width0, sr.level));
 	tx->base.usage = usage;
 	tx->base.box = *box;
-	tx->base.stride = mt->level[sr.level].pitch;
 
 	/* Direct access to texture */
-	if ((pt->_usage == PIPE_USAGE_DYNAMIC ||
-	     no_transfer) &&
+	if ((util_format_is_s3tc(pt->format) || pt->_usage & PIPE_USAGE_DYNAMIC) &&
 	    pt->flags & NVFX_RESOURCE_FLAG_LINEAR)
 	{
 		tx->direct = true;
@@ -107,7 +104,9 @@ nvfx_miptree_transfer_new(struct pipe_context *pipe,
 		return NULL;
 	}
 
-	tx->base.stride = ((struct nvfx_miptree*)tx_tex)->level[0].pitch;
+	tx->base.stride = ((struct nvfx_miptree*)tx_tex)->linear_pitch;
+	if(!tx->base.stride)
+		tx->base.stride = util_format_get_stride(pt->format, box->width);
 
 	tx->surface = pscreen->get_tex_surface(pscreen, tx_tex,
 	                                       0, 0, 0,
@@ -123,20 +122,16 @@ nvfx_miptree_transfer_new(struct pipe_context *pipe,
 	}
 
 	if (usage & PIPE_TRANSFER_READ) {
-		struct nvfx_screen *nvscreen = nvfx_screen(pscreen);
 		struct pipe_surface *src;
 
 		src = pscreen->get_tex_surface(pscreen, pt,
 	                                       sr.face, sr.level, box->z,
 	                                       PIPE_BIND_BLIT_SOURCE);
 
-		/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
-		/* TODO: Check if SIFM can un-swizzle */
-		nvscreen->eng2d->copy(nvscreen->eng2d,
-		                      tx->surface, 0, 0,
-		                      src,
-				      box->x, box->y,
-		                      box->width, box->height);
+		pipe->surface_copy(pipe,
+		                  tx->surface, 0, 0,
+		                  src, box->x, box->y,
+		                  box->width, box->height);
 
 		pipe_surface_reference(&src, NULL);
 	}
@@ -152,7 +147,6 @@ nvfx_miptree_transfer_del(struct pipe_context *pipe,
 
 	if (!tx->direct && (ptx->usage & PIPE_TRANSFER_WRITE)) {
 		struct pipe_screen *pscreen = pipe->screen;
-		struct nvfx_screen *nvscreen = nvfx_screen(pscreen);
 		struct pipe_surface *dst;
 
 		dst = pscreen->get_tex_surface(pscreen,
@@ -162,11 +156,10 @@ nvfx_miptree_transfer_del(struct pipe_context *pipe,
 					       ptx->box.z,
 	                                       PIPE_BIND_BLIT_DESTINATION);
 
-		/* TODO: Check if SIFM can deal with x,y,w,h when swizzling */
-		nvscreen->eng2d->copy(nvscreen->eng2d,
-		                      dst, ptx->box.x, ptx->box.y,
-		                      tx->surface, 0, 0,
-		                      ptx->box.width, ptx->box.height);
+		pipe->surface_copy(pipe,
+		                  dst, tx->base.box.x, tx->base.box.y,
+		                  tx->surface, 0, 0,
+		                  tx->base.box.width, tx->base.box.height);
 
 		pipe_surface_reference(&dst, NULL);
 	}
@@ -179,27 +172,26 @@ nvfx_miptree_transfer_del(struct pipe_context *pipe,
 void *
 nvfx_miptree_transfer_map(struct pipe_context *pipe, struct pipe_transfer *ptx)
 {
-	struct pipe_screen *pscreen = pipe->screen;
 	struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx;
-	struct nv04_surface *ns = (struct nv04_surface *)tx->surface;
+	struct nvfx_surface *ns = (struct nvfx_surface *)tx->surface;
 	struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->surface->texture;
-	uint8_t *map = nouveau_screen_bo_map(pscreen, mt->base.bo,
+
+	uint8_t *map = nouveau_screen_bo_map(pipe->screen, mt->base.bo,
 					     nouveau_screen_transfer_flags(ptx->usage));
 
 	if(!tx->direct)
 		return map + ns->base.offset;
 	else
-		return (map + ns->base.offset + 
-			ptx->box.y * ns->pitch + 
-			ptx->box.x * util_format_get_blocksize(ptx->resource->format));
+		return map + ns->base.offset
+		+ util_format_get_2d_size(ns->base.format, ns->pitch, ptx->box.y)
+		+ util_format_get_stride(ptx->resource->format, ptx->box.x);
 }
 
 void
 nvfx_miptree_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *ptx)
 {
-	struct pipe_screen *pscreen = pipe->screen;
 	struct nvfx_transfer *tx = (struct nvfx_transfer *)ptx;
 	struct nvfx_miptree *mt = (struct nvfx_miptree *)tx->surface->texture;
 
-	nouveau_screen_bo_unmap(pscreen, mt->base.bo);
+	nouveau_screen_bo_unmap(pipe->screen, mt->base.bo);
 }




More information about the mesa-commit mailing list