[Mesa-dev] [PATCH] freedreno: gallium driver for adreno

Rob Clark robdclark at gmail.com
Mon Feb 25 16:59:05 PST 2013


From: Rob Clark <robclark at freedesktop.org>

Currently works on a220.  Others in the a2xx family look pretty similar
and should be pretty straightforward to support with the same driver.

The a3xx has a new shader ISA, and while many registers appear similar,
the register addresses have been completely shuffled around.  I am not
sure yet whether it is best to support it in the same driver with a
different compiler, or whether it should be split out into a separate
driver.

v1: original
v2: build file updates from review comments, and remove GPL licensed
    header files from msm kernel

Signed-off-by: Rob Clark <robclark at freedesktop.org>
---
 configure.ac                                       |   14 +-
 src/gallium/drivers/freedreno/Makefile.am          |   32 +
 src/gallium/drivers/freedreno/disasm.c             |  632 +++++++++++
 src/gallium/drivers/freedreno/disasm.h             |   40 +
 src/gallium/drivers/freedreno/freedreno_a2xx_reg.h | 1172 ++++++++++++++++++++
 src/gallium/drivers/freedreno/freedreno_blend.c    |  175 +++
 src/gallium/drivers/freedreno/freedreno_blend.h    |   44 +
 src/gallium/drivers/freedreno/freedreno_clear.c    |  200 ++++
 src/gallium/drivers/freedreno/freedreno_clear.h    |   37 +
 src/gallium/drivers/freedreno/freedreno_compiler.c | 1096 ++++++++++++++++++
 src/gallium/drivers/freedreno/freedreno_compiler.h |   38 +
 src/gallium/drivers/freedreno/freedreno_context.c  |  200 ++++
 src/gallium/drivers/freedreno/freedreno_context.h  |  184 +++
 src/gallium/drivers/freedreno/freedreno_fence.c    |   52 +
 src/gallium/drivers/freedreno/freedreno_fence.h    |   65 ++
 src/gallium/drivers/freedreno/freedreno_gmem.c     |  479 ++++++++
 src/gallium/drivers/freedreno/freedreno_gmem.h     |   37 +
 src/gallium/drivers/freedreno/freedreno_pm4.h      |   86 ++
 src/gallium/drivers/freedreno/freedreno_program.c  |  503 +++++++++
 src/gallium/drivers/freedreno/freedreno_program.h  |   82 ++
 .../drivers/freedreno/freedreno_rasterizer.c       |  151 +++
 .../drivers/freedreno/freedreno_rasterizer.h       |   48 +
 src/gallium/drivers/freedreno/freedreno_resource.c |  248 +++++
 src/gallium/drivers/freedreno/freedreno_resource.h |   49 +
 src/gallium/drivers/freedreno/freedreno_screen.c   |  471 ++++++++
 src/gallium/drivers/freedreno/freedreno_screen.h   |   70 ++
 src/gallium/drivers/freedreno/freedreno_state.c    |  639 +++++++++++
 src/gallium/drivers/freedreno/freedreno_state.h    |   53 +
 src/gallium/drivers/freedreno/freedreno_surface.c  |   73 ++
 src/gallium/drivers/freedreno/freedreno_surface.h  |   54 +
 src/gallium/drivers/freedreno/freedreno_texture.c  |  286 +++++
 src/gallium/drivers/freedreno/freedreno_texture.h  |   61 +
 src/gallium/drivers/freedreno/freedreno_util.c     |  351 ++++++
 src/gallium/drivers/freedreno/freedreno_util.h     |  124 +++
 src/gallium/drivers/freedreno/freedreno_vbo.c      |  226 ++++
 src/gallium/drivers/freedreno/freedreno_vbo.h      |   42 +
 src/gallium/drivers/freedreno/freedreno_zsa.c      |  144 +++
 src/gallium/drivers/freedreno/freedreno_zsa.h      |   60 +
 src/gallium/drivers/freedreno/instr.h              |  386 +++++++
 src/gallium/drivers/freedreno/ir.c                 |  701 ++++++++++++
 src/gallium/drivers/freedreno/ir.h                 |  243 ++++
 src/gallium/targets/dri-freedreno/Makefile.am      |   71 ++
 src/gallium/targets/dri-freedreno/target.c         |   20 +
 src/gallium/targets/egl-static/Makefile.am         |    9 +
 src/gallium/targets/egl-static/egl_pipe.c          |   22 +
 src/gallium/winsys/freedreno/drm/.gitignore        |    1 +
 src/gallium/winsys/freedreno/drm/Makefile.am       |   32 +
 .../winsys/freedreno/drm/freedreno_drm_public.h    |    9 +
 .../winsys/freedreno/drm/freedreno_drm_winsys.c    |   18 +
 49 files changed, 9829 insertions(+), 1 deletion(-)
 create mode 100644 src/gallium/drivers/freedreno/Makefile.am
 create mode 100644 src/gallium/drivers/freedreno/disasm.c
 create mode 100644 src/gallium/drivers/freedreno/disasm.h
 create mode 100644 src/gallium/drivers/freedreno/freedreno_a2xx_reg.h
 create mode 100644 src/gallium/drivers/freedreno/freedreno_blend.c
 create mode 100644 src/gallium/drivers/freedreno/freedreno_blend.h
 create mode 100644 src/gallium/drivers/freedreno/freedreno_clear.c
 create mode 100644 src/gallium/drivers/freedreno/freedreno_clear.h
 create mode 100644 src/gallium/drivers/freedreno/freedreno_compiler.c
 create mode 100644 src/gallium/drivers/freedreno/freedreno_compiler.h
 create mode 100644 src/gallium/drivers/freedreno/freedreno_context.c
 create mode 100644 src/gallium/drivers/freedreno/freedreno_context.h
 create mode 100644 src/gallium/drivers/freedreno/freedreno_fence.c
 create mode 100644 src/gallium/drivers/freedreno/freedreno_fence.h
 create mode 100644 src/gallium/drivers/freedreno/freedreno_gmem.c
 create mode 100644 src/gallium/drivers/freedreno/freedreno_gmem.h
 create mode 100644 src/gallium/drivers/freedreno/freedreno_pm4.h
 create mode 100644 src/gallium/drivers/freedreno/freedreno_program.c
 create mode 100644 src/gallium/drivers/freedreno/freedreno_program.h
 create mode 100644 src/gallium/drivers/freedreno/freedreno_rasterizer.c
 create mode 100644 src/gallium/drivers/freedreno/freedreno_rasterizer.h
 create mode 100644 src/gallium/drivers/freedreno/freedreno_resource.c
 create mode 100644 src/gallium/drivers/freedreno/freedreno_resource.h
 create mode 100644 src/gallium/drivers/freedreno/freedreno_screen.c
 create mode 100644 src/gallium/drivers/freedreno/freedreno_screen.h
 create mode 100644 src/gallium/drivers/freedreno/freedreno_state.c
 create mode 100644 src/gallium/drivers/freedreno/freedreno_state.h
 create mode 100644 src/gallium/drivers/freedreno/freedreno_surface.c
 create mode 100644 src/gallium/drivers/freedreno/freedreno_surface.h
 create mode 100644 src/gallium/drivers/freedreno/freedreno_texture.c
 create mode 100644 src/gallium/drivers/freedreno/freedreno_texture.h
 create mode 100644 src/gallium/drivers/freedreno/freedreno_util.c
 create mode 100644 src/gallium/drivers/freedreno/freedreno_util.h
 create mode 100644 src/gallium/drivers/freedreno/freedreno_vbo.c
 create mode 100644 src/gallium/drivers/freedreno/freedreno_vbo.h
 create mode 100644 src/gallium/drivers/freedreno/freedreno_zsa.c
 create mode 100644 src/gallium/drivers/freedreno/freedreno_zsa.h
 create mode 100644 src/gallium/drivers/freedreno/instr.h
 create mode 100644 src/gallium/drivers/freedreno/ir.c
 create mode 100644 src/gallium/drivers/freedreno/ir.h
 create mode 100644 src/gallium/targets/dri-freedreno/Makefile.am
 create mode 100644 src/gallium/targets/dri-freedreno/target.c
 create mode 100644 src/gallium/winsys/freedreno/drm/.gitignore
 create mode 100644 src/gallium/winsys/freedreno/drm/Makefile.am
 create mode 100644 src/gallium/winsys/freedreno/drm/freedreno_drm_public.h
 create mode 100644 src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c

diff --git a/configure.ac b/configure.ac
index 16c2f8c..aeaa9c0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -34,6 +34,7 @@ LIBDRM_RADEON_REQUIRED=2.4.42
 LIBDRM_INTEL_REQUIRED=2.4.38
 LIBDRM_NVVIEUX_REQUIRED=2.4.33
 LIBDRM_NOUVEAU_REQUIRED="2.4.33 libdrm >= 2.4.41"
+LIBDRM_FREEDRENO_REQUIRED=2.4.39
 DRI2PROTO_REQUIRED=2.6
 GLPROTO_REQUIRED=1.4.14
 LIBDRM_XORG_REQUIRED=2.4.24
@@ -659,7 +660,7 @@ GALLIUM_DRIVERS_DEFAULT="r300,r600,svga,swrast"
 AC_ARG_WITH([gallium-drivers],
     [AS_HELP_STRING([--with-gallium-drivers@<:@=DIRS...@:>@],
         [comma delimited Gallium drivers list, e.g.
-        "i915,nouveau,r300,r600,radeonsi,svga,swrast"
+        "i915,nouveau,r300,r600,radeonsi,freedreno,svga,swrast"
         @<:@default=r300,r600,svga,swrast@:>@])],
     [with_gallium_drivers="$withval"],
     [with_gallium_drivers="$GALLIUM_DRIVERS_DEFAULT"])
@@ -1880,6 +1881,13 @@ if test "x$with_gallium_drivers" != x; then
             GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nv30 nv50 nvc0"
             gallium_check_st "nouveau/drm" "dri-nouveau" "xorg-nouveau" "" "xvmc-nouveau" "vdpau-nouveau"
             ;;
+        xfreedreno)
+            HAVE_GALLIUM_FREEDRENO=yes
+            PKG_CHECK_MODULES([FREEDRENO], [libdrm_freedreno >= $LIBDRM_FREEDRENO_REQUIRED])
+            gallium_require_drm_loader
+            GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS freedreno"
+            gallium_check_st "freedreno/drm" "dri-freedreno" "" "" "" ""
+            ;;
         xswrast)
             HAVE_GALLIUM_SOFTPIPE=yes
             GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS softpipe"
@@ -1960,6 +1968,7 @@ AM_CONDITIONAL(HAVE_GALLIUM_R300, test "x$HAVE_GALLIUM_R300" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_R600, test "x$HAVE_GALLIUM_R600" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_RADEONSI, test "x$HAVE_GALLIUM_RADEONSI" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_NOUVEAU, test "x$HAVE_GALLIUM_NOUVEAU" = xyes)
+AM_CONDITIONAL(HAVE_GALLIUM_FREEDRENO, test "x$HAVE_GALLIUM_FREEDRENO" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_SOFTPIPE, test "x$HAVE_GALLIUM_SOFTPIPE" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_LLVMPIPE, test "x$HAVE_GALLIUM_LLVMPIPE" = xyes)
 
@@ -2083,6 +2092,7 @@ AC_CONFIG_FILES([Makefile
 		src/gallium/drivers/rbug/Makefile
 		src/gallium/drivers/softpipe/Makefile
 		src/gallium/drivers/svga/Makefile
+		src/gallium/drivers/freedreno/Makefile
 		src/gallium/drivers/trace/Makefile
 		src/gallium/state_trackers/Makefile
 		src/gallium/state_trackers/clover/Makefile
@@ -2103,6 +2113,7 @@ AC_CONFIG_FILES([Makefile
 		src/gallium/targets/dri-r300/Makefile
 		src/gallium/targets/dri-r600/Makefile
 		src/gallium/targets/dri-radeonsi/Makefile
+		src/gallium/targets/dri-freedreno/Makefile
 		src/gallium/targets/dri-swrast/Makefile
 		src/gallium/targets/dri-vmwgfx/Makefile
 		src/gallium/targets/egl-static/Makefile
@@ -2133,6 +2144,7 @@ AC_CONFIG_FILES([Makefile
 		src/gallium/winsys/nouveau/drm/Makefile
 		src/gallium/winsys/radeon/drm/Makefile
 		src/gallium/winsys/svga/drm/Makefile
+		src/gallium/winsys/freedreno/drm/Makefile
 		src/gallium/winsys/sw/Makefile
 		src/gallium/winsys/sw/dri/Makefile
 		src/gallium/winsys/sw/fbdev/Makefile
diff --git a/src/gallium/drivers/freedreno/Makefile.am b/src/gallium/drivers/freedreno/Makefile.am
new file mode 100644
index 0000000..9bb532d
--- /dev/null
+++ b/src/gallium/drivers/freedreno/Makefile.am
@@ -0,0 +1,32 @@
+include $(top_srcdir)/src/gallium/Automake.inc
+
+noinst_LTLIBRARIES = libfreedreno.la
+
+AM_CFLAGS = \
+	-Wno-packed-bitfield-compat \
+	-I$(top_srcdir)/src/gallium/drivers \
+	$(GALLIUM_CFLAGS) \
+	$(FREEDRENO_CFLAGS) \
+	$(PIC_FLAGS) \
+	$(VISIBILITY_CFLAGS)
+
+libfreedreno_la_SOURCES = \
+	freedreno_util.c \
+	freedreno_fence.c \
+	freedreno_resource.c \
+	freedreno_surface.c \
+	freedreno_vbo.c \
+	freedreno_blend.c \
+	freedreno_rasterizer.c \
+	freedreno_zsa.c \
+	freedreno_state.c \
+	freedreno_clear.c \
+	freedreno_program.c \
+	freedreno_texture.c \
+	freedreno_context.c \
+	freedreno_screen.c \
+	freedreno_gmem.c \
+	freedreno_compiler.c \
+	ir.c \
+	disasm.c
+
diff --git a/src/gallium/drivers/freedreno/disasm.c b/src/gallium/drivers/freedreno/disasm.c
new file mode 100644
index 0000000..ee14ced
--- /dev/null
+++ b/src/gallium/drivers/freedreno/disasm.c
@@ -0,0 +1,632 @@
+/*
+ * Copyright (c) 2012 Rob Clark <robdclark at gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+
+#include "disasm.h"
+#include "instr.h"
+
+static const char *levels[] = {
+		"\t",
+		"\t\t",
+		"\t\t\t",
+		"\t\t\t\t",
+		"\t\t\t\t\t",
+		"\t\t\t\t\t\t",
+		"\t\t\t\t\t\t\t",
+		"\t\t\t\t\t\t\t\t",
+		"\t\t\t\t\t\t\t\t\t",
+		"x",
+		"x",
+		"x",
+		"x",
+		"x",
+		"x",
+};
+
+static enum debug_t debug;
+
+/*
+ * ALU instructions:
+ */
+
+static const char chan_names[] = {
+		'x', 'y', 'z', 'w',
+		/* these only apply to FETCH dst's: */
+		'0', '1', '?', '_',
+};
+
+static void print_srcreg(uint32_t num, uint32_t type,
+		uint32_t swiz, uint32_t negate, uint32_t abs)
+{
+	if (negate)
+		printf("-");
+	if (abs)
+		printf("|");
+	printf("%c%u", type ? 'R' : 'C', num);
+	if (swiz) {
+		int i;
+		printf(".");
+		for (i = 0; i < 4; i++) {
+			printf("%c", chan_names[(swiz + i) & 0x3]);
+			swiz >>= 2;
+		}
+	}
+	if (abs)
+		printf("|");
+}
+
+static void print_dstreg(uint32_t num, uint32_t mask, uint32_t dst_exp)
+{
+	printf("%s%u", dst_exp ? "export" : "R", num);
+	if (mask != 0xf) {
+		int i;
+		printf(".");
+		for (i = 0; i < 4; i++) {
+			printf("%c", (mask & 0x1) ? chan_names[i] : '_');
+			mask >>= 1;
+		}
+	}
+}
+
+static void print_export_comment(uint32_t num, enum shader_t type)
+{
+	const char *name = NULL;
+	switch (type) {
+	case SHADER_VERTEX:
+		switch (num) {
+		case 62: name = "gl_Position";  break;
+		case 63: name = "gl_PointSize"; break;
+		}
+		break;
+	case SHADER_FRAGMENT:
+		switch (num) {
+		case 0:  name = "gl_FragColor"; break;
+		}
+		break;
+	}
+	/* if we had a symbol table here, we could look
+	 * up the name of the varying..
+	 */
+	if (name) {
+		printf("\t; %s", name);
+	}
+}
+
+struct {
+	uint32_t num_srcs;
+	const char *name;
+} vector_instructions[0x20] = {
+#define INSTR(opc, num_srcs) [opc] = { num_srcs, #opc }
+		INSTR(ADDv, 2),
+		INSTR(MULv, 2),
+		INSTR(MAXv, 2),
+		INSTR(MINv, 2),
+		INSTR(SETEv, 2),
+		INSTR(SETGTv, 2),
+		INSTR(SETGTEv, 2),
+		INSTR(SETNEv, 2),
+		INSTR(FRACv, 1),
+		INSTR(TRUNCv, 1),
+		INSTR(FLOORv, 1),
+		INSTR(MULADDv, 3),
+		INSTR(CNDEv, 3),
+		INSTR(CNDGTEv, 3),
+		INSTR(CNDGTv, 3),
+		INSTR(DOT4v, 2),
+		INSTR(DOT3v, 2),
+		INSTR(DOT2ADDv, 3),  // ???
+		INSTR(CUBEv, 2),
+		INSTR(MAX4v, 1),
+		INSTR(PRED_SETE_PUSHv, 2),
+		INSTR(PRED_SETNE_PUSHv, 2),
+		INSTR(PRED_SETGT_PUSHv, 2),
+		INSTR(PRED_SETGTE_PUSHv, 2),
+		INSTR(KILLEv, 2),
+		INSTR(KILLGTv, 2),
+		INSTR(KILLGTEv, 2),
+		INSTR(KILLNEv, 2),
+		INSTR(DSTv, 2),
+		INSTR(MOVAv, 1),
+}, scalar_instructions[0x40] = {
+		INSTR(ADDs, 1),
+		INSTR(ADD_PREVs, 1),
+		INSTR(MULs, 1),
+		INSTR(MUL_PREVs, 1),
+		INSTR(MUL_PREV2s, 1),
+		INSTR(MAXs, 1),
+		INSTR(MINs, 1),
+		INSTR(SETEs, 1),
+		INSTR(SETGTs, 1),
+		INSTR(SETGTEs, 1),
+		INSTR(SETNEs, 1),
+		INSTR(FRACs, 1),
+		INSTR(TRUNCs, 1),
+		INSTR(FLOORs, 1),
+		INSTR(EXP_IEEE, 1),
+		INSTR(LOG_CLAMP, 1),
+		INSTR(LOG_IEEE, 1),
+		INSTR(RECIP_CLAMP, 1),
+		INSTR(RECIP_FF, 1),
+		INSTR(RECIP_IEEE, 1),
+		INSTR(RECIPSQ_CLAMP, 1),
+		INSTR(RECIPSQ_FF, 1),
+		INSTR(RECIPSQ_IEEE, 1),
+		INSTR(MOVAs, 1),
+		INSTR(MOVA_FLOORs, 1),
+		INSTR(SUBs, 1),
+		INSTR(SUB_PREVs, 1),
+		INSTR(PRED_SETEs, 1),
+		INSTR(PRED_SETNEs, 1),
+		INSTR(PRED_SETGTs, 1),
+		INSTR(PRED_SETGTEs, 1),
+		INSTR(PRED_SET_INVs, 1),
+		INSTR(PRED_SET_POPs, 1),
+		INSTR(PRED_SET_CLRs, 1),
+		INSTR(PRED_SET_RESTOREs, 1),
+		INSTR(KILLEs, 1),
+		INSTR(KILLGTs, 1),
+		INSTR(KILLGTEs, 1),
+		INSTR(KILLNEs, 1),
+		INSTR(KILLONEs, 1),
+		INSTR(SQRT_IEEE, 1),
+		INSTR(MUL_CONST_0, 1),
+		INSTR(MUL_CONST_1, 1),
+		INSTR(ADD_CONST_0, 1),
+		INSTR(ADD_CONST_1, 1),
+		INSTR(SUB_CONST_0, 1),
+		INSTR(SUB_CONST_1, 1),
+		INSTR(SIN, 1),
+		INSTR(COS, 1),
+		INSTR(RETAIN_PREV, 1),
+#undef INSTR
+};
+
+static int disasm_alu(uint32_t *dwords, uint32_t alu_off,
+		int level, int sync, enum shader_t type)
+{
+	instr_alu_t *alu = (instr_alu_t *)dwords;
+	/* prints the vector op of one ALU instruction, then the optional scalar op */
+	printf("%s", levels[level]);
+	if (debug & PRINT_RAW) {
+		printf("%02x: %08x %08x %08x\t", alu_off,
+				dwords[0], dwords[1], dwords[2]);
+	}
+	printf("   %sALU:\t", sync ? "(S)" : "   ");
+	/* table slots without an INSTR() entry have a NULL name; print the raw opcode like the scalar path */
+	if (vector_instructions[alu->vector_opc].name)
+		printf("%s", vector_instructions[alu->vector_opc].name);
+	else
+		printf("OP(%u)", alu->vector_opc);
+	if (alu->pred_select & 0x2) {
+		/* seems to work similar to conditional execution in ARM instruction
+		 * set, so let's use a similar syntax for now:
+		 */
+		printf("%s", (alu->pred_select & 0x1) ? "EQ" : "NE");
+	}
+	printf("\t");
+
+	print_dstreg(alu->vector_dest, alu->vector_write_mask, alu->export_data);
+	printf(" = ");
+	if (vector_instructions[alu->vector_opc].num_srcs == 3) {
+		print_srcreg(alu->src3_reg, alu->src3_sel, alu->src3_swiz,
+				alu->src3_reg_negate, alu->src3_reg_abs);
+		printf(", ");
+	}
+	print_srcreg(alu->src1_reg, alu->src1_sel, alu->src1_swiz,
+			alu->src1_reg_negate, alu->src1_reg_abs);
+	if (vector_instructions[alu->vector_opc].num_srcs > 1) {
+		printf(", ");
+		print_srcreg(alu->src2_reg, alu->src2_sel, alu->src2_swiz,
+				alu->src2_reg_negate, alu->src2_reg_abs);
+	}
+
+	if (alu->vector_clamp)
+		printf(" CLAMP");
+
+	if (alu->export_data)
+		print_export_comment(alu->vector_dest, type);
+
+	printf("\n");
+
+	if (alu->scalar_write_mask || !alu->vector_write_mask) {
+		/* 2nd optional scalar op: */
+
+		printf("%s", levels[level]);
+		if (debug & PRINT_RAW)
+			printf("                          \t");
+
+		if (scalar_instructions[alu->scalar_opc].name) {
+			printf("\t    \t%s\t", scalar_instructions[alu->scalar_opc].name);
+		} else {
+			printf("\t    \tOP(%u)\t", alu->scalar_opc);
+		}
+
+		print_dstreg(alu->scalar_dest, alu->scalar_write_mask, alu->export_data);
+		printf(" = ");
+		print_srcreg(alu->src3_reg, alu->src3_sel, alu->src3_swiz,
+				alu->src3_reg_negate, alu->src3_reg_abs);
+		// TODO ADD/MUL must have another src?!?
+		if (alu->scalar_clamp)
+			printf(" CLAMP");
+		if (alu->export_data)
+			print_export_comment(alu->scalar_dest, type);
+		printf("\n");
+	}
+
+	return 0;
+}
+
+
+/*
+ * FETCH instructions:
+ */
+
+struct {
+	const char *name;
+} fetch_types[0xff] = {
+#define TYPE(id) [id] = { #id }
+		TYPE(FMT_1_REVERSE),
+		TYPE(FMT_32_FLOAT),
+		TYPE(FMT_32_32_FLOAT),
+		TYPE(FMT_32_32_32_FLOAT),
+		TYPE(FMT_32_32_32_32_FLOAT),
+		TYPE(FMT_16),
+		TYPE(FMT_16_16),
+		TYPE(FMT_16_16_16_16),
+		TYPE(FMT_8),
+		TYPE(FMT_8_8),
+		TYPE(FMT_8_8_8_8),
+		TYPE(FMT_32),
+		TYPE(FMT_32_32),
+		TYPE(FMT_32_32_32_32),
+#undef TYPE
+};
+
+static void print_fetch_dst(uint32_t dst_reg, uint32_t dst_swiz)
+{
+	int i;
+	printf("\tR%u.", dst_reg);
+	for (i = 0; i < 4; i++) {
+		printf("%c", chan_names[dst_swiz & 0x7]);
+		dst_swiz >>= 3;
+	}
+}
+
+static void print_fetch_vtx(instr_fetch_t *fetch)
+{
+	instr_fetch_vtx_t *vtx = &fetch->vtx;
+
+	if (vtx->pred_select) {
+		/* seems to work similar to conditional execution in ARM instruction
+		 * set, so let's use a similar syntax for now:
+		 */
+		printf(vtx->pred_condition ? "EQ" : "NE");
+	}
+
+	print_fetch_dst(vtx->dst_reg, vtx->dst_swiz);
+	printf(" = R%u.", vtx->src_reg);
+	printf("%c", chan_names[vtx->src_swiz & 0x3]);
+	if (fetch_types[vtx->format].name) {
+		printf(" %s", fetch_types[vtx->format].name);
+	} else  {
+		printf(" TYPE(0x%x)", vtx->format);
+	}
+	printf(" %s", vtx->format_comp_all ? "SIGNED" : "UNSIGNED");
+	if (!vtx->num_format_all)
+		printf(" NORMALIZED");
+	printf(" STRIDE(%u)", vtx->stride);
+	if (vtx->offset)
+		printf(" OFFSET(%u)", vtx->offset);
+	printf(" CONST(%u, %u)", vtx->const_index, vtx->const_index_sel);
+	if (0) {
+		// XXX
+		printf(" src_reg_am=%u", vtx->src_reg_am);
+		printf(" dst_reg_am=%u", vtx->dst_reg_am);
+		printf(" num_format_all=%u", vtx->num_format_all);
+		printf(" signed_rf_mode_all=%u", vtx->signed_rf_mode_all);
+		printf(" exp_adjust_all=%u", vtx->exp_adjust_all);
+	}
+}
+
+static void print_fetch_tex(instr_fetch_t *fetch)
+{
+	static const char *filter[] = {
+			[TEX_FILTER_POINT] = "POINT",
+			[TEX_FILTER_LINEAR] = "LINEAR",
+			[TEX_FILTER_BASEMAP] = "BASEMAP",
+	};
+	static const char *aniso_filter[] = {
+			[ANISO_FILTER_DISABLED] = "DISABLED",
+			[ANISO_FILTER_MAX_1_1] = "MAX_1_1",
+			[ANISO_FILTER_MAX_2_1] = "MAX_2_1",
+			[ANISO_FILTER_MAX_4_1] = "MAX_4_1",
+			[ANISO_FILTER_MAX_8_1] = "MAX_8_1",
+			[ANISO_FILTER_MAX_16_1] = "MAX_16_1",
+	};
+	static const char *arbitrary_filter[] = {
+			[ARBITRARY_FILTER_2X4_SYM] = "2x4_SYM",
+			[ARBITRARY_FILTER_2X4_ASYM] = "2x4_ASYM",
+			[ARBITRARY_FILTER_4X2_SYM] = "4x2_SYM",
+			[ARBITRARY_FILTER_4X2_ASYM] = "4x2_ASYM",
+			[ARBITRARY_FILTER_4X4_SYM] = "4x4_SYM",
+			[ARBITRARY_FILTER_4X4_ASYM] = "4x4_ASYM",
+	};
+	static const char *sample_loc[] = {
+			[SAMPLE_CENTROID] = "CENTROID",
+			[SAMPLE_CENTER] = "CENTER",
+	};
+	instr_fetch_tex_t *tex = &fetch->tex;
+	uint32_t src_swiz = tex->src_swiz;
+	int i;
+
+	if (tex->pred_select) {
+		/* seems to work similar to conditional execution in ARM instruction
+		 * set, so let's use a similar syntax for now:
+		 */
+		printf(tex->pred_condition ? "EQ" : "NE");
+	}
+
+	print_fetch_dst(tex->dst_reg, tex->dst_swiz);
+	printf(" = R%u.", tex->src_reg);
+	for (i = 0; i < 3; i++) {
+		printf("%c", chan_names[src_swiz & 0x3]);
+		src_swiz >>= 2;
+	}
+	printf(" CONST(%u)", tex->const_idx);
+	if (tex->fetch_valid_only)
+		printf(" VALID_ONLY");
+	if (tex->tx_coord_denorm)
+		printf(" DENORM");
+	if (tex->mag_filter != TEX_FILTER_USE_FETCH_CONST)
+		printf(" MAG(%s)", filter[tex->mag_filter]);
+	if (tex->min_filter != TEX_FILTER_USE_FETCH_CONST)
+		printf(" MIN(%s)", filter[tex->min_filter]);
+	if (tex->mip_filter != TEX_FILTER_USE_FETCH_CONST)
+		printf(" MIP(%s)", filter[tex->mip_filter]);
+	if (tex->aniso_filter != ANISO_FILTER_USE_FETCH_CONST)
+		printf(" ANISO(%s)", aniso_filter[tex->aniso_filter]); /* NOTE(review): this and the other name-table lookups are unchecked — confirm every field value other than USE_FETCH_CONST has an entry */
+	if (tex->arbitrary_filter != ARBITRARY_FILTER_USE_FETCH_CONST)
+		printf(" ARBITRARY(%s)", arbitrary_filter[tex->arbitrary_filter]);
+	if (tex->vol_mag_filter != TEX_FILTER_USE_FETCH_CONST)
+		printf(" VOL_MAG(%s)", filter[tex->vol_mag_filter]);
+	if (tex->vol_min_filter != TEX_FILTER_USE_FETCH_CONST)
+		printf(" VOL_MIN(%s)", filter[tex->vol_min_filter]);
+	if (!tex->use_comp_lod) {
+		printf(" LOD(%u)", tex->use_comp_lod); /* NOTE(review): use_comp_lod is always 0 in this branch, so this prints LOD(0) — confirm the intended field */
+		printf(" LOD_BIAS(%u)", tex->lod_bias);
+	}
+	if (tex->use_reg_gradients)
+		printf(" USE_REG_GRADIENTS");
+	printf(" LOCATION(%s)", sample_loc[tex->sample_location]);
+	if (tex->offset_x || tex->offset_y || tex->offset_z)
+		printf(" OFFSET(%u,%u,%u)", tex->offset_x, tex->offset_y, tex->offset_z);
+}
+
+struct {
+	const char *name;
+	void (*fxn)(instr_fetch_t *cf);
+} fetch_instructions[] = {
+#define INSTR(opc, name, fxn) [opc] = { name, fxn }
+		INSTR(VTX_FETCH, "VERTEX", print_fetch_vtx),
+		INSTR(TEX_FETCH, "SAMPLE", print_fetch_tex),
+		INSTR(TEX_GET_BORDER_COLOR_FRAC, "?", print_fetch_tex),
+		INSTR(TEX_GET_COMP_TEX_LOD, "?", print_fetch_tex),
+		INSTR(TEX_GET_GRADIENTS, "?", print_fetch_tex),
+		INSTR(TEX_GET_WEIGHTS, "?", print_fetch_tex),
+		INSTR(TEX_SET_TEX_LOD, "SET_TEX_LOD", print_fetch_tex),
+		INSTR(TEX_SET_GRADIENTS_H, "?", print_fetch_tex),
+		INSTR(TEX_SET_GRADIENTS_V, "?", print_fetch_tex),
+		INSTR(TEX_RESERVED_4, "?", print_fetch_tex),
+#undef INSTR
+};
+
+static int disasm_fetch(uint32_t *dwords, uint32_t alu_off, int level, int sync)
+{
+	instr_fetch_t *fetch = (instr_fetch_t *)dwords;
+
+	printf("%s", levels[level]);
+	if (debug & PRINT_RAW) {
+		printf("%02x: %08x %08x %08x\t", alu_off,
+				dwords[0], dwords[1], dwords[2]);
+	}
+
+	printf("   %sFETCH:\t", sync ? "(S)" : "   ");
+	printf("%s", fetch_instructions[fetch->opc].name);
+	fetch_instructions[fetch->opc].fxn(fetch);
+	printf("\n");
+
+	return 0;
+}
+
+/*
+ * CF instructions:
+ */
+
+static int cf_exec(instr_cf_t *cf)
+{
+	return (cf->opc == EXEC) ||
+			(cf->opc == EXEC_END) ||
+			(cf->opc == COND_EXEC) ||
+			(cf->opc == COND_EXEC_END) ||
+			(cf->opc == COND_PRED_EXEC) ||
+			(cf->opc == COND_PRED_EXEC_END) ||
+			(cf->opc == COND_EXEC_PRED_CLEAN) ||
+			(cf->opc == COND_EXEC_PRED_CLEAN_END);
+}
+
+static int cf_cond_exec(instr_cf_t *cf)
+{
+	return (cf->opc == COND_EXEC) ||
+			(cf->opc == COND_EXEC_END) ||
+			(cf->opc == COND_PRED_EXEC) ||
+			(cf->opc == COND_PRED_EXEC_END) ||
+			(cf->opc == COND_EXEC_PRED_CLEAN) ||
+			(cf->opc == COND_EXEC_PRED_CLEAN_END);
+}
+
+static void print_cf_nop(instr_cf_t *cf)
+{
+}
+
+static void print_cf_exec(instr_cf_t *cf)
+{
+	printf(" ADDR(0x%x) CNT(0x%x)", cf->exec.address, cf->exec.count);
+	if (cf->exec.yeild)
+		printf(" YIELD");
+	if (cf->exec.vc)
+		printf(" VC(0x%x)", cf->exec.vc);
+	if (cf->exec.bool_addr)
+		printf(" BOOL_ADDR(0x%x)", cf->exec.bool_addr);
+	if (cf->exec.address_mode == ABSOLUTE_ADDR)
+		printf(" ABSOLUTE_ADDR");
+	if (cf_cond_exec(cf))
+		printf(" COND(%d)", cf->exec.condition);
+}
+
+static void print_cf_loop(instr_cf_t *cf)
+{
+	printf(" ADDR(0x%x) LOOP_ID(%d)", cf->loop.address, cf->loop.loop_id);
+	if (cf->loop.address_mode == ABSOLUTE_ADDR)
+		printf(" ABSOLUTE_ADDR");
+}
+
+static void print_cf_jmp_call(instr_cf_t *cf)
+{
+	printf(" ADDR(0x%x) DIR(%d)", cf->jmp_call.address, cf->jmp_call.direction);
+	if (cf->jmp_call.force_call)
+		printf(" FORCE_CALL");
+	if (cf->jmp_call.predicated_jmp)
+		printf(" COND(%d)", cf->jmp_call.condition);
+	if (cf->jmp_call.bool_addr)
+		printf(" BOOL_ADDR(0x%x)", cf->jmp_call.bool_addr);
+	if (cf->jmp_call.address_mode == ABSOLUTE_ADDR)
+		printf(" ABSOLUTE_ADDR");
+}
+
+static void print_cf_alloc(instr_cf_t *cf)
+{
+	static const char *bufname[] = {
+			[SQ_NO_ALLOC] = "NO ALLOC",
+			[SQ_POSITION] = "POSITION",
+			[SQ_PARAMETER_PIXEL] = "PARAM/PIXEL",
+			[SQ_MEMORY] = "MEMORY",
+	};
+	printf(" %s SIZE(0x%x)", bufname[cf->alloc.buffer_select], cf->alloc.size);
+	if (cf->alloc.no_serial)
+		printf(" NO_SERIAL");
+	if (cf->alloc.alloc_mode) // ???
+		printf(" ALLOC_MODE");
+}
+
+struct {
+	const char *name;
+	void (*fxn)(instr_cf_t *cf);
+} cf_instructions[] = {
+#define INSTR(opc, fxn) [opc] = { #opc, fxn }
+		INSTR(NOP, print_cf_nop),
+		INSTR(EXEC, print_cf_exec),
+		INSTR(EXEC_END, print_cf_exec),
+		INSTR(COND_EXEC, print_cf_exec),
+		INSTR(COND_EXEC_END, print_cf_exec),
+		INSTR(COND_PRED_EXEC, print_cf_exec),
+		INSTR(COND_PRED_EXEC_END, print_cf_exec),
+		INSTR(LOOP_START, print_cf_loop),
+		INSTR(LOOP_END, print_cf_loop),
+		INSTR(COND_CALL, print_cf_jmp_call),
+		INSTR(RETURN, print_cf_jmp_call),
+		INSTR(COND_JMP, print_cf_jmp_call),
+		INSTR(ALLOC, print_cf_alloc),
+		INSTR(COND_EXEC_PRED_CLEAN, print_cf_exec),
+		INSTR(COND_EXEC_PRED_CLEAN_END, print_cf_exec),
+		INSTR(MARK_VS_FETCH_DONE, print_cf_nop),  // ??
+#undef INSTR
+};
+
+static void print_cf(instr_cf_t *cf, int level)
+{
+	printf("%s", levels[level]);
+	if (debug & PRINT_RAW) {
+		uint16_t *words = (uint16_t *)cf;
+		printf("    %04x %04x %04x            \t",
+				words[0], words[1], words[2]);
+	}
+	printf("%s", cf_instructions[cf->opc].name);
+	cf_instructions[cf->opc].fxn(cf);
+	printf("\n");
+}
+
+/*
+ * The adreno shader microcode consists of two parts:
+ *   1) A CF (control-flow) program, at the header of the compiled shader,
+ *      which refers to ALU/FETCH instructions that follow it by address.
+ *   2) ALU and FETCH instructions
+ */
+
+int disasm(uint32_t *dwords, int sizedwords, int level, enum shader_t type)
+{
+	instr_cf_t *cfs = (instr_cf_t *)dwords;
+	int num_cfs = sizedwords * (int)sizeof(*dwords) / (int)sizeof(*cfs);	/* CF slots that fit in the buffer */
+	int idx, max_idx = 0;	/* stays 0 (nothing printed) if no exec CF is found */
+	for (idx = 0; idx < num_cfs; idx++) {
+		instr_cf_t *cf = &cfs[idx];
+		if (cf_exec(cf)) {
+			max_idx = 2 * cf->exec.address;	/* NOTE(review): assumes two CFs per 3-dword instruction slot */
+			break;
+		}
+	}
+
+	for (idx = 0; idx < max_idx && idx < num_cfs; idx++) {
+		instr_cf_t *cf = &cfs[idx];
+
+		print_cf(cf, level);
+		/* every ALU/FETCH instruction is read as 3 dwords; never read past sizedwords */
+		if (cf_exec(cf)) {
+			uint32_t sequence = cf->exec.serialize;
+			uint32_t i;
+			for (i = 0; i < cf->exec.count && (cf->exec.address + i + 1) * 3 <= (uint32_t)sizedwords; i++) {
+				uint32_t alu_off = (cf->exec.address + i);
+				if (sequence & 0x1) {
+					disasm_fetch(dwords + alu_off * 3, alu_off, level, sequence & 0x2);
+				} else {
+					disasm_alu(dwords + alu_off * 3, alu_off, level, sequence & 0x2, type);
+				}
+				sequence >>= 2;
+			}
+		}
+	}
+
+	return 0;
+}
+
+void disasm_set_debug(enum debug_t d)
+{
+	debug= d;
+}
diff --git a/src/gallium/drivers/freedreno/disasm.h b/src/gallium/drivers/freedreno/disasm.h
new file mode 100644
index 0000000..92efd5a
--- /dev/null
+++ b/src/gallium/drivers/freedreno/disasm.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef DISASM_H_
+#define DISASM_H_
+#include <stdint.h>	/* uint32_t in the disasm() prototype; keeps this header self-contained */
+
+enum shader_t {
+	SHADER_VERTEX,
+	SHADER_FRAGMENT,
+};
+
+/* bitmask of debug flags */
+enum debug_t {
+	PRINT_RAW      = 0x1,    /* dump raw hexdump */
+};
+
+int disasm(uint32_t *dwords, int sizedwords, int level, enum shader_t type);
+void disasm_set_debug(enum debug_t debug);
+#endif /* DISASM_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_a2xx_reg.h b/src/gallium/drivers/freedreno/freedreno_a2xx_reg.h
new file mode 100644
index 0000000..7c5982c
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_a2xx_reg.h
@@ -0,0 +1,1172 @@
+/*
+ * Copyright (c) 2012 Rob Clark <robdclark at gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef FREEDRENO_A2XX_REG_H_
+#define FREEDRENO_A2XX_REG_H_
+
+#include <GLES2/gl2.h>
+
+/* reinterpret the bit pattern of a float as a dword (via union; no numeric conversion) */
+static inline uint32_t f2d(float f)
+{
+	union {
+		float f;
+		uint32_t d;
+	} u = {
+		.f = f,
+	};
+	return u.d;
+}
+
+/* convert float to fixed point: scales by 8, i.e. f/2 expressed in 12.4 -- NOTE(review): presumably users expect half-sizes, confirm */
+static inline uint32_t f2d12_4(float f)
+{
+	return (uint32_t)(f * 8.0);
+}
+
+/* pack x,y into one dword: x in bits 0-13, y in bits 16-29 (each masked to 14 bits) */
+static inline uint32_t xy2d(uint16_t x, uint16_t y)
+{
+	return ((y & 0x3fff) << 16) | (x & 0x3fff);
+}
+
+/*
+ * Values for CP_EVENT_WRITE:
+ */
+
+enum VGT_EVENT_TYPE {
+	VS_DEALLOC = 0,
+	PS_DEALLOC = 1,
+	VS_DONE_TS = 2,
+	PS_DONE_TS = 3,
+	CACHE_FLUSH_TS = 4,
+	CONTEXT_DONE = 5,
+	CACHE_FLUSH = 6,
+	VIZQUERY_START = 7,
+	VIZQUERY_END = 8,
+	SC_WAIT_WC = 9,
+	RST_PIX_CNT = 13,
+	RST_VTX_CNT = 14,
+	TILE_FLUSH = 15,
+	CACHE_FLUSH_AND_INV_TS_EVENT = 20,
+	ZPASS_DONE = 21,
+	CACHE_FLUSH_AND_INV_EVENT = 22,
+	PERFCOUNTER_START = 23,
+	PERFCOUNTER_STOP = 24,
+	VS_FETCH_DONE = 27,
+	FACENESS_FLUSH = 28,
+};
+
+/*
+ * Color/surface formats:
+ */
+
+enum rb_colorformatx {
+	COLORX_4_4_4_4 = 0,
+	COLORX_1_5_5_5 = 1,
+	COLORX_5_6_5 = 2,
+	COLORX_8 = 3,
+	COLORX_8_8 = 4,
+	COLORX_8_8_8_8 = 5,
+	COLORX_S8_8_8_8 = 6,
+	COLORX_16_FLOAT = 7,
+	COLORX_16_16_FLOAT = 8,
+	COLORX_16_16_16_16_FLOAT = 9,
+	COLORX_32_FLOAT = 10,
+	COLORX_32_32_FLOAT = 11,
+	COLORX_32_32_32_32_FLOAT = 12,
+	COLORX_2_3_3 = 13,
+	COLORX_8_8_8 = 14,
+	COLORX_INVALID,
+};
+
+enum sq_surfaceformat {
+	FMT_1_REVERSE                  = 0,
+	FMT_1                          = 1,
+	FMT_8                          = 2,
+	FMT_1_5_5_5                    = 3,
+	FMT_5_6_5                      = 4,
+	FMT_6_5_5                      = 5,
+	FMT_8_8_8_8                    = 6,
+	FMT_2_10_10_10                 = 7,
+	FMT_8_A                        = 8,
+	FMT_8_B                        = 9,
+	FMT_8_8                        = 10,
+	FMT_Cr_Y1_Cb_Y0                = 11,
+	FMT_Y1_Cr_Y0_Cb                = 12,
+	FMT_5_5_5_1                    = 13,
+	FMT_8_8_8_8_A                  = 14,
+	FMT_4_4_4_4                    = 15,
+	FMT_10_11_11                   = 16,
+	FMT_11_11_10                   = 17,
+	FMT_DXT1                       = 18,
+	FMT_DXT2_3                     = 19,
+	FMT_DXT4_5                     = 20,
+	FMT_24_8                       = 22,
+	FMT_24_8_FLOAT                 = 23,
+	FMT_16                         = 24,
+	FMT_16_16                      = 25,
+	FMT_16_16_16_16                = 26,
+	FMT_16_EXPAND                  = 27,
+	FMT_16_16_EXPAND               = 28,
+	FMT_16_16_16_16_EXPAND         = 29,
+	FMT_16_FLOAT                   = 30,
+	FMT_16_16_FLOAT                = 31,
+	FMT_16_16_16_16_FLOAT          = 32,
+	FMT_32                         = 33,
+	FMT_32_32                      = 34,
+	FMT_32_32_32_32                = 35,
+	FMT_32_FLOAT                   = 36,
+	FMT_32_32_FLOAT                = 37,
+	FMT_32_32_32_32_FLOAT          = 38,
+	FMT_32_AS_8                    = 39,
+	FMT_32_AS_8_8                  = 40,
+	FMT_16_MPEG                    = 41,
+	FMT_16_16_MPEG                 = 42,
+	FMT_8_INTERLACED               = 43,
+	FMT_32_AS_8_INTERLACED         = 44,
+	FMT_32_AS_8_8_INTERLACED       = 45,
+	FMT_16_INTERLACED              = 46,
+	FMT_16_MPEG_INTERLACED         = 47,
+	FMT_16_16_MPEG_INTERLACED      = 48,
+	FMT_DXN                        = 49,
+	FMT_8_8_8_8_AS_16_16_16_16     = 50,
+	FMT_DXT1_AS_16_16_16_16        = 51,
+	FMT_DXT2_3_AS_16_16_16_16      = 52,
+	FMT_DXT4_5_AS_16_16_16_16      = 53,
+	FMT_2_10_10_10_AS_16_16_16_16  = 54,
+	FMT_10_11_11_AS_16_16_16_16    = 55,
+	FMT_11_11_10_AS_16_16_16_16    = 56,
+	FMT_32_32_32_FLOAT             = 57,
+	FMT_DXT3A                      = 58,
+	FMT_DXT5A                      = 59,
+	FMT_CTX1                       = 60,
+	FMT_DXT3A_AS_1_1_1_1           = 61,
+	FMT_INVALID
+};
+
+/*
+ * Register addresses:
+ */
+
+#define REG_COHER_BASE_PM4                  0xa2a
+#define REG_COHER_DEST_BASE_0               0x2006
+#define REG_COHER_SIZE_PM4                  0xa29
+#define REG_COHER_STATUS_PM4                0xa2b
+#define REG_CP_CSQ_IB1_STAT                 0x01fe
+#define REG_CP_CSQ_IB2_STAT                 0x01ff
+#define REG_CP_CSQ_RB_STAT                  0x01fd
+#define REG_CP_DEBUG                        0x01fc
+#define REG_CP_IB1_BASE                     0x0458
+#define REG_CP_IB1_BUFSZ                    0x0459
+#define REG_CP_IB2_BASE                     0x045a
+#define REG_CP_IB2_BUFSZ                    0x045b
+#define REG_CP_INT_ACK                      0x01f4
+#define REG_CP_INT_CNTL                     0x01f2
+#define REG_CP_INT_STATUS                   0x01f3
+#define REG_CP_ME_CNTL                      0x01f6
+#define REG_CP_ME_RAM_DATA                  0x01fa
+#define REG_CP_ME_RAM_RADDR                 0x01f9
+#define REG_CP_ME_RAM_WADDR                 0x01f8
+#define REG_CP_ME_STATUS                    0x01f7
+#define REG_CP_PERFCOUNTER_HI               0x0447
+#define REG_CP_PERFCOUNTER_LO               0x0446
+#define REG_CP_PERFCOUNTER_SELECT           0x0445
+#define REG_CP_PERFMON_CNTL                 0x0444
+#define REG_CP_PFP_UCODE_ADDR               0x00c0
+#define REG_CP_PFP_UCODE_DATA               0x00c1
+#define REG_CP_QUEUE_THRESHOLDS             0x01d5
+#define REG_CP_RB_BASE                      0x01c0
+#define REG_CP_RB_CNTL                      0x01c1
+#define REG_CP_RB_RPTR                      0x01c4
+#define REG_CP_RB_RPTR_ADDR                 0x01c3
+#define REG_CP_RB_RPTR_WR                   0x01c7
+#define REG_CP_RB_WPTR                      0x01c5
+#define REG_CP_RB_WPTR_BASE                 0x01c8
+#define REG_CP_RB_WPTR_DELAY                0x01c6
+#define REG_CP_STAT                         0x047f
+#define REG_CP_STATE_DEBUG_DATA             0x01ed
+#define REG_CP_STATE_DEBUG_INDEX            0x01ec
+#define REG_CP_ST_BASE                      0x044d
+#define REG_CP_ST_BUFSZ                     0x044e
+#define REG_GRAS_DEBUG_CNTL                 0x0c80
+#define REG_GRAS_DEBUG_DATA                 0x0c81
+#define REG_MASTER_INT_SIGNAL               0x03b7
+#define REG_PA_CL_CLIP_CNTL                 0x2204
+#define REG_PA_CL_GB_HORZ_CLIP_ADJ          0x2305
+#define REG_PA_CL_GB_HORZ_DISC_ADJ          0x2306
+#define REG_PA_CL_GB_VERT_CLIP_ADJ          0x2303
+#define REG_PA_CL_GB_VERT_DISC_ADJ          0x2304
+#define REG_PA_CL_VPORT_XOFFSET             0x2110
+#define REG_PA_CL_VPORT_XSCALE              0x210f
+#define REG_PA_CL_VPORT_YOFFSET             0x2112
+#define REG_PA_CL_VPORT_YSCALE              0x2111
+#define REG_PA_CL_VPORT_ZOFFSET             0x2114
+#define REG_PA_CL_VPORT_ZSCALE              0x2113
+#define REG_PA_CL_VTE_CNTL                  0x2206
+#define REG_PA_SC_AA_CONFIG                 0x2301
+#define REG_PA_SC_AA_MASK                   0x2312
+#define REG_PA_SC_LINE_CNTL                 0x2300
+#define REG_PA_SC_LINE_STIPPLE              0x2283
+#define REG_PA_SC_SCREEN_SCISSOR_BR         0x200f
+#define REG_PA_SC_SCREEN_SCISSOR_TL         0x200e
+#define REG_PA_SC_VIZ_QUERY                 0x2293
+#define REG_PA_SC_VIZ_QUERY_STATUS          0x0c44
+#define REG_PA_SC_WINDOW_OFFSET             0x2080
+#define REG_PA_SC_WINDOW_SCISSOR_BR         0x2082
+#define REG_PA_SC_WINDOW_SCISSOR_TL         0x2081
+#define REG_PA_SU_DEBUG_CNTL                0x0c80
+#define REG_PA_SU_DEBUG_DATA                0x0c81
+#define REG_PA_SU_FACE_DATA                 0x0c86
+#define REG_PA_SU_LINE_CNTL                 0x2282
+#define REG_PA_SU_POINT_MINMAX              0x2281
+#define REG_PA_SU_POINT_SIZE                0x2280
+#define REG_PA_SU_POLY_OFFSET_BACK_OFFSET   0x2383
+#define REG_PA_SU_POLY_OFFSET_FRONT_SCALE   0x2380
+#define REG_PA_SU_SC_MODE_CNTL              0x2205
+#define REG_PA_SU_VTX_CNTL                  0x2302
+#define REG_PC_DEBUG_CNTL                   0x0c38
+#define REG_PC_DEBUG_DATA                   0x0c39
+#define REG_RB_ALPHA_REF                    0x210e
+#define REG_RB_BC_CONTROL                   0x0f01
+#define REG_RB_BLEND_ALPHA                  0x2108
+#define REG_RB_BLEND_BLUE                   0x2107
+#define REG_RB_BLEND_CONTROL                0x2201
+#define REG_RB_BLEND_GREEN                  0x2106
+#define REG_RB_BLEND_RED                    0x2105
+#define REG_RBBM_CNTL                       0x003b
+#define REG_RBBM_DEBUG                      0x039b
+#define REG_RBBM_DEBUG_CNTL                 0x03a1
+#define REG_RBBM_DEBUG_OUT                  0x03a0
+#define REG_RBBM_INT_ACK                    0x03b6
+#define REG_RBBM_INT_CNTL                   0x03b4
+#define REG_RBBM_INT_STATUS                 0x03b5
+#define REG_RBBM_PATCH_RELEASE              0x0001
+#define REG_RBBM_PERFCOUNTER1_HI            0x0398
+#define REG_RBBM_PERFCOUNTER1_LO            0x0397
+#define REG_RBBM_PERFCOUNTER1_SELECT        0x0395
+#define REG_RBBM_PERIPHID1                  0x03f9
+#define REG_RBBM_PERIPHID2                  0x03fa
+#define REG_RBBM_PM_OVERRIDE1               0x039c
+#define REG_RBBM_PM_OVERRIDE2               0x039d
+#define REG_RBBM_READ_ERROR                 0x03b3
+#define REG_RBBM_SOFT_RESET                 0x003c
+#define REG_RBBM_STATUS                     0x05d0
+#define REG_RB_COLORCONTROL                 0x2202
+#define REG_RB_COLOR_DEST_MASK              0x2326
+#define REG_RB_COLOR_INFO                   0x2001
+#define REG_RB_COLOR_MASK                   0x2104
+#define REG_RB_COPY_CONTROL                 0x2318
+#define REG_RB_COPY_DEST_BASE               0x2319
+#define REG_RB_COPY_DEST_INFO               0x231b
+#define REG_RB_COPY_DEST_OFFSET             0x231c
+#define REG_RB_COPY_DEST_PITCH              0x231a
+#define REG_RB_DEBUG_CNTL                   0x0f26
+#define REG_RB_DEBUG_DATA                   0x0f27
+#define REG_RB_DEPTH_CLEAR                  0x231d
+#define REG_RB_DEPTHCONTROL                 0x2200
+#define REG_RB_DEPTH_INFO                   0x2002
+#define REG_RB_EDRAM_INFO                   0x0f02
+#define REG_RB_FOG_COLOR                    0x2109
+#define REG_RB_MODECONTROL                  0x2208
+#define REG_RB_SAMPLE_COUNT_CTL             0x2324
+#define REG_RB_SAMPLE_POS                   0x220a
+#define REG_RB_STENCILREFMASK               0x210d
+#define REG_RB_STENCILREFMASK_BF            0x210c
+#define REG_RB_SURFACE_INFO                 0x2000
+#define REG_SCRATCH_ADDR                    0x01dd
+#define REG_SCRATCH_REG0                    0x0578
+#define REG_SCRATCH_REG2                    0x057a
+#define REG_SCRATCH_UMSK                    0x01dc
+#define REG_SQ_CF_BOOLEANS                  0x4900
+#define REG_SQ_CF_LOOP                      0x4908
+#define REG_SQ_CONSTANT_0                   0x4000
+#define REG_SQ_CONTEXT_MISC                 0x2181
+#define REG_SQ_DEBUG_CONST_MGR_FSM          0x0daf
+#define REG_SQ_DEBUG_EXP_ALLOC              0x0db3
+#define REG_SQ_DEBUG_FSM_ALU_0              0x0db1
+#define REG_SQ_DEBUG_FSM_ALU_1              0x0db2
+#define REG_SQ_DEBUG_GPR_PIX                0x0db6
+#define REG_SQ_DEBUG_GPR_VTX                0x0db5
+#define REG_SQ_DEBUG_INPUT_FSM              0x0dae
+#define REG_SQ_DEBUG_MISC_0                 0x2309
+#define REG_SQ_DEBUG_MISC                   0x0d05
+#define REG_SQ_DEBUG_MISC_1                 0x230a
+#define REG_SQ_DEBUG_PIX_TB_0               0x0dbc
+#define REG_SQ_DEBUG_PIX_TB_STATE_MEM       0x0dc1
+#define REG_SQ_DEBUG_PIX_TB_STATUS_REG_0    0x0dbd
+#define REG_SQ_DEBUG_PIX_TB_STATUS_REG_1    0x0dbe
+#define REG_SQ_DEBUG_PIX_TB_STATUS_REG_2    0x0dbf
+#define REG_SQ_DEBUG_PIX_TB_STATUS_REG_3    0x0dc0
+#define REG_SQ_DEBUG_PTR_BUFF               0x0db4
+#define REG_SQ_DEBUG_TB_STATUS_SEL          0x0db7
+#define REG_SQ_DEBUG_TP_FSM                 0x0db0
+#define REG_SQ_DEBUG_VTX_TB_0               0x0db8
+#define REG_SQ_DEBUG_VTX_TB_1               0x0db9
+#define REG_SQ_DEBUG_VTX_TB_STATE_MEM       0x0dbb
+#define REG_SQ_DEBUG_VTX_TB_STATUS_REG      0x0dba
+#define REG_SQ_FETCH_0                      0x4800
+#define REG_SQ_FLOW_CONTROL                 0x0d01
+#define REG_SQ_GPR_MANAGEMENT               0x0d00
+#define REG_SQ_INST_STORE_MANAGMENT         0x0d02
+#define REG_SQ_INT_ACK                      0x0d36
+#define REG_SQ_INT_CNTL                     0x0d34
+#define REG_SQ_INTERPOLATOR_CNTL            0x2182
+#define REG_SQ_INT_STATUS                   0x0d35
+#define REG_SQ_PROGRAM_CNTL                 0x2180
+#define REG_SQ_PS_CONST                     0x2308
+#define REG_SQ_PS_PROGRAM                   0x21f6
+#define REG_SQ_VS_CONST                     0x2307
+#define REG_SQ_VS_PROGRAM                   0x21f7
+#define REG_SQ_WRAPPING_0                   0x2183
+#define REG_SQ_WRAPPING_1                   0x2184
+#define REG_TC_CNTL_STATUS                  0x0e00
+#define REG_TP0_CHICKEN                     0x0e1e
+#define REG_VGT_CURRENT_BIN_ID_MAX          0x2203
+#define REG_VGT_CURRENT_BIN_ID_MIN          0x2207
+#define REG_VGT_ENHANCE                     0x2294
+#define REG_VGT_INDX_OFFSET                 0x2102
+#define REG_VGT_MAX_VTX_INDX                0x2100
+#define REG_VGT_MIN_VTX_INDX                0x2101
+#define REG_VGT_OUT_DEALLOC_CNTL            0x2317
+#define REG_VGT_VERTEX_REUSE_BLOCK_CNTL     0x2316
+
+/* Added in a220: */
+#define REG_A220_RB_LRZ_VSC_CONTROL         0x2209
+#define REG_A220_GRAS_CONTROL               0x2210
+#define REG_A220_VSC_BIN_SIZE               0x0c01
+#define REG_A220_VSC_PIPE_DATA_LENGTH_7     0x0c1d  /* same address as REG_VSC_PIPE_DATA_LENGTH_7 */
+#define REG_VSC_PIPE_CONFIG_0               0x0c06
+#define REG_VSC_PIPE_DATA_ADDRESS_0         0x0c07
+#define REG_VSC_PIPE_DATA_LENGTH_0          0x0c08
+#define REG_VSC_PIPE_CONFIG_1               0x0c09
+#define REG_VSC_PIPE_DATA_ADDRESS_1         0x0c0a
+#define REG_VSC_PIPE_DATA_LENGTH_1          0x0c0b
+#define REG_VSC_PIPE_CONFIG_2               0x0c0c
+#define REG_VSC_PIPE_DATA_ADDRESS_2         0x0c0d
+#define REG_VSC_PIPE_DATA_LENGTH_2          0x0c0e
+#define REG_VSC_PIPE_CONFIG_3               0x0c0f
+#define REG_VSC_PIPE_DATA_ADDRESS_3         0x0c10
+#define REG_VSC_PIPE_DATA_LENGTH_3          0x0c11
+#define REG_VSC_PIPE_CONFIG_4               0x0c12
+#define REG_VSC_PIPE_DATA_ADDRESS_4         0x0c13
+#define REG_VSC_PIPE_DATA_LENGTH_4          0x0c14
+#define REG_VSC_PIPE_CONFIG_5               0x0c15
+#define REG_VSC_PIPE_DATA_ADDRESS_5         0x0c16
+#define REG_VSC_PIPE_DATA_LENGTH_5          0x0c17
+#define REG_VSC_PIPE_CONFIG_6               0x0c18
+#define REG_VSC_PIPE_DATA_ADDRESS_6         0x0c19
+#define REG_VSC_PIPE_DATA_LENGTH_6          0x0c1a
+#define REG_VSC_PIPE_CONFIG_7               0x0c1b
+#define REG_VSC_PIPE_DATA_ADDRESS_7         0x0c1c
+#define REG_VSC_PIPE_DATA_LENGTH_7          0x0c1d
+
+/* Added in a225: */
+#define REG_A225_RB_COLOR_INFO3             0x2005
+#define REG_A225_PC_MULTI_PRIM_IB_RESET_INDX 0x2103
+#define REG_A225_GRAS_UCP0X                 0x2340
+#define REG_A225_GRAS_UCP5W                 0x2357
+#define REG_A225_GRAS_UCP_ENABLED           0x2360
+
+/* not sure, maybe RB_CLEAR_COLOR? */
+#define REG_CLEAR_COLOR                     0x220b
+
+/* unnamed registers: */
+#define REG_0c02                            0x0c02
+#define REG_0c04                            0x0c04
+#define REG_0c06                            0x0c06  /* same address as REG_VSC_PIPE_CONFIG_0 */
+#define REG_2010                            0x2010
+
+
+/*
+ * Format for 2nd dword in CP_DRAW_INDX and friends:
+ */
+
+/* see VGT_PRIMITIVE_TYPE.PRIM_TYPE? */
+enum pc_di_primtype {
+	DI_PT_NONE = 0,
+	DI_PT_POINTLIST = 1,
+	DI_PT_LINELIST = 2,
+	DI_PT_LINESTRIP = 3,
+	DI_PT_TRILIST = 4,
+	DI_PT_TRIFAN = 5,
+	DI_PT_TRISTRIP = 6,
+	DI_PT_RECTLIST = 8,
+	DI_PT_QUADLIST = 13,
+	DI_PT_QUADSTRIP = 14,
+	DI_PT_POLYGON = 15,
+	DI_PT_2D_COPY_RECT_LIST_V0 = 16,
+	DI_PT_2D_COPY_RECT_LIST_V1 = 17,
+	DI_PT_2D_COPY_RECT_LIST_V2 = 18,
+	DI_PT_2D_COPY_RECT_LIST_V3 = 19,
+	DI_PT_2D_FILL_RECT_LIST = 20,
+	DI_PT_2D_LINE_STRIP = 21,
+	DI_PT_2D_TRI_STRIP = 22,
+};
+
+/* see VGT:VGT_DRAW_INITIATOR.SOURCE_SELECT? */
+enum pc_di_src_sel {
+	DI_SRC_SEL_DMA = 0,
+	DI_SRC_SEL_IMMEDIATE = 1,
+	DI_SRC_SEL_AUTO_INDEX = 2,
+	DI_SRC_SEL_RESERVED = 3,
+};
+
+/* see VGT_DMA_INDEX_TYPE.INDEX_TYPE? */
+enum pc_di_index_size {
+	INDEX_SIZE_IGN    = 0,   /* shares value 0 with INDEX_SIZE_16_BIT */
+	INDEX_SIZE_16_BIT = 0,
+	INDEX_SIZE_32_BIT = 1,
+	INDEX_SIZE_8_BIT  = 2,
+	INDEX_SIZE_INVALID       /* no explicit value: one past INDEX_SIZE_8_BIT, i.e. 3 */
+};
+
+enum pc_di_vis_cull_mode {
+	IGNORE_VISIBILITY = 0,
+};
+
+static inline uint32_t DRAW(enum pc_di_primtype prim_type,
+		enum pc_di_src_sel source_select, enum pc_di_index_size index_size,
+		enum pc_di_vis_cull_mode vis_cull_mode)
+{ /* ORs the shifted fields together into one dword */
+	return (prim_type         << 0) |
+			(source_select     << 6) |
+			((index_size & 1)  << 11) |   /* low bit of index_size goes to bit 11 */
+			((index_size >> 1) << 13) |   /* remaining high bit(s) go to bit 13 and up */
+			(vis_cull_mode     << 9) |
+			(1                 << 14);    /* bit 14 is always set */
+}
+
+
+/*
+ * Bits for VGT_CURRENT_BIN_ID_MIN/MAX:
+ */
+
+#define VGT_CURRENT_BIN_ID_MIN_COLUMN(val)       (((val) & 0x7) << 0)
+#define VGT_CURRENT_BIN_ID_MIN_ROW(val)          (((val) & 0x7) << 3)
+#define VGT_CURRENT_BIN_ID_MIN_GUARD_BAND(val)   (((val) & 0x7) << 6)
+
+
+/*
+ * Bits for PA_CL_VTE_CNTL:
+ */
+
+#define PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA         0x00000001
+#define PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA        0x00000002
+#define PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA         0x00000004
+#define PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA        0x00000008
+#define PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA         0x00000010
+#define PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA        0x00000020
+#define PA_CL_VTE_CNTL_VTX_XY_FMT                0x00000100
+#define PA_CL_VTE_CNTL_VTX_Z_FMT                 0x00000200
+#define PA_CL_VTE_CNTL_VTX_W0_FMT                0x00000400
+#define PA_CL_VTE_CNTL_PERFCOUNTER_REF           0x00000800
+
+
+/*
+ * Bits for PA_CL_CLIP_CNTL:
+ */
+
+#define PA_CL_CLIP_CNTL_CLIP_DISABLE             0x00010000
+#define PA_CL_CLIP_CNTL_BOUNDARY_EDGE_FLAG_ENA   0x00040000
+enum dx_clip_space {
+	DXCLIP_OPENGL = 0,
+	DXCLIP_DIRECTX = 1,
+};
+static inline uint32_t PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF(enum dx_clip_space val)
+{
+	return val << 19;
+}
+#define PA_CL_CLIP_CNTL_DIS_CLIP_ERR_DETECT      0x00100000
+#define PA_CL_CLIP_CNTL_VTX_KILL_OR              0x00200000
+#define PA_CL_CLIP_CNTL_XY_NAN_RETAIN            0x00400000
+#define PA_CL_CLIP_CNTL_Z_NAN_RETAIN             0x00800000
+#define PA_CL_CLIP_CNTL_W_NAN_RETAIN             0x01000000
+
+
+/*
+ * Bits for PA_SU_SC_MODE_CNTL:
+ */
+
+#define PA_SU_SC_MODE_CNTL_CULL_FRONT            0x00000001
+#define PA_SU_SC_MODE_CNTL_CULL_BACK             0x00000002
+#define PA_SU_SC_MODE_CNTL_FACE                  0x00000004
+enum pa_su_sc_polymode {
+	POLY_DISABLED     = 0,
+	POLY_DUALMODE     = 1,
+};
+static inline uint32_t PA_SU_SC_MODE_CNTL_POLYMODE(enum pa_su_sc_polymode val)
+{
+	return val << 3;
+}
+enum pa_su_sc_draw {
+	DRAW_POINTS       = 0,
+	DRAW_LINES        = 1,
+	DRAW_TRIANGLES    = 2,
+};
+static inline uint32_t PA_SU_SC_MODE_CNTL_POLYMODE_FRONT_PTYPE(enum pa_su_sc_draw val)
+{
+	return val << 5;
+}
+static inline uint32_t PA_SU_SC_MODE_CNTL_POLYMODE_BACK_PTYPE(enum pa_su_sc_draw val)
+{
+	return val << 8;
+}
+#define PA_SU_SC_MODE_CNTL_POLY_OFFSET_FRONT_ENABLE        0x00000800
+#define PA_SU_SC_MODE_CNTL_POLY_OFFSET_BACK_ENABLE         0x00001000
+#define PA_SU_SC_MODE_CNTL_POLY_OFFSET_PARA_ENABLE         0x00002000
+#define PA_SU_SC_MODE_CNTL_MSAA_ENABLE                     0x00008000
+#define PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE        0x00010000
+#define PA_SU_SC_MODE_CNTL_LINE_STIPPLE_ENABLE             0x00040000
+#define PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST              0x00080000
+#define PA_SU_SC_MODE_CNTL_PERSP_CORR_DIS                  0x00100000
+#define PA_SU_SC_MODE_CNTL_MULTI_PRIM_IB_ENA               0x00200000
+#define PA_SU_SC_MODE_CNTL_QUAD_ORDER_ENABLE               0x00800000
+#define PA_SU_SC_MODE_CNTL_WAIT_RB_IDLE_ALL_TRI            0x02000000
+#define PA_SU_SC_MODE_CNTL_WAIT_RB_IDLE_FIRST_TRI_NEW_STATE 0x04000000
+#define PA_SU_SC_MODE_CNTL_CLAMPED_FACENESS                0x10000000
+#define PA_SU_SC_MODE_CNTL_ZERO_AREA_FACENESS              0x20000000
+#define PA_SU_SC_MODE_CNTL_FACE_KILL_ENABLE                0x40000000
+#define PA_SU_SC_MODE_CNTL_FACE_WRITE_ENABLE               0x80000000
+
+
+/*
+ * Bits for PA_SC_LINE_STIPPLE:
+ */
+
+#define PA_SC_LINE_STIPPLE_LINE_PATTERN(val)     ((val) & 0x0000ffff)
+#define PA_SC_LINE_STIPPLE_REPEAT_COUNT(val)     (((val) << 16) & 0x00ff0000)
+enum pa_sc_pattern_bit_order {
+	PATTERN_BIT_ORDER_LITTLE = 0,
+	PATTERN_BIT_ORDER_BIG    = 1,
+};
+static inline uint32_t PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER(enum pa_sc_pattern_bit_order val)
+{
+	return val << 28;
+}
+enum pa_sc_auto_reset_cntl {
+	AUTO_RESET_NEVER          = 0,
+	AUTO_RESET_EACH_PRIMITIVE = 1,
+	AUTO_RESET_EACH_PACKET    = 2,
+};
+static inline uint32_t PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL(enum pa_sc_auto_reset_cntl val)
+{
+	return val << 29;
+}
+
+
+/*
+ * Bits for PA_SC_LINE_CNTL:
+ */
+
+#define PA_SC_LINE_CNTL_BRES_CNTL_MASK(val)      ((val) & 0x000000ff)
+#define PA_SC_LINE_CNTL_USE_BRES_CNTL            0x00000100
+#define PA_SC_LINE_CNTL_EXPAND_LINE_WIDTH        0x00000200
+#define PA_SC_LINE_CNTL_LAST_PIXEL               0x00000400
+
+
+/*
+ * Bits for PA_SU_VTX_CNTL:
+ */
+
+enum pa_pixcenter {
+	PIXCENTER_D3D = 0,
+	PIXCENTER_OGL = 1,
+};
+static inline uint32_t PA_SU_VTX_CNTL_PIX_CENTER(enum pa_pixcenter val)
+{
+	return val;
+}
+
+enum pa_roundmode {
+	TRUNCATE = 0,
+	ROUND = 1,
+	ROUNDTOEVEN = 2,
+	ROUNDTOODD = 3,
+};
+static inline uint32_t PA_SU_VTX_CNTL_ROUND_MODE_MASK(enum pa_roundmode val)
+{
+	return val << 1;
+}
+
+enum pa_quantmode {
+	ONE_SIXTEENTH = 0,
+	ONE_EIGHTH = 1,
+	ONE_QUARTER = 2,
+	ONE_HALF = 3,
+	ONE = 4,
+};
+static inline uint32_t PA_SU_VTX_CNTL_QUANT_MODE(enum pa_quantmode val)
+{
+	return val << 3;
+}
+
+
+/*
+ * Bits for PA_SU_POINT_SIZE:
+ */
+
+#define PA_SU_POINT_SIZE_HEIGHT(val)        (f2d12_4(val) & 0xffff)           /* low 16 bits */
+#define PA_SU_POINT_SIZE_WIDTH(val)         ((f2d12_4(val) & 0xffff) << 16)   /* high 16 bits: mask first, then shift */
+
+
+/*
+ * Bits for PA_SU_POINT_MINMAX:
+ */
+
+#define PA_SU_POINT_MINMAX_MIN_SIZE(val)    (f2d12_4(val) & 0xffff)           /* low 16 bits */
+#define PA_SU_POINT_MINMAX_MAX_SIZE(val)    ((f2d12_4(val) & 0xffff) << 16)   /* high 16 bits: mask first, then shift */
+
+
+/*
+ * Bits for PA_SU_LINE_CNTL:
+ */
+
+#define PA_SU_LINE_CNTL_WIDTH(val)          (f2d12_4(val) & 0xffff)
+
+
+/*
+ * Bits for PA_SC_WINDOW_OFFSET:
+ * (seems to be same as r600)
+ */
+#define PA_SC_WINDOW_OFFSET_X(val)          ((val) & 0x7fff)
+#define PA_SC_WINDOW_OFFSET_Y(val)          (((val) & 0x7fff) << 16)
+
+#define PA_SC_WINDOW_OFFSET_DISABLE         0x80000000
+
+
+/*
+ * Bits for SQ_CONTEXT_MISC:
+ */
+
+#define SQ_CONTEXT_MISC_INST_PRED_OPTIMIZE  0x00000001
+#define SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY 0x00000002
+enum sq_sample_cntl {
+	CENTROIDS_ONLY = 0,
+	CENTERS_ONLY = 1,
+	CENTROIDS_AND_CENTERS = 2,
+};
+static inline uint32_t SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(enum sq_sample_cntl val)
+{
+	return (val & 0x3) << 2;
+}
+#define SQ_CONTEXT_MISC_PARAM_GEN_POS(val)  (((val) & 0xff) << 8)
+#define SQ_CONTEXT_MISC_PERFCOUNTER_REF     0x00010000
+#define SQ_CONTEXT_MISC_YEILD_OPTIMIZE      0x00020000
+#define SQ_CONTEXT_MISC_TX_CACHE_SEL        0x00040000
+
+
+/*
+ * Bits for SQ_PROGRAM_CNTL:
+ */
+/* note: only 0x3f worth of valid register values, but high bit is
+ * set to indicate '0 registers used':
+ */
+#define SQ_PROGRAM_CNTL_VS_REGS(val)        ((val) & 0xff)
+#define SQ_PROGRAM_CNTL_PS_REGS(val)        (((val) & 0xff) << 8)
+#define SQ_PROGRAM_CNTL_VS_RESOURCE         0x00010000
+#define SQ_PROGRAM_CNTL_PS_RESOURCE         0x00020000
+#define SQ_PROGRAM_CNTL_PARAM_GEN           0x00040000
+#define SQ_PROGRAM_CNTL_GEN_INDEX_PIX       0x00080000
+#define SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(val) (((val) & 0xf) << 20)
+#define SQ_PROGRAM_CNTL_VS_EXPORT_MODE(val)  (((val) & 0x7) << 24)
+enum sq_ps_vtx_mode {
+	POSITION_1_VECTOR              = 0,
+	POSITION_2_VECTORS_UNUSED      = 1,
+	POSITION_2_VECTORS_SPRITE      = 2,
+	POSITION_2_VECTORS_EDGE        = 3,
+	POSITION_2_VECTORS_KILL        = 4,
+	POSITION_2_VECTORS_SPRITE_KILL = 5,
+	POSITION_2_VECTORS_EDGE_KILL   = 6,
+	MULTIPASS                      = 7,
+};
+static inline uint32_t SQ_PROGRAM_CNTL_PS_EXPORT_MODE(enum sq_ps_vtx_mode val)
+{
+	return val << 27;
+}
+#define SQ_PROGRAM_CNTL_GEN_INDEX_VTX  0x80000000
+
+
+/*
+ * Bits for SQ_VS_CONST
+ */
+
+#define SQ_VS_CONST_BASE(val)          ((val) & 0x1ff)
+#define SQ_VS_CONST_SIZE(val)          (((val) & 0x1ff) << 12)
+
+
+/*
+ * Bits for SQ_PS_CONST
+ */
+
+#define SQ_PS_CONST_BASE(val)          ((val) & 0x1ff)
+#define SQ_PS_CONST_SIZE(val)          (((val) & 0x1ff) << 12)
+
+
+/*
+ * Bits for tex sampler:
+ */
+
+/* dword0 */
+enum sq_tex_clamp {
+	SQ_TEX_WRAP                    = 0,    /* GL_REPEAT */
+	SQ_TEX_MIRROR                  = 1,    /* GL_MIRRORED_REPEAT */
+	SQ_TEX_CLAMP_LAST_TEXEL        = 2,    /* GL_CLAMP_TO_EDGE */
+	/* TODO confirm these: */
+	SQ_TEX_MIRROR_ONCE_LAST_TEXEL  = 3,
+	SQ_TEX_CLAMP_HALF_BORDER       = 4,
+	SQ_TEX_MIRROR_ONCE_HALF_BORDER = 5,
+	SQ_TEX_CLAMP_BORDER            = 6,
+	SQ_TEX_MIRROR_ONCE_BORDER      = 7,
+};
+static inline uint32_t SQ_TEX0_CLAMP_X(enum sq_tex_clamp val)
+{
+	return (val & 0x7) << 10;
+}
+static inline uint32_t SQ_TEX0_CLAMP_Y(enum sq_tex_clamp val)
+{
+	return (val & 0x7) << 13;
+}
+static inline uint32_t SQ_TEX0_CLAMP_Z(enum sq_tex_clamp val)
+{
+	return (val & 0x7) << 16;
+}
+#define SQ_TEX0_PITCH(val)             (((val) >> 5) << 22)
+
+/* dword2 */
+#define SQ_TEX2_HEIGHT(val)            (((val) - 1) << 13)
+#define SQ_TEX2_WIDTH(val)             ((val) - 1)
+
+/* dword3 */
+enum sq_tex_swiz {
+	SQ_TEX_X    = 0,
+	SQ_TEX_Y    = 1,
+	SQ_TEX_Z    = 2,
+	SQ_TEX_W    = 3,
+	SQ_TEX_ZERO = 4,
+	SQ_TEX_ONE  = 5,
+};
+static inline uint32_t SQ_TEX3_SWIZ_X(enum sq_tex_swiz val)
+{
+	return (val & 0x7) << 1;
+}
+static inline uint32_t SQ_TEX3_SWIZ_Y(enum sq_tex_swiz val)
+{
+	return (val & 0x7) << 4;
+}
+static inline uint32_t SQ_TEX3_SWIZ_Z(enum sq_tex_swiz val)
+{
+	return (val & 0x7) << 7;
+}
+static inline uint32_t SQ_TEX3_SWIZ_W(enum sq_tex_swiz val)
+{
+	return (val & 0x7) << 10;
+}
+
+enum sq_tex_filter {
+	SQ_TEX_FILTER_POINT    = 0,
+	SQ_TEX_FILTER_BILINEAR = 1,
+	SQ_TEX_FILTER_BICUBIC  = 2,  /* presumed */
+};
+static inline uint32_t SQ_TEX3_XY_MAG_FILTER(enum sq_tex_filter val)
+{
+	return (val & 0x3) << 19;
+}
+static inline uint32_t SQ_TEX3_XY_MIN_FILTER(enum sq_tex_filter val)
+{
+	return (val & 0x3) << 21;
+}
+
+
+/*
+ * Bits for RB_BLEND_CONTROL:
+ */
+
+enum rb_blend_op {
+	RB_BLEND_ZERO = 0,
+	RB_BLEND_ONE = 1,
+	RB_BLEND_SRC_COLOR = 4,
+	RB_BLEND_ONE_MINUS_SRC_COLOR = 5,
+	RB_BLEND_SRC_ALPHA = 6,
+	RB_BLEND_ONE_MINUS_SRC_ALPHA = 7,
+	RB_BLEND_DST_COLOR = 8,
+	RB_BLEND_ONE_MINUS_DST_COLOR = 9,
+	RB_BLEND_DST_ALPHA = 10,
+	RB_BLEND_ONE_MINUS_DST_ALPHA = 11,
+	RB_BLEND_CONSTANT_COLOR = 12,
+	RB_BLEND_ONE_MINUS_CONSTANT_COLOR = 13,
+	RB_BLEND_CONSTANT_ALPHA = 14,
+	RB_BLEND_ONE_MINUS_CONSTANT_ALPHA = 15,
+	RB_BLEND_SRC_ALPHA_SATURATE = 16,
+};
+
+enum rb_comb_func {
+	COMB_DST_PLUS_SRC = 0,
+	COMB_SRC_MINUS_DST = 1,
+	COMB_MIN_DST_SRC = 2,
+	COMB_MAX_DST_SRC = 3,
+	COMB_DST_MINUS_SRC = 4,
+	COMB_DST_PLUS_SRC_BIAS = 5,
+};
+
+#define RB_BLENDCONTROL_COLOR_SRCBLEND_MASK      0x0000001f
+static inline uint32_t RB_BLENDCONTROL_COLOR_SRCBLEND(enum rb_blend_op val)
+{
+	return val & RB_BLENDCONTROL_COLOR_SRCBLEND_MASK;
+}
+#define RB_BLENDCONTROL_COLOR_COMB_FCN_MASK      0x000000e0
+static inline uint32_t RB_BLENDCONTROL_COLOR_COMB_FCN(enum rb_comb_func val)
+{
+	return (val << 5) & RB_BLENDCONTROL_COLOR_COMB_FCN_MASK;
+}
+#define RB_BLENDCONTROL_COLOR_DESTBLEND_MASK     0x00001f00
+static inline uint32_t RB_BLENDCONTROL_COLOR_DESTBLEND(enum rb_blend_op val)
+{
+	return (val << 8) & RB_BLENDCONTROL_COLOR_DESTBLEND_MASK;
+}
+#define RB_BLENDCONTROL_ALPHA_SRCBLEND_MASK      0x001f0000
+static inline uint32_t RB_BLENDCONTROL_ALPHA_SRCBLEND(enum rb_blend_op val)
+{
+	return (val << 16) & RB_BLENDCONTROL_ALPHA_SRCBLEND_MASK;
+}
+#define RB_BLENDCONTROL_ALPHA_COMB_FCN_MASK      0x00e00000
+static inline uint32_t RB_BLENDCONTROL_ALPHA_COMB_FCN(enum rb_comb_func val)
+{
+	return (val << 21) & RB_BLENDCONTROL_ALPHA_COMB_FCN_MASK;
+}
+#define RB_BLENDCONTROL_ALPHA_DESTBLEND_MASK     0x1f000000
+static inline uint32_t RB_BLENDCONTROL_ALPHA_DESTBLEND(enum rb_blend_op val)
+{
+	return (val << 24) & RB_BLENDCONTROL_ALPHA_DESTBLEND_MASK;
+}
+#define RB_BLENDCONTROL_BLEND_FORCE_ENABLE       0x20000000
+#define RB_BLENDCONTROL_BLEND_FORCE              0x40000000
+
+
+/*
+ * Bits for RB_COLOR_MASK:
+ */
+#define RB_COLOR_MASK_WRITE_RED                  0x00000001
+#define RB_COLOR_MASK_WRITE_GREEN                0x00000002
+#define RB_COLOR_MASK_WRITE_BLUE                 0x00000004
+#define RB_COLOR_MASK_WRITE_ALPHA                0x00000008
+
+
+/*
+ * Bits for RB_COLOR_INFO:
+ */
+
+#define RB_COLOR_INFO_COLOR_FORMAT_MASK          0x0000000f
+static inline uint32_t RB_COLOR_INFO_COLOR_FORMAT(enum rb_colorformatx val)
+{
+	return val & RB_COLOR_INFO_COLOR_FORMAT_MASK;
+}
+
+#define RB_COLOR_INFO_COLOR_ROUND_MODE(val)      (((val) & 0x3) << 4)
+#define RB_COLOR_INFO_COLOR_LINEAR               0x00000040
+#define RB_COLOR_INFO_COLOR_ENDIAN(val)          (((val) & 0x3) << 7)
+#define RB_COLOR_INFO_COLOR_SWAP(val)            (((val) & 0x3) << 9)
+#define RB_COLOR_INFO_COLOR_BASE(val)            (((val) & 0xfffff) << 12)
+
+
+/*
+ * Bits for RB_MODECONTROL:
+ */
+
+enum rb_edram_mode {
+	EDRAM_NOP = 0,
+	COLOR_DEPTH = 4,
+	DEPTH_ONLY = 5,
+	EDRAM_COPY = 6,
+};
+static inline uint32_t RB_MODECONTROL_EDRAM_MODE(enum rb_edram_mode val)
+{
+	return val & 0x7;
+}
+
+
+/*
+ * Bits for RB_DEPTHCONTROL:
+ */
+
+/* single-bit enables, bits 0-3: */
+#define RB_DEPTHCONTROL_STENCIL_ENABLE      0x00000001
+#define RB_DEPTHCONTROL_Z_ENABLE            0x00000002
+#define RB_DEPTHCONTROL_Z_WRITE_ENABLE      0x00000004
+#define RB_DEPTHCONTROL_EARLY_Z_ENABLE      0x00000008
+/* depth compare function, bits 6:4 (only the low 3 bits of the argument
+ * survive the mask):
+ */
+#define RB_DEPTHCONTROL_ZFUNC_MASK          0x00000070
+#define RB_DEPTHCONTROL_ZFUNC(depth_func) \
+	(((depth_func) << 4) & RB_DEPTHCONTROL_ZFUNC_MASK)
+#define RB_DEPTHCONTROL_BACKFACE_ENABLE     0x00000080
+/* front-face stencil compare function, bits 10:8: */
+#define RB_DEPTHCONTROL_STENCILFUNC_MASK    0x00000700
+#define RB_DEPTHCONTROL_STENCILFUNC(stencil_func) \
+	(((stencil_func) << 8) & RB_DEPTHCONTROL_STENCILFUNC_MASK)
+enum rb_stencil_op {
+	STENCIL_KEEP = 0,
+	STENCIL_ZERO = 1,
+	STENCIL_REPLACE = 2,
+	STENCIL_INCR_CLAMP = 3,
+	STENCIL_DECR_CLAMP = 4,
+	STENCIL_INVERT = 5,
+	STENCIL_INCR_WRAP = 6,
+	STENCIL_DECR_WRAP = 7
+};
+/*
+ * Stencil-op accessors.  Each one places a 3-bit rb_stencil_op into its
+ * field.  The value is converted to uint32_t *before* shifting so that
+ * the shift is always an unsigned operation: with a signed enum type,
+ * shifting 4..7 left by 29 would overflow int.
+ */
+/* front-face op on stencil fail, bits 13:11: */
+#define RB_DEPTHCONTROL_STENCILFAIL_MASK         0x00003800
+static inline uint32_t RB_DEPTHCONTROL_STENCILFAIL(enum rb_stencil_op val)
+{
+	return ((uint32_t)val << 11) & RB_DEPTHCONTROL_STENCILFAIL_MASK;
+}
+/* front-face op on stencil+depth pass, bits 16:14: */
+#define RB_DEPTHCONTROL_STENCILZPASS_MASK        0x0001c000
+static inline uint32_t RB_DEPTHCONTROL_STENCILZPASS(enum rb_stencil_op val)
+{
+	return ((uint32_t)val << 14) & RB_DEPTHCONTROL_STENCILZPASS_MASK;
+}
+/* front-face op on depth fail, bits 19:17: */
+#define RB_DEPTHCONTROL_STENCILZFAIL_MASK        0x000e0000
+static inline uint32_t RB_DEPTHCONTROL_STENCILZFAIL(enum rb_stencil_op val)
+{
+	return ((uint32_t)val << 17) & RB_DEPTHCONTROL_STENCILZFAIL_MASK;
+}
+/* back-face stencil compare function, bits 22:20: */
+#define RB_DEPTHCONTROL_STENCILFUNC_BF_MASK      0x00700000
+#define RB_DEPTHCONTROL_STENCILFUNC_BF(stencil_func) \
+	(((stencil_func) << 20) & RB_DEPTHCONTROL_STENCILFUNC_BF_MASK)
+/* back-face op on stencil fail, bits 25:23: */
+#define RB_DEPTHCONTROL_STENCILFAIL_BF_MASK      0x03800000
+static inline uint32_t RB_DEPTHCONTROL_STENCILFAIL_BF(enum rb_stencil_op val)
+{
+	return ((uint32_t)val << 23) & RB_DEPTHCONTROL_STENCILFAIL_BF_MASK;
+}
+/* back-face op on stencil+depth pass, bits 28:26: */
+#define RB_DEPTHCONTROL_STENCILZPASS_BF_MASK     0x1c000000
+static inline uint32_t RB_DEPTHCONTROL_STENCILZPASS_BF(enum rb_stencil_op val)
+{
+	return ((uint32_t)val << 26) & RB_DEPTHCONTROL_STENCILZPASS_BF_MASK;
+}
+/* back-face op on depth fail, bits 31:29: */
+#define RB_DEPTHCONTROL_STENCILZFAIL_BF_MASK     0xe0000000
+static inline uint32_t RB_DEPTHCONTROL_STENCILZFAIL_BF(enum rb_stencil_op val)
+{
+	return ((uint32_t)val << 29) & RB_DEPTHCONTROL_STENCILZFAIL_BF_MASK;
+}
+
+
+/*
+ * Bits for RB_COPY_DEST_INFO:
+ */
+
+/*
+ * Field layout, as implied by the shifts below:
+ *
+ *   DEST_ENDIAN   bits  2:0
+ *   LINEAR        bit   3
+ *   FORMAT        bits  7:4
+ *   SWAP          bits  9:8
+ *   DITHER_MODE   bits 11:10
+ *   DITHER_TYPE   bits 13:12
+ *   WRITE_*       bits 17:14
+ *
+ * Every accessor masks its argument to the field width before shifting,
+ * so an out-of-range value cannot spill into the neighbouring field.
+ */
+enum rb_surface_endian {
+	ENDIAN_NONE = 0,
+	ENDIAN_8IN16 = 1,
+	ENDIAN_8IN32 = 2,
+	ENDIAN_16IN32 = 3,
+	ENDIAN_8IN64 = 4,
+	ENDIAN_8IN128 = 5,
+};
+static inline uint32_t RB_COPY_DEST_INFO_DEST_ENDIAN(enum rb_surface_endian val)
+{
+	return (val & 0x7) << 0;
+}
+#define RB_COPY_DEST_INFO_LINEAR       0x00000008
+/* same 4-bit width as RB_COLOR_INFO_COLOR_FORMAT, which takes the same enum */
+static inline uint32_t RB_COPY_DEST_INFO_FORMAT(enum rb_colorformatx val)
+{
+	return (val & 0xf) << 4;
+}
+#define RB_COPY_DEST_INFO_SWAP(val)    (((val) & 0x3) << 8) /* maybe VGT_DMA_SWAP_MODE? */
+enum rb_dither_mode {
+	DITHER_DISABLE = 0,
+	DITHER_ALWAYS = 1,
+	DITHER_IF_ALPHA_OFF = 2,
+};
+/* same 2-bit width as RB_COLORCONTROL_DITHER_MODE */
+static inline uint32_t RB_COPY_DEST_INFO_DITHER_MODE(enum rb_dither_mode val)
+{
+	return (val & 0x3) << 10;
+}
+enum rb_dither_type {
+	DITHER_PIXEL = 0,
+	DITHER_SUBPIXEL = 1,
+};
+/* same 2-bit width as RB_COLORCONTROL_DITHER_TYPE */
+static inline uint32_t RB_COPY_DEST_INFO_DITHER_TYPE(enum rb_dither_type val)
+{
+	return (val & 0x3) << 12;
+}
+#define RB_COPY_DEST_INFO_WRITE_RED    0x00004000
+#define RB_COPY_DEST_INFO_WRITE_GREEN  0x00008000
+#define RB_COPY_DEST_INFO_WRITE_BLUE   0x00010000
+#define RB_COPY_DEST_INFO_WRITE_ALPHA  0x00020000
+
+
+/*
+ * Bits for RB_COPY_DEST_OFFSET:
+ */
+
+/* X is bits 12:0 and Y is bits 25:13.  Each is 13 bits wide: a wider X
+ * mask would let bit 13 of X land in the Y field.
+ */
+#define RB_COPY_DEST_OFFSET_X(val)     ((val) & 0x1fff)
+#define RB_COPY_DEST_OFFSET_Y(val)     (((val) & 0x1fff) << 13)
+
+
+/*
+ * Bits for RB_COPY_CONTROL:
+ */
+
+/* DEPTH_CLEAR_ENABLE is bit 3; CLEAR_MASK is bits 7:4.  The argument is
+ * parenthesized so that an expression such as (a | b) is masked as a whole.
+ */
+#define RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE  0x00000008
+#define RB_COPY_CONTROL_CLEAR_MASK(val)     (((val) & 0xf) << 4)
+
+
+/*
+ * Bits for RB_COLORCONTROL:
+ */
+
+/* alpha compare function, bits 2:0: */
+#define RB_COLORCONTROL_ALPHA_FUNC(val)          ((val) & 0x7)
+#define RB_COLORCONTROL_ALPHA_TEST_ENABLE        0x00000008
+#define RB_COLORCONTROL_ALPHA_TO_MASK_ENABLE     0x00000010
+#define RB_COLORCONTROL_BLEND_DISABLE            0x00000020
+#define RB_COLORCONTROL_FOG_ENABLE               0x00000040
+#define RB_COLORCONTROL_VS_EXPORTS_FOG           0x00000080
+/* ROP code, bits 11:8: */
+#define RB_COLORCONTROL_ROP_CODE(val)            (((val) & 0xf) << 8)
+/* dither mode, bits 13:12: */
+static inline uint32_t RB_COLORCONTROL_DITHER_MODE(enum rb_dither_mode val)
+{
+	return (val & 0x3) << 12;
+}
+/* dither type, bits 15:14: */
+static inline uint32_t RB_COLORCONTROL_DITHER_TYPE(enum rb_dither_type val)
+{
+	return (val & 0x3) << 14;
+}
+#define RB_COLORCONTROL_PIXEL_FOG                0x00010000
+/* Four 2-bit alpha-to-mask offsets in bits 31:24.  The masked value is
+ * converted to uint32_t before shifting: with an int argument, 3 << 30
+ * does not fit in a signed int.
+ */
+#define RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0(val) ((uint32_t)((val) & 0x3) << 24)
+#define RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1(val) ((uint32_t)((val) & 0x3) << 26)
+#define RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2(val) ((uint32_t)((val) & 0x3) << 28)
+#define RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3(val) ((uint32_t)((val) & 0x3) << 30)
+
+
+/*
+ * Bits for RB_DEPTH_INFO:
+ *
+ *   DEPTH_FORMAT      bit   0
+ *   DEPTH_BASE        bits 12 and up (the argument is shifted, not masked)
+ */
+
+enum rb_depth_format {
+	DEPTHX_16 = 0,
+	DEPTHX_24_8 = 1,
+	/* not a hardware encoding: its value (2) does not fit the 1-bit field */
+	DEPTHX_INVALID,
+};
+
+/* Keeps only bit 0 of the format, so DEPTHX_INVALID encodes the same as
+ * DEPTHX_16.
+ */
+static inline uint32_t RB_DEPTH_INFO_DEPTH_FORMAT(enum rb_depth_format val)
+{
+	return val & 0x1;
+}
+#define RB_DEPTH_INFO_DEPTH_BASE(val)            ((val) << 12)
+
+
+/*
+ * Bits for RB_STENCILREFMASK (RB_STENCILREFMASK_BF is same):
+ *
+ *   STENCILREF        bits  7:0
+ *   STENCILMASK       bits 15:8
+ *   STENCILWRITEMASK  bits 23:16
+ */
+
+/* Each accessor shifts its argument into place and then applies the field
+ * mask, so only the low 8 bits of the argument are kept.
+ */
+#define RB_STENCILREFMASK_STENCILREF_MASK        0x000000ff
+#define RB_STENCILREFMASK_STENCILREF(val)        ((val) & RB_STENCILREFMASK_STENCILREF_MASK)
+#define RB_STENCILREFMASK_STENCILMASK_MASK       0x0000ff00
+#define RB_STENCILREFMASK_STENCILMASK(val)       (((val) << 8) & RB_STENCILREFMASK_STENCILMASK_MASK)
+#define RB_STENCILREFMASK_STENCILWRITEMASK_MASK  0x00ff0000
+#define RB_STENCILREFMASK_STENCILWRITEMASK(val)  (((val) << 16) & RB_STENCILREFMASK_STENCILWRITEMASK_MASK)
+
+
+/*
+ * Bits for RB_BC_CONTROL:
+ *
+ * Plain constants are single-bit flags; the function-like macros mask
+ * their argument to the field width and shift it into place.  Bit 13 has
+ * no macro in this list.
+ */
+
+#define RB_BC_CONTROL_ACCUM_LINEAR_MODE_ENABLE            0x00000001
+#define RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT(val)           (((val) & 0x3) << 1)
+#define RB_BC_CONTROL_DISABLE_EDRAM_CAM                   0x00000008
+#define RB_BC_CONTROL_DISABLE_EZ_FAST_CONTEXT_SWITCH      0x00000010
+#define RB_BC_CONTROL_DISABLE_EZ_NULL_ZCMD_DROP           0x00000020
+#define RB_BC_CONTROL_DISABLE_LZ_NULL_ZCMD_DROP           0x00000040
+#define RB_BC_CONTROL_ENABLE_AZ_THROTTLE                  0x00000080
+#define RB_BC_CONTROL_AZ_THROTTLE_COUNT(val)              (((val) & 0x1f) << 8)
+#define RB_BC_CONTROL_ENABLE_CRC_UPDATE                   0x00004000
+#define RB_BC_CONTROL_CRC_MODE                            0x00008000
+#define RB_BC_CONTROL_DISABLE_SAMPLE_COUNTERS             0x00010000
+#define RB_BC_CONTROL_DISABLE_ACCUM                       0x00020000
+#define RB_BC_CONTROL_ACCUM_ALLOC_MASK(val)               (((val) & 0xf) << 18)
+#define RB_BC_CONTROL_LINEAR_PERFORMANCE_ENABLE           0x00400000
+#define RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(val)          (((val) & 0xf) << 23)
+#define RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(val)      (((val) & 0x3) << 27)
+#define RB_BC_CONTROL_MEM_EXPORT_LINEAR_MODE_ENABLE       0x20000000
+#define RB_BC_CONTROL_CRC_SYSTEM                          0x40000000
+#define RB_BC_CONTROL_RESERVED6                           0x80000000
+
+
+/*
+ * Bits for RBBM_PM_OVERRIDE1:
+ *
+ * 32 single-bit flags, one macro per bit from bit 0 through bit 31.
+ */
+
+#define RBBM_PM_OVERRIDE1_RBBM_AHBCLK_PM_OVERRIDE         0x00000001
+#define RBBM_PM_OVERRIDE1_SC_REG_SCLK_PM_OVERRIDE         0x00000002
+#define RBBM_PM_OVERRIDE1_SC_SCLK_PM_OVERRIDE             0x00000004
+#define RBBM_PM_OVERRIDE1_SP_TOP_SCLK_PM_OVERRIDE         0x00000008
+#define RBBM_PM_OVERRIDE1_SP_V0_SCLK_PM_OVERRIDE          0x00000010
+#define RBBM_PM_OVERRIDE1_SQ_REG_SCLK_PM_OVERRIDE         0x00000020
+#define RBBM_PM_OVERRIDE1_SQ_REG_FIFOS_SCLK_PM_OVERRIDE   0x00000040
+#define RBBM_PM_OVERRIDE1_SQ_CONST_MEM_SCLK_PM_OVERRIDE   0x00000080
+#define RBBM_PM_OVERRIDE1_SQ_SQ_SCLK_PM_OVERRIDE          0x00000100
+#define RBBM_PM_OVERRIDE1_SX_SCLK_PM_OVERRIDE             0x00000200
+#define RBBM_PM_OVERRIDE1_SX_REG_SCLK_PM_OVERRIDE         0x00000400
+#define RBBM_PM_OVERRIDE1_TCM_TCO_SCLK_PM_OVERRIDE        0x00000800
+#define RBBM_PM_OVERRIDE1_TCM_TCM_SCLK_PM_OVERRIDE        0x00001000
+#define RBBM_PM_OVERRIDE1_TCM_TCD_SCLK_PM_OVERRIDE        0x00002000
+#define RBBM_PM_OVERRIDE1_TCM_REG_SCLK_PM_OVERRIDE        0x00004000
+#define RBBM_PM_OVERRIDE1_TPC_TPC_SCLK_PM_OVERRIDE        0x00008000
+#define RBBM_PM_OVERRIDE1_TPC_REG_SCLK_PM_OVERRIDE        0x00010000
+#define RBBM_PM_OVERRIDE1_TCF_TCA_SCLK_PM_OVERRIDE        0x00020000
+#define RBBM_PM_OVERRIDE1_TCF_TCB_SCLK_PM_OVERRIDE        0x00040000
+#define RBBM_PM_OVERRIDE1_TCF_TCB_READ_SCLK_PM_OVERRIDE   0x00080000
+#define RBBM_PM_OVERRIDE1_TP_TP_SCLK_PM_OVERRIDE          0x00100000
+#define RBBM_PM_OVERRIDE1_TP_REG_SCLK_PM_OVERRIDE         0x00200000
+#define RBBM_PM_OVERRIDE1_CP_G_SCLK_PM_OVERRIDE           0x00400000
+#define RBBM_PM_OVERRIDE1_CP_REG_SCLK_PM_OVERRIDE         0x00800000
+#define RBBM_PM_OVERRIDE1_CP_G_REG_SCLK_PM_OVERRIDE       0x01000000
+#define RBBM_PM_OVERRIDE1_SPI_SCLK_PM_OVERRIDE            0x02000000
+#define RBBM_PM_OVERRIDE1_RB_REG_SCLK_PM_OVERRIDE         0x04000000
+#define RBBM_PM_OVERRIDE1_RB_SCLK_PM_OVERRIDE             0x08000000
+#define RBBM_PM_OVERRIDE1_MH_MH_SCLK_PM_OVERRIDE          0x10000000
+#define RBBM_PM_OVERRIDE1_MH_REG_SCLK_PM_OVERRIDE         0x20000000
+#define RBBM_PM_OVERRIDE1_MH_MMU_SCLK_PM_OVERRIDE         0x40000000
+#define RBBM_PM_OVERRIDE1_MH_TCROQ_SCLK_PM_OVERRIDE       0x80000000
+
+
+/*
+ * Bits for RBBM_PM_OVERRIDE2:
+ *
+ * 12 single-bit flags covering bits 0 through 11; no macros are defined
+ * here for bits 12-31.
+ */
+
+#define RBBM_PM_OVERRIDE2_PA_REG_SCLK_PM_OVERRIDE         0x00000001
+#define RBBM_PM_OVERRIDE2_PA_PA_SCLK_PM_OVERRIDE          0x00000002
+#define RBBM_PM_OVERRIDE2_PA_AG_SCLK_PM_OVERRIDE          0x00000004
+#define RBBM_PM_OVERRIDE2_VGT_REG_SCLK_PM_OVERRIDE        0x00000008
+#define RBBM_PM_OVERRIDE2_VGT_FIFOS_SCLK_PM_OVERRIDE      0x00000010
+#define RBBM_PM_OVERRIDE2_VGT_VGT_SCLK_PM_OVERRIDE        0x00000020
+#define RBBM_PM_OVERRIDE2_DEBUG_PERF_SCLK_PM_OVERRIDE     0x00000040
+#define RBBM_PM_OVERRIDE2_PERM_SCLK_PM_OVERRIDE           0x00000080
+#define RBBM_PM_OVERRIDE2_GC_GA_GMEM0_PM_OVERRIDE         0x00000100
+#define RBBM_PM_OVERRIDE2_GC_GA_GMEM1_PM_OVERRIDE         0x00000200
+#define RBBM_PM_OVERRIDE2_GC_GA_GMEM2_PM_OVERRIDE         0x00000400
+#define RBBM_PM_OVERRIDE2_GC_GA_GMEM3_PM_OVERRIDE         0x00000800
+
+
+/*
+ * Bits for TC_CNTL_STATUS:
+ */
+
+#define TC_CNTL_STATUS_L2_INVALIDATE             0x00000001
+
+
+#endif /* FREEDRENO_A2XX_REG_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_blend.c b/src/gallium/drivers/freedreno/freedreno_blend.c
new file mode 100644
index 0000000..c965a73
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_blend.c
@@ -0,0 +1,175 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "freedreno_blend.h"
+#include "freedreno_context.h"
+#include "freedreno_util.h"
+
+/*
+ * Translate a gallium PIPE_BLENDFACTOR_* value into the hardware blend
+ * factor.  A raw value of 0 is treated the same as PIPE_BLENDFACTOR_ZERO.
+ * The dual-source (SRC1) factors and any unrecognised value are logged
+ * and yield 0.
+ */
+static enum rb_blend_op
+blend_factor(unsigned factor)
+{
+	switch (factor) {
+	/* constant factors: */
+	case 0:
+	case PIPE_BLENDFACTOR_ZERO:
+		return RB_BLEND_ZERO;
+	case PIPE_BLENDFACTOR_ONE:
+		return RB_BLEND_ONE;
+
+	/* source factors: */
+	case PIPE_BLENDFACTOR_SRC_COLOR:
+		return RB_BLEND_SRC_COLOR;
+	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+		return RB_BLEND_ONE_MINUS_SRC_COLOR;
+	case PIPE_BLENDFACTOR_SRC_ALPHA:
+		return RB_BLEND_SRC_ALPHA;
+	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+		return RB_BLEND_ONE_MINUS_SRC_ALPHA;
+	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+		return RB_BLEND_SRC_ALPHA_SATURATE;
+
+	/* destination factors: */
+	case PIPE_BLENDFACTOR_DST_COLOR:
+		return RB_BLEND_DST_COLOR;
+	case PIPE_BLENDFACTOR_INV_DST_COLOR:
+		return RB_BLEND_ONE_MINUS_DST_COLOR;
+	case PIPE_BLENDFACTOR_DST_ALPHA:
+		return RB_BLEND_DST_ALPHA;
+	case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+		return RB_BLEND_ONE_MINUS_DST_ALPHA;
+
+	/* blend-constant factors: */
+	case PIPE_BLENDFACTOR_CONST_COLOR:
+		return RB_BLEND_CONSTANT_COLOR;
+	case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+		return RB_BLEND_ONE_MINUS_CONSTANT_COLOR;
+	case PIPE_BLENDFACTOR_CONST_ALPHA:
+		return RB_BLEND_CONSTANT_ALPHA;
+	case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+		return RB_BLEND_ONE_MINUS_CONSTANT_ALPHA;
+
+	/* I don't think these are supported */
+	case PIPE_BLENDFACTOR_SRC1_COLOR:
+	case PIPE_BLENDFACTOR_SRC1_ALPHA:
+	case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+	case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+	default:
+		break;
+	}
+
+	DBG("invalid blend factor: %x", factor);
+	return 0;
+}
+
+/*
+ * Translate a gallium PIPE_BLEND_* equation into the hardware combine
+ * function.  Unrecognised values are logged and yield 0.
+ */
+static enum rb_comb_func
+blend_func(unsigned func)
+{
+	switch (func) {
+	case PIPE_BLEND_ADD:
+		return COMB_DST_PLUS_SRC;
+	case PIPE_BLEND_SUBTRACT:
+		return COMB_SRC_MINUS_DST;
+	case PIPE_BLEND_REVERSE_SUBTRACT:
+		return COMB_DST_MINUS_SRC;
+	case PIPE_BLEND_MIN:
+		return COMB_MIN_DST_SRC;
+	case PIPE_BLEND_MAX:
+		return COMB_MAX_DST_SRC;
+	default:
+		break;
+	}
+
+	DBG("invalid blend func: %x", func);
+	return 0;
+}
+
+/*
+ * Create a blend state object from the gallium blend CSO.
+ *
+ * Logic ops and independent per-render-target blending are rejected (a
+ * message is logged and NULL is returned); only cso->rt[0] is consulted.
+ * Returns a zero-initialised, heap-allocated fd_blend_stateobj holding a
+ * copy of the CSO plus the precomputed register values, or NULL on
+ * allocation failure.
+ */
+static void *
+fd_blend_state_create(struct pipe_context *pctx,
+		const struct pipe_blend_state *cso)
+{
+	const struct pipe_rt_blend_state *rt0 = &cso->rt[0];
+	struct fd_blend_stateobj *blend;
+	uint32_t colorcontrol, colormask;
+
+	if (cso->logicop_enable) {
+		DBG("Unsupported! logicop");
+		return NULL;
+	}
+
+	if (cso->independent_blend_enable) {
+		DBG("Unsupported! independent blend state");
+		return NULL;
+	}
+
+	blend = CALLOC_STRUCT(fd_blend_stateobj);
+	if (blend == NULL)
+		return NULL;
+
+	blend->base = *cso;
+
+	blend->rb_blendcontrol =
+		RB_BLENDCONTROL_COLOR_SRCBLEND(blend_factor(rt0->rgb_src_factor)) |
+		RB_BLENDCONTROL_COLOR_COMB_FCN(blend_func(rt0->rgb_func)) |
+		RB_BLENDCONTROL_COLOR_DESTBLEND(blend_factor(rt0->rgb_dst_factor)) |
+		RB_BLENDCONTROL_ALPHA_SRCBLEND(blend_factor(rt0->alpha_src_factor)) |
+		RB_BLENDCONTROL_ALPHA_COMB_FCN(blend_func(rt0->alpha_func)) |
+		RB_BLENDCONTROL_ALPHA_DESTBLEND(blend_factor(rt0->alpha_dst_factor));
+
+	/* one write-enable bit per channel present in the gallium colormask: */
+	colormask = 0;
+	colormask |= (rt0->colormask & PIPE_MASK_R) ? RB_COLOR_MASK_WRITE_RED : 0;
+	colormask |= (rt0->colormask & PIPE_MASK_G) ? RB_COLOR_MASK_WRITE_GREEN : 0;
+	colormask |= (rt0->colormask & PIPE_MASK_B) ? RB_COLOR_MASK_WRITE_BLUE : 0;
+	colormask |= (rt0->colormask & PIPE_MASK_A) ? RB_COLOR_MASK_WRITE_ALPHA : 0;
+	blend->rb_colormask = colormask;
+
+	/* the ROP code is always 12; the remaining bits depend on the CSO: */
+	colorcontrol = RB_COLORCONTROL_ROP_CODE(12);
+	if (!rt0->blend_enable)
+		colorcontrol |= RB_COLORCONTROL_BLEND_DISABLE;
+	if (cso->dither)
+		colorcontrol |= RB_COLORCONTROL_DITHER_MODE(DITHER_ALWAYS);
+	blend->rb_colorcontrol = colorcontrol;
+
+	return blend;
+}
+
+/* Make hwcso the context's current blend state and flag it for re-emit. */
+static void
+fd_blend_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+	struct fd_context *fd = fd_context(pctx);
+
+	fd->dirty |= FD_DIRTY_BLEND;
+	fd->blend = hwcso;
+}
+
+/* Release a blend state object; pctx is not used. */
+static void
+fd_blend_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+	FREE(hwcso);
+}
+
+/* Install the blend-state create/bind/delete hooks on the context. */
+void
+fd_blend_init(struct pipe_context *pctx)
+{
+	pctx->delete_blend_state = fd_blend_state_delete;
+	pctx->bind_blend_state = fd_blend_state_bind;
+	pctx->create_blend_state = fd_blend_state_create;
+}
+
diff --git a/src/gallium/drivers/freedreno/freedreno_blend.h b/src/gallium/drivers/freedreno/freedreno_blend.h
new file mode 100644
index 0000000..70950df
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_blend.h
@@ -0,0 +1,44 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#ifndef FREEDRENO_BLEND_H_
+#define FREEDRENO_BLEND_H_
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+/* Driver-side blend state: the gallium CSO plus register values that are
+ * computed once at creation time.
+ */
+struct fd_blend_stateobj {
+	struct pipe_blend_state base;   /* copy of the CSO this was created from */
+	uint32_t rb_blendcontrol;   /* RB_BLENDCONTROL_* src/dst factors and combine functions */
+	uint32_t rb_colorcontrol;   /* must be OR'd w/ zsa->rb_colorcontrol */
+	uint32_t rb_colormask;      /* RB_COLOR_MASK_WRITE_* bits */
+};
+
+void fd_blend_init(struct pipe_context *pctx);
+
+#endif /* FREEDRENO_BLEND_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_clear.c b/src/gallium/drivers/freedreno/freedreno_clear.c
new file mode 100644
index 0000000..1e25d6f
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_clear.c
@@ -0,0 +1,200 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_pack_color.h"
+
+#include "freedreno_clear.h"
+#include "freedreno_context.h"
+#include "freedreno_state.h"
+#include "freedreno_program.h"
+#include "freedreno_zsa.h"
+#include "freedreno_util.h"
+
+/* Pack four float channels into a single 32-bit value of the given format. */
+static uint32_t
+pack_rgba(enum pipe_format format, const float *rgba)
+{
+	union util_color packed;
+
+	util_pack_color(rgba, format, &packed);
+
+	return packed.ui;
+}
+
+/*
+ * pipe_context::clear hook.
+ *
+ * Records which buffers were cleared, then emits a RECTLIST draw using
+ * the context's solid program and solid vertex buffer, with the clear
+ * color, depth-clear value and depth/stencil control programmed from the
+ * arguments.  The state blocks this overwrites are marked dirty at the
+ * end so the next draw re-emits them.
+ *
+ * A framebuffer without a color buffer is accepted: the color is then
+ * left at 0 and no color format is programmed, instead of dereferencing
+ * a missing cbufs[0].
+ */
+static void
+fd_clear(struct pipe_context *pctx, unsigned buffers,
+		const union pipe_color_union *color, double depth, unsigned stencil)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	struct fd_ringbuffer *ring = ctx->ring;
+	struct pipe_framebuffer_state *fb = &ctx->framebuffer.base;
+	/* first color buffer, or NULL for a depth/stencil-only framebuffer: */
+	struct pipe_surface *cbuf = fb->nr_cbufs ? fb->cbufs[0] : NULL;
+	uint32_t reg, colr = 0;
+
+	ctx->cleared |= buffers;
+	ctx->needs_flush = true;
+
+	DBG("depth=%f, stencil=%u", depth, stencil);
+
+	if ((buffers & PIPE_CLEAR_COLOR) && cbuf)
+		colr  = pack_rgba(cbuf->format, color->f);
+
+	/* emit generic state now: */
+	fd_state_emit(pctx, ctx->dirty &
+			(FD_DIRTY_BLEND | FD_DIRTY_VIEWPORT | FD_DIRTY_FRAMEBUFFER));
+
+	fd_emit_vertex_bufs(ring, 0x9c, (struct fd_vertex_buf[]) {
+			{ .prsc = ctx->solid_vertexbuf, .size = 48 },
+		}, 1);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_VGT_INDX_OFFSET));
+	OUT_RING(ring, 0);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL));
+	OUT_RING(ring, 0x0000028f);
+
+	fd_program_emit(ring, &ctx->solid_prog);
+
+	OUT_PKT0(ring, REG_TC_CNTL_STATUS, 1);
+	OUT_RING(ring, TC_CNTL_STATUS_L2_INVALIDATE);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_CLEAR_COLOR));
+	OUT_RING(ring, colr);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_A220_RB_LRZ_VSC_CONTROL));
+	OUT_RING(ring, 0x00000084);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_RB_COPY_CONTROL));
+	reg = 0;
+	if (buffers & PIPE_CLEAR_DEPTH) {
+		reg |= RB_COPY_CONTROL_CLEAR_MASK(0xf) |
+				RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE;
+	}
+	OUT_RING(ring, reg);
+
+	/* the depth-clear encoding is chosen from the bound zsa state: */
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_RB_DEPTH_CLEAR));
+	if (fd_stencil_enabled(ctx->zsa)) {
+		/* DEPTHX_24_8 */
+		reg = (((uint32_t)(0xffffff * depth)) << 8) | (stencil & 0xff);
+	} else if (fd_depth_enabled(ctx->zsa)) {
+		/* DEPTHX_16 */
+		reg = (uint32_t)(0xffffffff * depth);
+	} else {
+		reg = 0;
+	}
+	OUT_RING(ring, reg);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_RB_DEPTHCONTROL));
+	reg = 0;
+	if (buffers & PIPE_CLEAR_DEPTH) {
+		reg |= RB_DEPTHCONTROL_ZFUNC(GL_ALWAYS) |
+				RB_DEPTHCONTROL_Z_ENABLE |
+				RB_DEPTHCONTROL_Z_WRITE_ENABLE |
+				RB_DEPTHCONTROL_EARLY_Z_ENABLE;
+	}
+	if (buffers & PIPE_CLEAR_STENCIL) {
+		reg |= RB_DEPTHCONTROL_STENCILFUNC(GL_ALWAYS) |
+				RB_DEPTHCONTROL_STENCIL_ENABLE |
+				RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE);
+	}
+	OUT_RING(ring, reg);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+	OUT_RING(ring, CP_REG(REG_PA_CL_CLIP_CNTL));
+	OUT_RING(ring, 0x00000000);        /* PA_CL_CLIP_CNTL */
+	OUT_RING(ring, PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |  /* PA_SU_SC_MODE_CNTL */
+			PA_SU_SC_MODE_CNTL_POLYMODE_FRONT_PTYPE(DRAW_TRIANGLES) |
+			PA_SU_SC_MODE_CNTL_POLYMODE_BACK_PTYPE(DRAW_TRIANGLES));
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_PA_SC_AA_MASK));
+	OUT_RING(ring, 0x0000ffff);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+	OUT_RING(ring, CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL));
+	OUT_RING(ring, xy2d(0,0));	        /* PA_SC_WINDOW_SCISSOR_TL */
+	OUT_RING(ring, xy2d(fb->width,      /* PA_SC_WINDOW_SCISSOR_BR */
+			fb->height));
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_RB_COLOR_INFO));
+	reg = RB_COLOR_INFO_COLOR_SWAP(1);
+	if (cbuf)
+		reg |= RB_COLOR_INFO_COLOR_FORMAT(fd_pipe2color(cbuf->format));
+	OUT_RING(ring, reg);
+
+	OUT_PKT3(ring, CP_DRAW_INDX, 3);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX,
+			INDEX_SIZE_IGN, IGNORE_VISIBILITY));
+	OUT_RING(ring, 3);					/* NumIndices */
+
+	/* restore the two registers that were switched into clear mode: */
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_A220_RB_LRZ_VSC_CONTROL));
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_RB_COPY_CONTROL));
+	OUT_RING(ring, 0x00000000);
+
+	ctx->dirty |= FD_DIRTY_ZSA |
+			FD_DIRTY_RASTERIZER |
+			FD_DIRTY_SAMPLE_MASK |
+			FD_DIRTY_PROG |
+			FD_DIRTY_CONSTBUF;
+}
+
+/* Not implemented yet: only logs the requested rectangle and clears
+ * nothing.
+ */
+static void
+fd_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
+		const union pipe_color_union *color,
+		unsigned x, unsigned y, unsigned w, unsigned h)
+{
+	DBG("TODO: x=%u, y=%u, w=%u, h=%u", x, y, w, h);
+}
+
+/* Not implemented yet: only logs the request and clears nothing. */
+static void
+fd_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
+		unsigned buffers, double depth, unsigned stencil,
+		unsigned x, unsigned y, unsigned w, unsigned h)
+{
+	DBG("TODO: buffers=%u, depth=%f, stencil=%u, x=%u, y=%u, w=%u, h=%u",
+			buffers, depth, stencil, x, y, w, h);
+}
+
+/* Install the clear hooks on the context. */
+void
+fd_clear_init(struct pipe_context *pctx)
+{
+	pctx->clear_depth_stencil = fd_clear_depth_stencil;
+	pctx->clear_render_target = fd_clear_render_target;
+	pctx->clear = fd_clear;
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_clear.h b/src/gallium/drivers/freedreno/freedreno_clear.h
new file mode 100644
index 0000000..31bb037
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_clear.h
@@ -0,0 +1,37 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#ifndef FREEDRENO_CLEAR_H_
+#define FREEDRENO_CLEAR_H_
+
+#include "pipe/p_context.h"
+
+void fd_clear_init(struct pipe_context *pctx);
+
+
+#endif /* FREEDRENO_CLEAR_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_compiler.c b/src/gallium/drivers/freedreno/freedreno_compiler.c
new file mode 100644
index 0000000..606e902
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_compiler.c
@@ -0,0 +1,1096 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_ureg.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_strings.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "freedreno_program.h"
+#include "freedreno_compiler.h"
+#include "freedreno_util.h"
+
+#include "instr.h"
+#include "ir.h"
+
+/* Per-shader compile state, (re)initialised by compile_init(). */
+struct fd_compile_context {
+	struct fd_program_stateobj *prog;
+	struct fd_shader_stateobj *so;
+
+	struct tgsi_parse_context parser;
+	/* processor type taken from the TGSI header (e.g. TGSI_PROCESSOR_VERTEX) */
+	unsigned type;
+
+	/* predicate stack: */
+	int pred_depth;
+	enum ir_pred pred_stack[8];
+
+	/* number of registers declared per TGSI register file: */
+	uint8_t num_regs[TGSI_FILE_COUNT];
+
+	/* maps input register idx to prog->export_linkage idx: */
+	uint8_t input_export_idx[64];
+
+	/* maps output register idx to prog->export_linkage idx: */
+	uint8_t output_export_idx[64];
+
+	/* idx/slot for last compiler generated immediate */
+	unsigned immediate_idx;
+
+	// TODO we can skip emitting exports in the VS that the FS doesn't need..
+	// and perhaps get rid of num_param..
+	unsigned num_position, num_param;
+	/* output register index of the position / point-size output, or ~0
+	 * if the shader does not declare one:
+	 */
+	unsigned position, psize;
+
+	/* bitmask with one bit per GPR; compile_vtx_fetch() sets the bit of
+	 * each register it fetches into:
+	 */
+	uint64_t need_sync;
+
+	/* current exec CF instruction */
+	struct ir_cf *cf;
+};
+
+/* Flatten a TGSI semantic into a single index: the semantic name itself,
+ * except for GENERIC semantics, which are placed after all named ones
+ * (TGSI_SEMANTIC_COUNT + the generic index).
+ */
+static int
+semantic_idx(struct tgsi_declaration_semantic *semantic)
+{
+	if (semantic->Name == TGSI_SEMANTIC_GENERIC)
+		return TGSI_SEMANTIC_COUNT + semantic->Index;
+
+	return semantic->Name;
+}
+
+/* Look up the input/export register # for the given semantic idx (as
+ * returned by semantic_idx()).  A slot holding 0xff is unassigned; it is
+ * given the next free export number on first use.
+ */
+static int
+export_linkage(struct fd_compile_context *ctx, int idx)
+{
+	struct fd_program_stateobj *prog = ctx->prog;
+
+	/* already assigned by an earlier lookup: */
+	if (prog->export_linkage[idx] != 0xff)
+		return prog->export_linkage[idx];
+
+	prog->export_linkage[idx] = prog->num_exports++;
+
+	return prog->export_linkage[idx];
+}
+
+/*
+ * Reset the compile context for shader 'so' and run a first pass over its
+ * TGSI tokens to collect declarations and immediates:
+ *
+ *   - per-file register counts (num_regs[]),
+ *   - the input/output register -> semantic index maps,
+ *   - for vertex shaders, the position/psize output registers and the
+ *     number of position vs. param exports,
+ *   - the shader's immediates (copied into so->immediates[]).
+ *
+ * The parser is then re-initialised so the caller can walk the tokens
+ * again from the start.  Returns TGSI_PARSE_OK on success, otherwise the
+ * error from tgsi_parse_init().
+ */
+static unsigned
+compile_init(struct fd_compile_context *ctx, struct fd_program_stateobj *prog,
+		struct fd_shader_stateobj *so)
+{
+	unsigned ret;
+
+	ctx->prog = prog;
+	ctx->so = so;
+	ctx->cf = NULL;
+	ctx->pred_depth = 0;
+
+	ret = tgsi_parse_init(&ctx->parser, so->tokens);
+	if (ret != TGSI_PARSE_OK)
+		return ret;
+
+	ctx->type = ctx->parser.FullHeader.Processor.Processor;
+	ctx->position = ~0;
+	ctx->psize = ~0;
+	ctx->num_position = 0;
+	ctx->num_param = 0;
+	ctx->need_sync = 0;
+	ctx->immediate_idx = 0;
+
+	memset(ctx->num_regs, 0, sizeof(ctx->num_regs));
+	memset(ctx->input_export_idx, 0, sizeof(ctx->input_export_idx));
+	memset(ctx->output_export_idx, 0, sizeof(ctx->output_export_idx));
+
+	/* do first pass to extract declarations: */
+	while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
+		tgsi_parse_token(&ctx->parser);
+
+		switch (ctx->parser.FullToken.Token.Type) {
+		case TGSI_TOKEN_TYPE_DECLARATION: {
+			struct tgsi_full_declaration *decl =
+					&ctx->parser.FullToken.FullDeclaration;
+			if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
+				unsigned name = decl->Semantic.Name;
+
+				assert(decl->Declaration.Semantic);  // TODO is this ever not true?
+				assert(decl->Range.First < ARRAY_SIZE(ctx->output_export_idx));
+
+				ctx->output_export_idx[decl->Range.First] =
+						semantic_idx(&decl->Semantic);
+
+				if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+					switch (name) {
+					case TGSI_SEMANTIC_POSITION:
+						ctx->position = ctx->num_regs[TGSI_FILE_OUTPUT];
+						ctx->num_position++;
+						break;
+					case TGSI_SEMANTIC_PSIZE:
+						ctx->psize = ctx->num_regs[TGSI_FILE_OUTPUT];
+						ctx->num_position++;
+						/* point size is a position-type export, not a
+						 * param: do not fall through into num_param++
+						 */
+						break;
+					case TGSI_SEMANTIC_COLOR:
+					case TGSI_SEMANTIC_GENERIC:
+						ctx->num_param++;
+						break;
+					default:
+						DBG("unknown VS semantic name: %s",
+								tgsi_semantic_names[name]);
+						assert(0);
+					}
+				} else {
+					switch (name) {
+					case TGSI_SEMANTIC_COLOR:
+					case TGSI_SEMANTIC_GENERIC:
+						ctx->num_param++;
+						break;
+					default:
+						DBG("unknown PS semantic name: %s",
+								tgsi_semantic_names[name]);
+						assert(0);
+					}
+				}
+			} else if (decl->Declaration.File == TGSI_FILE_INPUT) {
+				assert(decl->Range.First < ARRAY_SIZE(ctx->input_export_idx));
+
+				ctx->input_export_idx[decl->Range.First] =
+						semantic_idx(&decl->Semantic);
+			}
+			ctx->num_regs[decl->Declaration.File] +=
+					1 + decl->Range.Last - decl->Range.First;
+			break;
+		}
+		case TGSI_TOKEN_TYPE_IMMEDIATE: {
+			struct tgsi_full_immediate *imm =
+					&ctx->parser.FullToken.FullImmediate;
+			/* NOTE(review): n is not checked against the capacity of
+			 * so->immediates, which is declared outside this file.
+			 */
+			unsigned n = ctx->so->num_immediates++;
+			memcpy(ctx->so->immediates[n].val, imm->u, 16);
+			break;
+		}
+		default:
+			break;
+		}
+	}
+
+	/* TGSI generated immediates are always entire vec4's, ones we
+	 * generate internally are not:
+	 */
+	ctx->immediate_idx = ctx->so->num_immediates * 4;
+
+	ctx->so->first_immediate = ctx->num_regs[TGSI_FILE_CONSTANT];
+
+	/* rewind: free the first-pass parser and start over for the caller */
+	tgsi_parse_free(&ctx->parser);
+
+	return tgsi_parse_init(&ctx->parser, so->tokens);
+}
+
+/* Release the TGSI parser state held by the compile context. */
+static void
+compile_free(struct fd_compile_context *ctx)
+{
+	tgsi_parse_free(&ctx->parser);
+}
+
+/* Return the EXEC CF instruction that the next instruction should be
+ * added to: the current one while it still has a free instruction slot,
+ * otherwise a newly created one (which then becomes the current one).
+ */
+static struct ir_cf *
+next_exec_cf(struct fd_compile_context *ctx)
+{
+	struct ir_cf *cur = ctx->cf;
+
+	if (cur && cur->exec.instrs_count < ARRAY_SIZE(cur->exec.instrs))
+		return cur;
+
+	cur = ir_cf_create(ctx->so->ir, EXEC);
+	ctx->cf = cur;
+
+	return cur;
+}
+
+static void /* Emit one VTX_FETCH per declared input and record them in so->vfetch_instrs. */
+compile_vtx_fetch(struct fd_compile_context *ctx)
+{
+	struct ir_instruction **vfetch_instrs = ctx->so->vfetch_instrs;
+	int i;
+	for (i = 0; i < ctx->num_regs[TGSI_FILE_INPUT]; i++) {
+		struct ir_instruction *instr = ir_instr_create(
+				next_exec_cf(ctx), IR_FETCH);
+		instr->fetch.opc = VTX_FETCH;
+		/* shift in 64 bits: an int "1 << n" sign-extends/overflows from bit 31 on */
+		ctx->need_sync |= (uint64_t)1 << (i+1);
+		/* input i uses register i+1 (xyzw); the second register is 0 (x): */
+		ir_reg_create(instr, i+1, "xyzw", 0);
+		ir_reg_create(instr, 0, "x", 0);
+
+		if (i == 0)
+			instr->sync = true;
+
+		vfetch_instrs[i] = instr;
+	}
+	ctx->so->num_vfetch_instrs = i;
+	ctx->cf = NULL; /* following instructions start a new EXEC clause */
+}
+
+/*
+ * For vertex shaders (VS):
+ * --- ------ -------------
+ *
+ *   Inputs:     R1-R(num_input)
+ *   Constants:  C0-C(num_const-1)
+ *   Immediates: C(num_const)-C(num_const+num_imm-1)
+ *   Outputs:    export0-export(n) and export62, export63
+ *      n is # of outputs minus gl_Position (export62) and gl_PointSize (export63)
+ *   Temps:      R(num_input+1)-R(num_input+num_temps)
+ *
+ * R0 could be clobbered after the vertex fetch instructions.. so we
+ * could use it for one of the temporaries.
+ *
+ * TODO: maybe the vertex fetch part could fetch first input into R0 as
+ * the last vtx fetch instruction, which would let us use the same
+ * register layout in either case.. although this is not what the blob
+ * compiler does.
+ *
+ *
+ * For frag shaders (PS):
+ * --- ---- -------------
+ *
+ *   Inputs:     R0-R(num_input-1)
+ *   Constants:  same as VS
+ *   Immediates: same as VS
+ *   Outputs:    export0-export(num_outputs)
+ *   Temps:      R(num_input)-R(num_input+num_temps-1)
+ *
+ * In either case, immediates are appended to the constants
+ * (uniforms).
+ *
+ */
+
+static unsigned /* map TGSI temporary index idx to its register number */
+get_temp_gpr(struct fd_compile_context *ctx, int idx)
+{
+	/* temporaries follow the inputs; for a vertex shader they start one higher: */
+	unsigned first_temp = ctx->num_regs[TGSI_FILE_INPUT] +
+			((ctx->type == TGSI_PROCESSOR_VERTEX) ? 1 : 0);
+	return idx + first_temp;
+}
+
+static struct ir_register * /* append a register to alu for TGSI dst; returns the new ir_register */
+add_dst_reg(struct fd_compile_context *ctx, struct ir_instruction *alu,
+		const struct tgsi_dst_register *dst)
+{
+	unsigned flags = 0, num = 0;
+	char swiz[5];
+
+	switch (dst->File) {
+	case TGSI_FILE_OUTPUT:
+		flags |= IR_REG_EXPORT;
+		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+			if (dst->Index == ctx->position) {
+				num = 62; /* export62: position output */
+			} else if (dst->Index == ctx->psize) {
+				num = 63; /* export63: point-size output */
+			} else {
+				num = export_linkage(ctx,
+						ctx->output_export_idx[dst->Index]);
+			}
+		} else {
+			num = dst->Index; /* non-vertex shaders export by plain output index */
+		}
+		break;
+	case TGSI_FILE_TEMPORARY:
+		num = get_temp_gpr(ctx, dst->Index);
+		break;
+	default:
+		DBG("unsupported dst register file: %s",
+				tgsi_file_names[dst->File]);
+		assert(0);
+		break;
+	}
+	/* build the write-mask string: component letter if written, '_' if not */
+	swiz[0] = (dst->WriteMask & TGSI_WRITEMASK_X) ? 'x' : '_';
+	swiz[1] = (dst->WriteMask & TGSI_WRITEMASK_Y) ? 'y' : '_';
+	swiz[2] = (dst->WriteMask & TGSI_WRITEMASK_Z) ? 'z' : '_';
+	swiz[3] = (dst->WriteMask & TGSI_WRITEMASK_W) ? 'w' : '_';
+	swiz[4] = '\0';
+
+	return ir_reg_create(alu, num, swiz, flags);
+}
+
+static struct ir_register * /* append a register to alu for TGSI src; returns the new ir_register */
+add_src_reg(struct fd_compile_context *ctx, struct ir_instruction *alu,
+		const struct tgsi_src_register *src)
+{
+	static const char swiz_vals[] = {
+			'x', 'y', 'z', 'w',
+	};
+	char swiz[5];
+	unsigned flags = 0, num = 0;
+	/* translate the TGSI register file + index into a register/constant number: */
+	switch (src->File) {
+	case TGSI_FILE_CONSTANT:
+		num = src->Index;
+		flags |= IR_REG_CONST;
+		break;
+	case TGSI_FILE_INPUT:
+		if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+			num = src->Index + 1; /* vertex inputs start at register 1 */
+		} else {
+			num = export_linkage(ctx,
+					ctx->input_export_idx[src->Index]);
+		}
+		break;
+	case TGSI_FILE_TEMPORARY:
+		num = get_temp_gpr(ctx, src->Index);
+		break;
+	case TGSI_FILE_IMMEDIATE:
+		num = src->Index + ctx->num_regs[TGSI_FILE_CONSTANT]; /* immediates follow the constants */
+		flags |= IR_REG_CONST;
+		break;
+	default:
+		DBG("unsupported src register file: %s",
+				tgsi_file_names[src->File]);
+		assert(0);
+		break;
+	}
+
+	if (src->Absolute)
+		flags |= IR_REG_ABS;
+	if (src->Negate)
+		flags |= IR_REG_NEGATE;
+
+	swiz[0] = swiz_vals[src->SwizzleX];
+	swiz[1] = swiz_vals[src->SwizzleY];
+	swiz[2] = swiz_vals[src->SwizzleZ];
+	swiz[3] = swiz_vals[src->SwizzleW];
+	swiz[4] = '\0';
+	/* test CONST first (its num is no mask bit), and shift in 64 bits, not int: */
+	if (!(flags & IR_REG_CONST) &&
+			(ctx->need_sync & ((uint64_t)1 << num))) {
+		alu->sync = true;
+		ctx->need_sync &= ~((uint64_t)1 << num);
+	}
+
+	return ir_reg_create(alu, num, swiz, flags);
+}
+
+static void /* translate the TGSI saturate mode into the vector clamp flag of alu */
+add_vector_clamp(struct tgsi_full_instruction *inst, struct ir_instruction *alu)
+{
+	unsigned sat = inst->Instruction.Saturate;
+
+	if (sat == TGSI_SAT_ZERO_ONE) {
+		/* zero..one saturation was requested: */
+		alu->alu.vector_clamp = true;
+	} else if (sat == TGSI_SAT_MINUS_PLUS_ONE) {
+		/* the minus-one..plus-one mode is not handled: */
+		DBG("unsupported saturate");
+		assert(0);
+	}
+	/* TGSI_SAT_NONE (or any other value): instruction is left untouched */
+}
+
+static void /* translate the TGSI saturate mode into the scalar clamp flag of alu */
+add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir_instruction *alu)
+{
+	unsigned sat = inst->Instruction.Saturate;
+
+	if (sat == TGSI_SAT_ZERO_ONE) {
+		/* zero..one saturation was requested: */
+		alu->alu.scalar_clamp = true;
+	} else if (sat == TGSI_SAT_MINUS_PLUS_ONE) {
+		/* the minus-one..plus-one mode is not handled: */
+		DBG("unsupported saturate");
+		assert(0);
+	}
+	/* TGSI_SAT_NONE (or any other value): instruction is left untouched */
+}
+
+static void /* register setup for a vector op with one TGSI source */
+add_regs_vector_1(struct fd_compile_context *ctx,
+		struct tgsi_full_instruction *inst, struct ir_instruction *alu)
+{
+	assert(inst->Instruction.NumSrcRegs == 1);
+	assert(inst->Instruction.NumDstRegs == 1);
+
+	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
+	add_src_reg(ctx, alu, &inst->Src[0].Register); /* first source operand */
+	add_src_reg(ctx, alu, &inst->Src[0].Register); /* same source again as second operand */
+	add_vector_clamp(inst, alu);
+}
+
+static void /* register setup for a vector op with two TGSI sources */
+add_regs_vector_2(struct fd_compile_context *ctx,
+		struct tgsi_full_instruction *inst, struct ir_instruction *alu)
+{
+	assert(inst->Instruction.NumSrcRegs == 2);
+	assert(inst->Instruction.NumDstRegs == 1);
+
+	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
+	add_src_reg(ctx, alu, &inst->Src[0].Register); /* sources in TGSI order */
+	add_src_reg(ctx, alu, &inst->Src[1].Register);
+	add_vector_clamp(inst, alu);
+}
+
+static void /* register setup for a vector op with three TGSI sources */
+add_regs_vector_3(struct fd_compile_context *ctx,
+		struct tgsi_full_instruction *inst, struct ir_instruction *alu)
+{
+	assert(inst->Instruction.NumSrcRegs == 3);
+	assert(inst->Instruction.NumDstRegs == 1);
+
+	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
+	/* maybe should re-arrange the syntax some day, but
+	 * in assembler/disassembler and what ir.c expects
+	 * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rsrc1
+	 */
+	add_src_reg(ctx, alu, &inst->Src[2].Register); /* third TGSI source goes first */
+	add_src_reg(ctx, alu, &inst->Src[0].Register);
+	add_src_reg(ctx, alu, &inst->Src[1].Register);
+	add_vector_clamp(inst, alu);
+}
+
+static void /* fill the vector half of alu with placeholders when only the scalar op is used */
+add_regs_dummy_vector(struct ir_instruction *alu)
+{
+	/* create dummy, non-written vector dst/src regs
+	 * for unused vector instr slot (dst mask "____" writes no component):
+	 */
+	ir_reg_create(alu, 0, "____", 0); /* vector dst */
+	ir_reg_create(alu, 0, NULL, 0);   /* vector src1 */
+	ir_reg_create(alu, 0, NULL, 0);   /* vector src2 */
+}
+
+static void /* register setup for a scalar-only op with one TGSI source */
+add_regs_scalar_1(struct fd_compile_context *ctx,
+		struct tgsi_full_instruction *inst, struct ir_instruction *alu)
+{
+	assert(inst->Instruction.NumSrcRegs == 1);
+	assert(inst->Instruction.NumDstRegs == 1);
+	/* placeholder vector registers come first, then the scalar dst/src: */
+	add_regs_dummy_vector(alu);
+
+	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
+	add_src_reg(ctx, alu, &inst->Src[0].Register);
+	add_scalar_clamp(inst, alu);
+}
+
+/*
+ * Helpers for TGSI instructions that don't map to a single shader instr:
+ */
+
+/* Get temp src/dst to use for a sequence of instructions generated by a
+ * single TGSI op.. if possible, use the final dst register as the temporary
+ * to avoid allocating a new register, but if necessary allocate one.
+ */
+static bool /* fills tmp_dst/tmp_src; returns true when a scratch temp replaces orig_dst */
+get_temporary(struct fd_compile_context *ctx,
+		struct tgsi_dst_register *orig_dst,
+		struct tgsi_dst_register *tmp_dst,
+		struct tgsi_src_register *tmp_src)
+{
+	bool using_temp = false;
+	/* default: the scratch destination is the final destination itself */
+	*tmp_dst = *orig_dst;
+	/* if orig dst is output, use the first index past the declared temporaries.
+	 * NOTE(review): Index goes through get_temp_gpr() here and again in add_*_reg() - confirm intended */
+	if (orig_dst->File == TGSI_FILE_OUTPUT) {
+		using_temp = true;
+		tmp_dst->Index = get_temp_gpr(ctx,
+				ctx->num_regs[TGSI_FILE_TEMPORARY]);
+		tmp_dst->File  = TGSI_FILE_TEMPORARY;
+	}
+	/* tmp_src reads tmp_dst back with an identity swizzle and no modifiers: */
+	tmp_src->File      = tmp_dst->File;
+	tmp_src->Indirect  = tmp_dst->Indirect;
+	tmp_src->Dimension = tmp_dst->Dimension;
+	tmp_src->Index     = tmp_dst->Index;
+	tmp_src->Absolute  = 0;
+	tmp_src->Negate    = 0;
+	tmp_src->SwizzleX  = TGSI_SWIZZLE_X;
+	tmp_src->SwizzleY  = TGSI_SWIZZLE_Y;
+	tmp_src->SwizzleZ  = TGSI_SWIZZLE_Z;
+	tmp_src->SwizzleW  = TGSI_SWIZZLE_W;
+
+	return using_temp;
+}
+
+static void /* describe the predicate scratch temp: dst writes .w, optional src reads .wwww */
+get_predicate(struct fd_compile_context *ctx, struct tgsi_dst_register *dst,
+		struct tgsi_src_register *src)
+{
+	dst->File      = TGSI_FILE_TEMPORARY;
+	dst->WriteMask = TGSI_WRITEMASK_W;
+	dst->Indirect  = 0;
+	dst->Dimension = 0;
+	dst->Index     = get_temp_gpr(ctx,
+			ctx->num_regs[TGSI_FILE_TEMPORARY] + 1); /* one past get_temporary()'s scratch index */
+	/* src may be NULL when the caller only needs to write the predicate: */
+	if (src) {
+		src->File      = dst->File;
+		src->Indirect  = dst->Indirect;
+		src->Dimension = dst->Dimension;
+		src->Index     = dst->Index;
+		src->Absolute  = 0;
+		src->Negate    = 0;
+		src->SwizzleX  = TGSI_SWIZZLE_W;
+		src->SwizzleY  = TGSI_SWIZZLE_W;
+		src->SwizzleZ  = TGSI_SWIZZLE_W;
+		src->SwizzleW  = TGSI_SWIZZLE_W;
+	}
+}
+
+static void /* IF: emit the predicate-setting instruction for src and save the current pred state */
+push_predicate(struct fd_compile_context *ctx, struct tgsi_src_register *src)
+{
+	struct ir_instruction *alu;
+	struct tgsi_dst_register pred_dst;
+
+	/* NOTE blob compiler seems to always puts PRED_* instrs in a CF by
+	 * themselves:
+	 */
+	ctx->cf = NULL;
+
+	if (ctx->pred_depth == 0) {
+		get_predicate(ctx, &pred_dst, NULL);
+		/* outermost IF: scalar PRED_SETNEs on src, written to the predicate temp */
+		alu = ir_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SETNEs);
+		add_regs_dummy_vector(alu);
+		add_dst_reg(ctx, alu, &pred_dst);
+		add_src_reg(ctx, alu, src);
+	} else {
+		struct tgsi_src_register pred_src;
+
+		get_predicate(ctx, &pred_dst, &pred_src);
+		/* nested IF: vector MULv of the existing predicate temp and src */
+		alu = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
+		add_dst_reg(ctx, alu, &pred_dst);
+		add_src_reg(ctx, alu, &pred_src);
+		add_src_reg(ctx, alu, src);
+
+		// XXX need to make PRED_SETE_PUSHv IR_PRED_NONE.. but need to make
+		// sure src reg is valid if it was calculated with a predicate
+		// condition..
+		alu->pred = IR_PRED_NONE;
+	}
+
+	/* save previous pred state to restore in pop_predicate(): */
+	ctx->pred_stack[ctx->pred_depth++] = ctx->so->ir->pred;
+
+	ctx->cf = NULL;
+}
+
+static void /* ENDIF: restore the saved pred state; inside an outer IF also emit PRED_SET_POPs */
+pop_predicate(struct fd_compile_context *ctx)
+{
+	/* NOTE blob compiler seems to always puts PRED_* instrs in a CF by
+	 * themselves:
+	 */
+	ctx->cf = NULL;
+
+	/* restore previous predicate state: */
+	ctx->so->ir->pred = ctx->pred_stack[--ctx->pred_depth];
+	/* still nested after the pop: emit the pop instruction on the predicate temp */
+	if (ctx->pred_depth != 0) {
+		struct ir_instruction *alu;
+		struct tgsi_dst_register pred_dst;
+		struct tgsi_src_register pred_src;
+
+		get_predicate(ctx, &pred_dst, &pred_src);
+
+		alu = ir_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SET_POPs);
+		add_regs_dummy_vector(alu);
+		add_dst_reg(ctx, alu, &pred_dst);
+		add_src_reg(ctx, alu, &pred_src);
+		alu->pred = IR_PRED_NONE;
+	}
+
+	ctx->cf = NULL;
+}
+
+static void /* point reg at an immediate holding float bit pattern val, adding one if needed */
+get_immediate(struct fd_compile_context *ctx,
+		struct tgsi_src_register *reg, uint32_t val)
+{
+	unsigned neg, swiz, idx, i;
+	/* actually maps 1:1 currently.. not sure if that is safe to rely on: */
+	static const unsigned swiz2tgsi[] = {
+			TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
+	};
+	/* scan every scalar slot in use; slot i is component i%4 of vec4 i/4: */
+	for (i = 0; i < ctx->immediate_idx; i++) {
+		swiz = i % 4;
+		idx  = i / 4;
+		/* exact match: use the slot as-is */
+		if (ctx->so->immediates[idx].val[swiz] == val) {
+			neg = 0;
+			break;
+		}
+		/* negated match: float negation flips the sign bit (integer -val does not) */
+		if (ctx->so->immediates[idx].val[swiz] == (val ^ 0x80000000u)) {
+			neg = 1;
+			break;
+		}
+	}
+
+	if (i == ctx->immediate_idx) {
+		/* need to generate a new immediate: */
+		swiz = i % 4;
+		idx  = i / 4;
+		neg  = 0;
+		ctx->so->immediates[idx].val[swiz] = val;
+		ctx->so->num_immediates = idx + 1;
+		ctx->immediate_idx++;
+	}
+	/* replicate the chosen component across all four swizzle lanes: */
+	reg->File      = TGSI_FILE_IMMEDIATE;
+	reg->Indirect  = 0;
+	reg->Dimension = 0;
+	reg->Index     = idx;
+	reg->Absolute  = 0;
+	reg->Negate    = neg;
+	reg->SwizzleX  = swiz2tgsi[swiz];
+	reg->SwizzleY  = swiz2tgsi[swiz];
+	reg->SwizzleZ  = swiz2tgsi[swiz];
+	reg->SwizzleW  = swiz2tgsi[swiz];
+}
+
+/* POW(a,b) = EXP2(b * LOG2(a)) */
+static void /* expand TGSI POW into LOG_CLAMP, MULv, EXP_IEEE; needs a single-component writemask */
+translate_pow(struct fd_compile_context *ctx,
+		struct tgsi_full_instruction *inst)
+{
+	struct tgsi_dst_register tmp_dst;
+	struct tgsi_src_register tmp_src;
+	struct ir_instruction *alu;
+
+	get_temporary(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src);
+	/* step 1 (scalar): tmp = LOG_CLAMP(src0) */
+	alu = ir_instr_create_alu(next_exec_cf(ctx), ~0, LOG_CLAMP);
+	add_regs_dummy_vector(alu);
+	add_dst_reg(ctx, alu, &tmp_dst);
+	add_src_reg(ctx, alu, &inst->Src[0].Register);
+	/* step 2 (vector): tmp = tmp * src1 */
+	alu = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
+	add_dst_reg(ctx, alu, &tmp_dst);
+	add_src_reg(ctx, alu, &tmp_src);
+	add_src_reg(ctx, alu, &inst->Src[1].Register);
+
+	/* NOTE: some of the instructions, like EXP_IEEE, seem hard-
+	 * coded to take their input from the w component.
+	 */
+	switch(inst->Dst[0].Register.WriteMask) {
+	case TGSI_WRITEMASK_X:
+		tmp_src.SwizzleW = TGSI_SWIZZLE_X;
+		break;
+	case TGSI_WRITEMASK_Y:
+		tmp_src.SwizzleW = TGSI_SWIZZLE_Y;
+		break;
+	case TGSI_WRITEMASK_Z:
+		tmp_src.SwizzleW = TGSI_SWIZZLE_Z;
+		break;
+	case TGSI_WRITEMASK_W:
+		tmp_src.SwizzleW = TGSI_SWIZZLE_W;
+		break;
+	default:
+		DBG("invalid writemask!"); /* more than one (or no) component written */
+		assert(0);
+		break;
+	}
+	/* step 3 (scalar): dst = EXP_IEEE(tmp), reading the component routed to w above */
+	alu = ir_instr_create_alu(next_exec_cf(ctx), ~0, EXP_IEEE);
+	add_regs_dummy_vector(alu);
+	add_dst_reg(ctx, alu, &inst->Dst[0].Register);
+	add_src_reg(ctx, alu, &tmp_src);
+	add_scalar_clamp(inst, alu);
+}
+
+static void /* translate TGSI TEX/TXP (opc) into a TEX_FETCH, via a temp when dst is an output */
+translate_tex(struct fd_compile_context *ctx,
+		struct tgsi_full_instruction *inst, unsigned opc)
+{
+	struct ir_instruction *instr;
+	struct tgsi_dst_register tmp_dst;
+	struct tgsi_src_register tmp_src;
+	const struct tgsi_src_register *coord;
+	bool using_temp;
+	int idx;
+
+	using_temp = get_temporary(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src);
+
+	if (opc == TGSI_OPCODE_TXP) {
+		/* TXP - Projective Texture Lookup:
+		 *
+		 *  coord.x = src0.x / src.w
+		 *  coord.y = src0.y / src.w
+		 *  coord.z = src0.z / src.w
+		 *  coord.w = src0.w
+		 *  bias = 0.0
+		 *
+		 *  dst = texture_sample(unit, coord, bias)
+		 */
+		instr = ir_instr_create_alu(next_exec_cf(ctx), MAXv, RECIP_IEEE);
+
+		/* MAXv: */
+		add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "___w";
+		add_src_reg(ctx, instr, &inst->Src[0].Register);
+		add_src_reg(ctx, instr, &inst->Src[0].Register);
+
+		/* RECIP_IEEE: */
+		add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "x___";
+		add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle = "wwww";
+		/* tmp.xyz = tmp.x (the reciprocal of src0.w) * src0.xyz: */
+		instr = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
+		add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "xyz_";
+		add_src_reg(ctx, instr, &tmp_src)->swizzle = "xxxx";
+		add_src_reg(ctx, instr, &inst->Src[0].Register);
+
+		coord = &tmp_src;
+	} else {
+		coord = &inst->Src[0].Register;
+	}
+
+	instr = ir_instr_create(next_exec_cf(ctx), IR_FETCH);
+	instr->fetch.opc = TEX_FETCH;
+	assert(inst->Texture.NumOffsets <= 1); // TODO what to do in other cases?
+
+	/* save off the tex fetch to be patched later with correct const_idx: */
+	idx = ctx->so->num_tfetch_instrs++;
+	ctx->so->tfetch_instrs[idx].samp_id = inst->Src[1].Register.Index;
+	ctx->so->tfetch_instrs[idx].instr = instr;
+
+	add_dst_reg(ctx, instr, &tmp_dst);
+	add_src_reg(ctx, instr, coord);
+
+	/* dst register needs to be marked for sync (64-bit shift, not int): */
+	ctx->need_sync |= (uint64_t)1 << instr->regs[0]->num;
+
+	/* TODO we need some way to know if the tex fetch needs to sync on alu pipe.. */
+	instr->sync = true;
+
+	if (using_temp) {
+		/* texture fetch can't write directly to export, so if tgsi
+		 * is telling us the dst register is in output file, we load
+		 * the texture to a temp and the use ALU instruction to move
+		 * to output
+		 */
+		instr = ir_instr_create_alu(next_exec_cf(ctx), MAXv, ~0);
+
+		add_dst_reg(ctx, instr, &inst->Dst[0].Register);
+		add_src_reg(ctx, instr, &tmp_src);
+		add_src_reg(ctx, instr, &tmp_src);
+		add_vector_clamp(inst, instr);
+	}
+}
+
+/* SGE(a,b) = (a - b) >= 0 ? 1.0 : 0.0   (ADDv tmp = a - b, then CNDGTEv) */
+/* SLT(a,b) = (a - b) >= 0 ? 0.0 : 1.0   (same sequence, constants swapped) */
+static void
+translate_sge_slt(struct fd_compile_context *ctx,
+		struct tgsi_full_instruction *inst, unsigned opc)
+{
+	struct ir_instruction *instr;
+	struct tgsi_dst_register tmp_dst;
+	struct tgsi_src_register tmp_src;
+	struct tgsi_src_register tmp_const;
+	float c0, c1;
+	/* c0/c1 are the two select values; c0 is added first, c1 last, below */
+	switch (opc) {
+	default:
+		assert(0); /* fallthrough: treated as SGE when asserts are compiled out */
+	case TGSI_OPCODE_SGE:
+		c0 = 0.0;
+		c1 = 1.0;
+		break;
+	case TGSI_OPCODE_SLT:
+		c0 = 1.0;
+		c1 = 0.0;
+		break;
+	}
+
+	get_temporary(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src);
+	/* tmp = src0 - src1; toggle (not set) negate so an already-negated src1 adds */
+	instr = ir_instr_create_alu(next_exec_cf(ctx), ADDv, ~0);
+	add_dst_reg(ctx, instr, &tmp_dst);
+	add_src_reg(ctx, instr, &inst->Src[0].Register);
+	add_src_reg(ctx, instr, &inst->Src[1].Register);
+	instr->regs[2]->flags ^= IR_REG_NEGATE; /* src1 */
+
+	instr = ir_instr_create_alu(next_exec_cf(ctx), CNDGTEv, ~0);
+	add_dst_reg(ctx, instr, &inst->Dst[0].Register);
+	/* three-source ops take their registers in the order src2, src0,
+	 * src1 (what the assembler/disassembler and ir.c expect), so the
+	 * compared value tmp sits between the two constants:
+	 */
+	get_immediate(ctx, &tmp_const, f2d(c0));
+	add_src_reg(ctx, instr, &tmp_const);
+	add_src_reg(ctx, instr, &tmp_src);
+	get_immediate(ctx, &tmp_const, f2d(c1));
+	add_src_reg(ctx, instr, &tmp_const);
+}
+
+static void /* expand TGSI SIN/COS (opc): reduce the argument, then the scalar SIN or COS op */
+translate_trig(struct fd_compile_context *ctx,
+		struct tgsi_full_instruction *inst,
+		unsigned opc)
+{
+	struct ir_instruction *instr;
+	struct tgsi_dst_register tmp_dst;
+	struct tgsi_src_register tmp_src;
+	struct tgsi_src_register tmp_const;
+	instr_scalar_opc_t op;
+
+	switch (opc) {
+	default:
+		assert(0); /* fallthrough: treated as SIN when asserts are compiled out */
+	case TGSI_OPCODE_SIN:
+		op = SIN;
+		break;
+	case TGSI_OPCODE_COS:
+		op = COS;
+		break;
+	}
+
+	get_temporary(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src);
+	/* all intermediate math happens in the x component of the scratch register: */
+	tmp_dst.WriteMask = TGSI_WRITEMASK_X;
+	tmp_src.SwizzleX = tmp_src.SwizzleY =
+			tmp_src.SwizzleZ = tmp_src.SwizzleW = TGSI_SWIZZLE_X;
+
+	/* MULADDv registers are added in the order src2, src0, src1 and the
+	 * op computes src2 + src0 * src1, so this is tmp.x = 0.5 + src * 0.159155.
+	 * NOTE(review): if Dst is not an output, tmp is Dst itself and Dst.x is
+	 * written even when x is not in Dst's writemask - confirm. */
+	instr = ir_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0);
+	add_dst_reg(ctx, instr, &tmp_dst);
+	get_immediate(ctx, &tmp_const, f2d(0.5));
+	add_src_reg(ctx, instr, &tmp_const);
+	add_src_reg(ctx, instr, &inst->Src[0].Register);
+	get_immediate(ctx, &tmp_const, f2d(0.159155));
+	add_src_reg(ctx, instr, &tmp_const);
+	/* tmp.x = FRACv(tmp.x) */
+	instr = ir_instr_create_alu(next_exec_cf(ctx), FRACv, ~0);
+	add_dst_reg(ctx, instr, &tmp_dst);
+	add_src_reg(ctx, instr, &tmp_src);
+	add_src_reg(ctx, instr, &tmp_src);
+	/* tmp.x = -3.141593 + tmp.x * 6.283185 */
+	instr = ir_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0);
+	add_dst_reg(ctx, instr, &tmp_dst);
+	get_immediate(ctx, &tmp_const, f2d(-3.141593));
+	add_src_reg(ctx, instr, &tmp_const);
+	add_src_reg(ctx, instr, &tmp_src);
+	get_immediate(ctx, &tmp_const, f2d(6.283185));
+	add_src_reg(ctx, instr, &tmp_const);
+	/* dst = SIN or COS of tmp.x, issued as a scalar op */
+	instr = ir_instr_create_alu(next_exec_cf(ctx), ~0, op);
+	add_regs_dummy_vector(instr);
+	add_dst_reg(ctx, instr, &inst->Dst[0].Register);
+	add_src_reg(ctx, instr, &tmp_src);
+}
+
+/*
+ * Main part of compiler/translator:
+ */
+
+static void /* translate one TGSI instruction into ir instructions appended to ctx->so->ir */
+translate_instruction(struct fd_compile_context *ctx,
+		struct tgsi_full_instruction *inst)
+{
+	unsigned opc = inst->Instruction.Opcode;
+	struct ir_instruction *instr;
+	struct ir_cf *cf; /* per-call scratch: must not be static (shared across contexts) */
+
+	if (opc == TGSI_OPCODE_END)
+		return;
+
+	if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
+		unsigned num = inst->Dst[0].Register.Index;
+		/* seems like we need to ensure that position vs param/pixel
+		 * exports don't end up in the same EXEC clause..  easy way
+		 * to do this is force a new EXEC clause on first appearance
+		 * of an position or param/pixel export.
+		 */
+		if ((num == ctx->position) || (num == ctx->psize)) {
+			if (ctx->num_position > 0) {
+				ctx->cf = NULL;
+				ir_cf_create_alloc(ctx->so->ir, SQ_POSITION,
+						ctx->num_position - 1);
+				ctx->num_position = 0;
+			}
+		} else {
+			if (ctx->num_param > 0) {
+				ctx->cf = NULL;
+				ir_cf_create_alloc(ctx->so->ir, SQ_PARAMETER_PIXEL,
+						ctx->num_param - 1);
+				ctx->num_param = 0;
+			}
+		}
+	}
+
+	cf = next_exec_cf(ctx);
+
+	/* TODO turn this into a table: */
+	switch (opc) {
+	case TGSI_OPCODE_MOV:
+		instr = ir_instr_create_alu(cf, MAXv, ~0);
+		add_regs_vector_1(ctx, inst, instr);
+		break;
+	case TGSI_OPCODE_RCP:
+		instr = ir_instr_create_alu(cf, ~0, RECIP_IEEE);
+		add_regs_scalar_1(ctx, inst, instr);
+		break;
+	case TGSI_OPCODE_RSQ:
+		instr = ir_instr_create_alu(cf, ~0, RECIPSQ_IEEE);
+		add_regs_scalar_1(ctx, inst, instr);
+		break;
+	case TGSI_OPCODE_MUL:
+		instr = ir_instr_create_alu(cf, MULv, ~0);
+		add_regs_vector_2(ctx, inst, instr);
+		break;
+	case TGSI_OPCODE_ADD:
+		instr = ir_instr_create_alu(cf, ADDv, ~0);
+		add_regs_vector_2(ctx, inst, instr);
+		break;
+	case TGSI_OPCODE_DP3:
+		instr = ir_instr_create_alu(cf, DOT3v, ~0);
+		add_regs_vector_2(ctx, inst, instr);
+		break;
+	case TGSI_OPCODE_DP4:
+		instr = ir_instr_create_alu(cf, DOT4v, ~0);
+		add_regs_vector_2(ctx, inst, instr);
+		break;
+	case TGSI_OPCODE_MIN:
+		instr = ir_instr_create_alu(cf, MINv, ~0);
+		add_regs_vector_2(ctx, inst, instr);
+		break;
+	case TGSI_OPCODE_MAX:
+		instr = ir_instr_create_alu(cf, MAXv, ~0);
+		add_regs_vector_2(ctx, inst, instr);
+		break;
+	case TGSI_OPCODE_SLT:
+	case TGSI_OPCODE_SGE:
+		translate_sge_slt(ctx, inst, opc);
+		break;
+	case TGSI_OPCODE_MAD:
+		instr = ir_instr_create_alu(cf, MULADDv, ~0);
+		add_regs_vector_3(ctx, inst, instr);
+		break;
+	case TGSI_OPCODE_FRC:
+		instr = ir_instr_create_alu(cf, FRACv, ~0);
+		add_regs_vector_1(ctx, inst, instr);
+		break;
+	case TGSI_OPCODE_FLR:
+		instr = ir_instr_create_alu(cf, FLOORv, ~0);
+		add_regs_vector_1(ctx, inst, instr);
+		break;
+	case TGSI_OPCODE_EX2:
+		instr = ir_instr_create_alu(cf, ~0, EXP_IEEE);
+		add_regs_scalar_1(ctx, inst, instr);
+		break;
+	case TGSI_OPCODE_POW:
+		translate_pow(ctx, inst);
+		break;
+	case TGSI_OPCODE_ABS: /* |x| = MAXv(x, -x): toggle negate on one copy of the source */
+		instr = ir_instr_create_alu(cf, MAXv, ~0);
+		add_regs_vector_1(ctx, inst, instr);
+		instr->regs[1]->flags ^= IR_REG_NEGATE; /* src0 */
+		break;
+	case TGSI_OPCODE_COS:
+	case TGSI_OPCODE_SIN:
+		translate_trig(ctx, inst, opc);
+		break;
+	case TGSI_OPCODE_TEX:
+	case TGSI_OPCODE_TXP:
+		translate_tex(ctx, inst, opc);
+		break;
+	case TGSI_OPCODE_CMP:
+		instr = ir_instr_create_alu(cf, CNDGTEv, ~0);
+		add_regs_vector_3(ctx, inst, instr);
+		// TODO this should be src0 if regs where in sane order..
+		instr->regs[2]->flags ^= IR_REG_NEGATE; /* src1 */
+		break;
+	case TGSI_OPCODE_IF:
+		push_predicate(ctx, &inst->Src[0].Register);
+		ctx->so->ir->pred = IR_PRED_EQ;
+		break;
+	case TGSI_OPCODE_ELSE:
+		ctx->so->ir->pred = IR_PRED_NE;
+		/* not sure if this is required in all cases, but blob compiler
+		 * won't combine EQ and NE in same CF:
+		 */
+		ctx->cf = NULL;
+		break;
+	case TGSI_OPCODE_ENDIF:
+		pop_predicate(ctx);
+		break;
+	case TGSI_OPCODE_F2I:
+		instr = ir_instr_create_alu(cf, TRUNCv, ~0);
+		add_regs_vector_1(ctx, inst, instr);
+		break;
+	default:
+		DBG("unknown TGSI opc: %s", tgsi_get_opcode_name(opc));
+		tgsi_dump(ctx->so->tokens, 0);
+		assert(0);
+		break;
+	}
+}
+
+static void /* translate every TGSI instruction token, then mark the last clause EXEC_END */
+compile_instructions(struct fd_compile_context *ctx)
+{
+	while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
+		tgsi_parse_token(&ctx->parser);
+
+		switch (ctx->parser.FullToken.Token.Type) {
+		case TGSI_TOKEN_TYPE_INSTRUCTION:
+			translate_instruction(ctx,
+					&ctx->parser.FullToken.FullInstruction);
+			break;
+		default:
+			break;
+		}
+	}
+	/* ctx->cf is NULL after IF/ELSE/ENDIF or an empty shader: open a clause instead of crashing */
+	(ctx->cf ? ctx->cf : next_exec_cf(ctx))->cf_type = EXEC_END;
+}
+
+int /* (re)compile so->tokens into a fresh so->ir; returns 0, or -1 if compile_init() fails */
+fd_compile_shader(struct fd_program_stateobj *prog,
+		struct fd_shader_stateobj *so)
+{
+	struct fd_compile_context ctx;
+	/* drop any previous IR and reset the per-shader bookkeeping: */
+	ir_shader_destroy(so->ir);
+	so->ir = ir_shader_create();
+	so->num_vfetch_instrs = so->num_tfetch_instrs = so->num_immediates = 0;
+
+	if (compile_init(&ctx, prog, so) != TGSI_PARSE_OK)
+		return -1;
+
+	if (ctx.type == TGSI_PROCESSOR_VERTEX) {
+		compile_vtx_fetch(&ctx); /* vertex fetches are emitted before the translated code */
+	} else if (ctx.type == TGSI_PROCESSOR_FRAGMENT) {
+		prog->num_exports = 0; /* fragment compile resets the export linkage table */
+		memset(prog->export_linkage, 0xff,
+				sizeof(prog->export_linkage));
+	}
+
+	compile_instructions(&ctx);
+
+	compile_free(&ctx);
+
+	return 0;
+}
+
diff --git a/src/gallium/drivers/freedreno/freedreno_compiler.h b/src/gallium/drivers/freedreno/freedreno_compiler.h
new file mode 100644
index 0000000..ce09788
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_compiler.h
@@ -0,0 +1,38 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#ifndef FREEDRENO_COMPILER_H_
+#define FREEDRENO_COMPILER_H_
+
+#include "freedreno_program.h"
+#include "freedreno_util.h"
+
+int fd_compile_shader(struct fd_program_stateobj *prog,
+		struct fd_shader_stateobj *so); /* returns 0 on success, -1 on failure */
+
+#endif /* FREEDRENO_COMPILER_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
new file mode 100644
index 0000000..df42a1f
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -0,0 +1,200 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#include "freedreno_context.h"
+#include "freedreno_vbo.h"
+#include "freedreno_blend.h"
+#include "freedreno_rasterizer.h"
+#include "freedreno_zsa.h"
+#include "freedreno_state.h"
+#include "freedreno_resource.h"
+#include "freedreno_clear.h"
+#include "freedreno_program.h"
+#include "freedreno_texture.h"
+#include "freedreno_gmem.h"
+#include "freedreno_util.h"
+
+/* there are two cases where we currently need to wait for render complete:
+ * 1) pctx->flush() .. since at the moment we have no way for DDX to sync
+ *    the presentation blit with the 3d core
+ * 2) wrap-around for ringbuffer.. possibly we can do something more
+ *    intelligent here.  Right now we need to ensure there is enough room
+ *    at the end of the drawcmds in the cmdstream buffer for all the per-
+ *    tile cmds.  We do this the lamest way possible, by making the ringbuffer
+ *    big, and flushing and resetting back to the beginning if we get too
+ *    close to the end.
+ */
+static void
+fd_context_wait(struct pipe_context *pctx)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	uint32_t ts = fd_ringbuffer_timestamp(ctx->ring);
+
+	DBG("wait: %u", ts);
+	/* wait on the ring's timestamp, then reset the ring and re-mark the draw start: */
+	fd_pipe_wait(ctx->screen->pipe, ts);
+	fd_ringbuffer_reset(ctx->ring);
+	fd_ringmarker_mark(ctx->draw_start);
+}
+
+/* emit accumulated render cmds, needed for example if render target has
+ * changed, or for flush().  Does nothing unless ctx->needs_flush is set.
+ */
+void
+fd_context_render(struct pipe_context *pctx)
+{
+	struct fd_context *ctx = fd_context(pctx);
+
+	DBG("needs_flush: %d", ctx->needs_flush);
+
+	if (!ctx->needs_flush)
+		return;
+
+	fd_gmem_render_tiles(pctx);
+
+	DBG("%p/%p/%p", ctx->ring->start, ctx->ring->cur, ctx->ring->end);
+
+	/* if size in dwords is more than half the buffer size, then wait and
+	 * wrap around (presumably ring->size is in bytes, so size/8 is half
+	 * the ring in dwords - TODO confirm): */
+	if ((ctx->ring->cur - ctx->ring->start) > ctx->ring->size/8)
+		fd_context_wait(pctx);
+
+	ctx->needs_flush = false;
+	ctx->cleared = ctx->restore = ctx->resolve = 0; /* reset per-render state */
+}
+
+static void /* pipe_context::flush hook: submit pending rendering and wait for it */
+fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
+		enum pipe_flush_flags flags)
+{
+	DBG("fence=%p", fence);
+	/* NOTE(review): fence hand-off is compiled out, so *fence is never written and flags is unused */
+#if 0
+	if (fence) {
+		fd_fence_ref(ctx->screen->fence.current,
+				(struct fd_fence **)fence);
+	}
+#endif
+	/* render anything pending, then wait on the ring (see fd_context_wait): */
+	fd_context_render(pctx);
+	fd_context_wait(pctx);
+}
+
+static void /* pipe_context::destroy hook: free everything fd_context_create() set up, then ctx */
+fd_context_destroy(struct pipe_context *pctx)
+{
+	struct fd_context *ctx = fd_context(pctx);
+
+	DBG("");
+
+	if (ctx->blitter)
+		util_blitter_destroy(ctx->blitter);
+	/* previously leaked; solid_vertexbuf is still NULL on the create() error path, which is fine: */
+	pipe_resource_reference(&ctx->solid_vertexbuf, NULL);
+	util_slab_destroy(&ctx->transfer_pool);
+	fd_ringmarker_del(ctx->draw_start);
+	fd_ringmarker_del(ctx->draw_end);
+	fd_ringbuffer_del(ctx->ring);
+	fd_prog_fini(pctx);
+	FREE(ctx);
+}
+
+static struct pipe_resource * /* buffer holding the constant vertex data for clear and gmem<->mem */
+create_solid_vertexbuf(struct pipe_context *pctx)
+{
+	static const float init_shader_const[] = {
+			/* for clear/gmem2mem: */
+			-1.000000, +1.000000, +1.000000, +1.100000,
+			+1.000000, +1.000000, -1.000000, -1.100000,
+			+1.000000, +1.100000, -1.100000, +1.000000,
+			/* for mem2gmem: (vertices) */
+			-1.000000, +1.000000, +1.000000, +1.000000,
+			+1.000000, +1.000000, -1.000000, -1.000000,
+			+1.000000, +1.000000, -1.000000, +1.000000,
+			/* for mem2gmem: (tex coords) */
+			+0.000000, +0.000000, +1.000000, +0.000000,
+			+0.000000, +1.000000, +1.000000, +1.000000,
+	};
+	struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
+			PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const));
+	pipe_buffer_write(pctx, prsc, 0, /* NOTE(review): prsc is not checked for NULL before the write */
+			sizeof(init_shader_const), init_shader_const);
+	return prsc;
+}
+
+struct pipe_context * /* allocate and initialise a context for pscreen; NULL on failure */
+fd_context_create(struct pipe_screen *pscreen, void *priv)
+{
+	struct fd_screen *screen = fd_screen(pscreen);
+	struct fd_context *ctx = CALLOC_STRUCT(fd_context);
+	struct pipe_context *pctx;
+
+	if (!ctx)
+		return NULL;
+
+	DBG("");
+
+	ctx->screen = screen;
+	/* NOTE(review): the ring/marker allocations below are not checked for failure */
+	ctx->ring = fd_ringbuffer_new(screen->pipe, 0x100000);
+	ctx->draw_start = fd_ringmarker_new(ctx->ring);
+	ctx->draw_end = fd_ringmarker_new(ctx->ring);
+
+	pctx = &ctx->base;
+	pctx->screen = pscreen;
+	pctx->priv = priv;
+	pctx->flush = fd_context_flush;
+	pctx->destroy = fd_context_destroy;
+
+	util_slab_create(&ctx->transfer_pool, sizeof(struct pipe_transfer),
+			16, UTIL_SLAB_SINGLETHREADED);
+	/* let each state module initialise itself on the new context: */
+	fd_vbo_init(pctx);
+	fd_blend_init(pctx);
+	fd_rasterizer_init(pctx);
+	fd_zsa_init(pctx);
+	fd_state_init(pctx);
+	fd_resource_context_init(pctx);
+	fd_clear_init(pctx);
+	fd_prog_init(pctx);
+	fd_texture_init(pctx);
+
+	ctx->blitter = util_blitter_create(pctx);
+	if (!ctx->blitter) {
+		fd_context_destroy(pctx); /* frees ctx along with what was set up so far */
+		return NULL;
+	}
+
+	/* construct vertex state used for solid ops (clear, and gmem<->mem) */
+	ctx->solid_vertexbuf = create_solid_vertexbuf(pctx);
+
+	fd_state_emit_setup(pctx);
+
+	return pctx;
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
new file mode 100644
index 0000000..6fff8f6
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -0,0 +1,184 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#ifndef FREEDRENO_CONTEXT_H_
+#define FREEDRENO_CONTEXT_H_
+
+#include "draw/draw_context.h"
+#include "pipe/p_context.h"
+#include "util/u_blitter.h"
+#include "util/u_slab.h"
+#include "util/u_string.h"
+
+#include "freedreno_screen.h"
+
+struct fd_blend_stateobj;
+struct fd_rasterizer_stateobj;
+struct fd_zsa_stateobj;
+struct fd_sampler_stateobj;
+struct fd_vertex_stateobj;
+struct fd_shader_stateobj;
+
+struct fd_texture_stateobj {
+	struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
+	unsigned num_textures;
+	struct fd_sampler_stateobj *samplers[PIPE_MAX_SAMPLERS];
+	unsigned num_samplers;
+	unsigned dirty_samplers;
+};
+
+struct fd_program_stateobj {
+	struct fd_shader_stateobj *vp, *fp;
+	enum {
+		FD_SHADER_DIRTY_VP = (1 << 0),
+		FD_SHADER_DIRTY_FP = (1 << 1),
+	} dirty;
+	uint8_t num_exports;
+	/* Indexed by semantic name or TGSI_SEMANTIC_COUNT + semantic index
+	 * for TGSI_SEMANTIC_GENERIC.  Special vs exports (position and point-
+	 * size) are not included in this
+	 */
+	uint8_t export_linkage[63];
+};
+
+struct fd_constbuf_stateobj {
+	struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
+	uint32_t enabled_mask;
+	uint32_t dirty_mask;
+};
+
+struct fd_vertexbuf_stateobj {
+	struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
+	unsigned count;
+	uint32_t enabled_mask;
+	uint32_t dirty_mask;
+};
+
+struct fd_framebuffer_stateobj {
+	struct pipe_framebuffer_state base;
+	uint16_t bin_h, nbins_y;
+	uint16_t bin_w, nbins_x;
+	uint32_t pa_su_sc_mode_cntl;
+};
+
+struct fd_context {
+	struct pipe_context base;
+
+	struct fd_screen *screen;
+	struct blitter_context *blitter;
+
+	struct util_slab_mempool transfer_pool;
+
+	/* shaders used by clear, and gmem->mem blits: */
+	struct fd_program_stateobj solid_prog; // TODO move to screen?
+
+	/* shaders used by mem->gmem blits: */
+	struct fd_program_stateobj blit_prog; // TODO move to screen?
+
+	/* vertex buff used for clear/gmem->mem vertices, and mem->gmem
+	 * vertices and tex coords:
+	 */
+	struct pipe_resource *solid_vertexbuf;
+
+	/* do we need to mem2gmem before rendering.  We don't, if for example,
+	 * there was a glClear() that invalidated the entire previous buffer
+	 * contents.  Keep track of which buffer(s) are cleared, or needs
+	 * restore.  Masks of PIPE_CLEAR_*
+	 */
+	enum {
+		/* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
+		FD_BUFFER_COLOR   = PIPE_CLEAR_COLOR,
+		FD_BUFFER_DEPTH   = PIPE_CLEAR_DEPTH,
+		FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
+		FD_BUFFER_ALL     = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
+	} cleared, restore, resolve;
+
+	bool needs_flush;
+
+	struct fd_ringbuffer *ring;
+	struct fd_ringmarker *draw_start, *draw_end;
+
+	/* scissor can't really be changed mid-render.. we probably need
+	 * to flush out all pending draws and then start a new tile pass
+	 * w/ new stencil state..
+	 */
+	struct pipe_scissor_state scissor;
+
+	/* which state objects need to be re-emit'd (one distinct bit each): */
+	enum {
+		FD_DIRTY_BLEND       = (1 << 0),
+		FD_DIRTY_RASTERIZER  = (1 << 1),
+		FD_DIRTY_ZSA         = (1 << 2),
+		FD_DIRTY_FRAGTEX     = (1 << 3),
+		FD_DIRTY_VERTTEX     = (1 << 4),
+		FD_DIRTY_PROG        = (1 << 5),
+		FD_DIRTY_VTX         = (1 << 6),
+		FD_DIRTY_BLEND_COLOR = (1 << 7),
+		FD_DIRTY_STENCIL_REF = (1 << 8),
+		FD_DIRTY_SAMPLE_MASK = (1 << 9),
+		FD_DIRTY_FRAMEBUFFER = (1 << 10),
+		FD_DIRTY_STIPPLE     = (1 << 11),
+		FD_DIRTY_VIEWPORT    = (1 << 12),
+		FD_DIRTY_CONSTBUF    = (1 << 13),
+		FD_DIRTY_VERTEXBUF   = (1 << 14),
+		FD_DIRTY_INDEXBUF    = (1 << 15),
+		FD_DIRTY_SCISSOR     = (1 << 16),
+	} dirty;
+
+	struct fd_blend_stateobj *blend;
+	struct fd_rasterizer_stateobj *rasterizer;
+	struct fd_zsa_stateobj *zsa;
+
+	struct fd_texture_stateobj verttex, fragtex;
+
+	struct fd_program_stateobj prog;
+
+	struct fd_vertex_stateobj *vtx;
+
+	struct pipe_blend_color blend_color;
+	struct pipe_stencil_ref stencil_ref;
+	unsigned sample_mask;
+	struct fd_framebuffer_stateobj framebuffer;
+	struct pipe_poly_stipple stipple;
+	struct pipe_viewport_state viewport;
+	struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
+	struct fd_vertexbuf_stateobj vertexbuf;
+	struct pipe_index_buffer indexbuf;
+};
+
+static INLINE struct fd_context *
+fd_context(struct pipe_context *pctx)
+{
+	return (struct fd_context *)pctx;
+}
+
+struct pipe_context * fd_context_create(struct pipe_screen *pscreen, void *priv);
+
+void fd_context_render(struct pipe_context *pctx);
+
+#endif /* FREEDRENO_CONTEXT_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_fence.c b/src/gallium/drivers/freedreno/freedreno_fence.c
new file mode 100644
index 0000000..e637465
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_fence.c
@@ -0,0 +1,52 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#include "freedreno_fence.h"
+#include "freedreno_util.h"
+
+boolean
+fd_fence_wait(struct fd_fence *fence)
+{
+	DBG("TODO: ");
+	return false;
+}
+
+boolean
+fd_fence_signalled(struct fd_fence *fence)
+{
+	DBG("TODO: ");
+	return false;
+}
+
+void
+fd_fence_del(struct fd_fence *fence)
+{
+
+}
+
+
diff --git a/src/gallium/drivers/freedreno/freedreno_fence.h b/src/gallium/drivers/freedreno/freedreno_fence.h
new file mode 100644
index 0000000..7e8bee3
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_fence.h
@@ -0,0 +1,65 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#ifndef FREEDRENO_FENCE_H_
+#define FREEDRENO_FENCE_H_
+
+#include "util/u_inlines.h"
+#include "util/u_double_list.h"
+
+
+struct fd_fence {
+	int ref;
+};
+
+boolean fd_fence_wait(struct fd_fence *fence);
+boolean fd_fence_signalled(struct fd_fence *fence);
+void fd_fence_del(struct fd_fence *fence);
+
+static INLINE void
+fd_fence_ref(struct fd_fence *fence, struct fd_fence **ref)
+{
+	if (fence)
+		++fence->ref;
+
+	if (*ref) {
+		if (--(*ref)->ref == 0)
+			fd_fence_del(*ref);
+	}
+
+	*ref = fence;
+}
+
+static INLINE struct fd_fence *
+fd_fence(struct pipe_fence_handle *fence)
+{
+	return (struct fd_fence *)fence;
+}
+
+
+#endif /* FREEDRENO_FENCE_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
new file mode 100644
index 0000000..43ed502
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -0,0 +1,479 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_pack_color.h"
+
+#include "freedreno_gmem.h"
+#include "freedreno_context.h"
+#include "freedreno_state.h"
+#include "freedreno_program.h"
+#include "freedreno_resource.h"
+#include "freedreno_zsa.h"
+#include "freedreno_util.h"
+
+/*
+ * GMEM is the small (ie. 256KiB for a200, 512KiB for a220, etc) tile buffer
+ * inside the GPU.  All rendering happens to GMEM.  Larger render targets
+ * are split into tiles that are small enough for the color (and depth and/or
+ * stencil, if enabled) buffers to fit within GMEM.  Before rendering a tile,
+ * if there was not a clear invalidating the previous tile contents, we need
+ * to restore the previous tile's contents (system mem -> GMEM), and after all
+ * the draw calls, before moving to the next tile, we need to save the tile
+ * contents (GMEM -> system mem).
+ *
+ * The code in this file handles dealing with GMEM and tiling.
+ *
+ * The structure of the ringbuffer ends up being:
+ *
+ *     +--<---<-- IB ---<---+---<---+---<---<---<--+
+ *     |                    |       |              |
+ *     v                    ^       ^              ^
+ *   ------------------------------------------------------
+ *     | clear/draw cmds | Tile0 | Tile1 | .... | TileN |
+ *   ------------------------------------------------------
+ *                       ^
+ *                       |
+ *                       address submitted in issueibcmds
+ *
+ * Where the per-tile section handles scissor setup, mem2gmem restore (if
+ * needed), IB to draw cmds earlier in the ringbuffer, and then gmem2mem
+ * resolve.
+ */
+
+/* transfer from gmem to system memory (ie. normal RAM) */
+
+static void
+emit_gmem2mem_surf(struct fd_ringbuffer *ring, uint32_t swap, uint32_t base,
+		struct pipe_surface *psurf)
+{
+	struct fd_resource *rsc = fd_resource(psurf->texture);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_RB_COLOR_INFO));
+	OUT_RING(ring, RB_COLOR_INFO_COLOR_SWAP(swap) |
+			RB_COLOR_INFO_COLOR_BASE(base / 1024) |
+			RB_COLOR_INFO_COLOR_FORMAT(fd_pipe2color(psurf->format)));
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+	OUT_RING(ring, CP_REG(REG_RB_COPY_CONTROL));
+	OUT_RING(ring, 0x00000000);             /* RB_COPY_CONTROL */
+	OUT_RELOC(ring, rsc->bo, 0, 0);         /* RB_COPY_DEST_BASE */
+	OUT_RING(ring, rsc->pitch >> 5);        /* RB_COPY_DEST_PITCH */
+	OUT_RING(ring, RB_COPY_DEST_INFO_FORMAT(fd_pipe2color(psurf->format)) |
+			RB_COPY_DEST_INFO_LINEAR |      /* RB_COPY_DEST_INFO */
+			RB_COPY_DEST_INFO_SWAP(swap) |
+			RB_COPY_DEST_INFO_WRITE_RED |
+			RB_COPY_DEST_INFO_WRITE_GREEN |
+			RB_COPY_DEST_INFO_WRITE_BLUE |
+			RB_COPY_DEST_INFO_WRITE_ALPHA);
+
+	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
+	OUT_RING(ring, 0x0000000);
+
+	OUT_PKT3(ring, CP_DRAW_INDX, 3);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX,
+			INDEX_SIZE_IGN, IGNORE_VISIBILITY));
+	OUT_RING(ring, 3);					/* NumIndices */
+}
+
+static void
+emit_gmem2mem(struct fd_context *ctx, struct fd_ringbuffer *ring,
+		uint32_t xoff, uint32_t yoff, uint32_t bin_w, uint32_t bin_h)
+{
+	struct fd_framebuffer_stateobj *fb = &ctx->framebuffer;
+	struct pipe_framebuffer_state *pfb = &fb->base;
+
+	fd_emit_vertex_bufs(ring, 0x9c, (struct fd_vertex_buf[]) {
+			{ .prsc = ctx->solid_vertexbuf, .size = 48 },
+		}, 1);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_VGT_INDX_OFFSET));
+	OUT_RING(ring, 0);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL));
+	OUT_RING(ring, 0x0000028f);
+
+	fd_program_emit(ring, &ctx->solid_prog);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_PA_SC_AA_MASK));
+	OUT_RING(ring, 0x0000ffff);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_RB_DEPTHCONTROL));
+	OUT_RING(ring, RB_DEPTHCONTROL_EARLY_Z_ENABLE);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_PA_SU_SC_MODE_CNTL));
+	OUT_RING(ring, PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |  /* PA_SU_SC_MODE_CNTL */
+			PA_SU_SC_MODE_CNTL_POLYMODE_FRONT_PTYPE(DRAW_TRIANGLES) |
+			PA_SU_SC_MODE_CNTL_POLYMODE_BACK_PTYPE(DRAW_TRIANGLES));
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_PA_CL_VTE_CNTL));
+	OUT_RING(ring, PA_CL_VTE_CNTL_VTX_W0_FMT |
+			PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
+			PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
+			PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
+			PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_PA_CL_CLIP_CNTL));
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_RB_MODECONTROL));
+	OUT_RING(ring, RB_MODECONTROL_EDRAM_MODE(EDRAM_COPY));
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_RB_COPY_DEST_OFFSET));
+	OUT_RING(ring, RB_COPY_DEST_OFFSET_X(xoff) | RB_COPY_DEST_OFFSET_Y(yoff));
+
+	if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
+		emit_gmem2mem_surf(ring, 0, bin_w * bin_h, pfb->zsbuf);
+
+	if (ctx->resolve & FD_BUFFER_COLOR)
+		emit_gmem2mem_surf(ring, 1, 0, pfb->cbufs[0]);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_RB_MODECONTROL));
+	OUT_RING(ring, RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
+}
+
+/* transfer from system memory to gmem */
+
+static void
+emit_mem2gmem_surf(struct fd_ringbuffer *ring, uint32_t swap, uint32_t base,
+		struct pipe_surface *psurf)
+{
+	struct fd_resource *rsc = fd_resource(psurf->texture);
+	uint32_t swiz;
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_RB_COLOR_INFO));
+	OUT_RING(ring, RB_COLOR_INFO_COLOR_SWAP(swap) |
+			RB_COLOR_INFO_COLOR_BASE(base / 1024) |
+			RB_COLOR_INFO_COLOR_FORMAT(fd_pipe2color(psurf->format)));
+
+	swiz = fd_tex_swiz(psurf->format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
+			PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA);
+
+	/* emit fb as a texture: */
+	OUT_PKT3(ring, CP_SET_CONSTANT, 7);
+	OUT_RING(ring, 0x00010000);
+	OUT_RING(ring, SQ_TEX0_CLAMP_X(SQ_TEX_WRAP) |
+			SQ_TEX0_CLAMP_Y(SQ_TEX_WRAP) |
+			SQ_TEX0_CLAMP_Z(SQ_TEX_WRAP) |
+			SQ_TEX0_PITCH(rsc->pitch));
+	OUT_RELOC(ring, rsc->bo, 0,
+			fd_pipe2surface(psurf->format) | 0x800);
+	OUT_RING(ring, SQ_TEX2_WIDTH(psurf->width) |
+			SQ_TEX2_HEIGHT(psurf->height));
+	OUT_RING(ring, 0x01000000 | // XXX
+			swiz |
+			SQ_TEX3_XY_MAG_FILTER(SQ_TEX_FILTER_POINT) |
+			SQ_TEX3_XY_MIN_FILTER(SQ_TEX_FILTER_POINT));
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000200);
+
+	OUT_PKT3(ring, CP_DRAW_INDX, 3);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX,
+			INDEX_SIZE_IGN, IGNORE_VISIBILITY));
+	OUT_RING(ring, 3);					/* NumIndices */
+}
+
+static void
+emit_mem2gmem(struct fd_context *ctx, struct fd_ringbuffer *ring,
+		uint32_t xoff, uint32_t yoff, uint32_t bin_w, uint32_t bin_h)
+{
+	struct fd_framebuffer_stateobj *fb = &ctx->framebuffer;
+	struct pipe_framebuffer_state *pfb = &fb->base;
+	float x0, y0, x1, y1;
+
+	fd_emit_vertex_bufs(ring, 0x9c, (struct fd_vertex_buf[]) {
+			{ .prsc = ctx->solid_vertexbuf, .size = 48, .offset = 0x30 },
+			{ .prsc = ctx->solid_vertexbuf, .size = 32, .offset = 0x60 },
+		}, 2);
+
+	/* write this tile's normalized tex coords to vertexbuf (offset 0x60): */
+	x0 = ((float)xoff) / ((float)pfb->width);
+	x1 = ((float)xoff + bin_w) / ((float)pfb->width);
+	y0 = ((float)yoff) / ((float)pfb->height);
+	y1 = ((float)yoff + bin_h) / ((float)pfb->height);
+	OUT_PKT3(ring, CP_MEM_WRITE, 9);
+	OUT_RELOC(ring, fd_resource(ctx->solid_vertexbuf)->bo, 0x60, 0);
+	OUT_RING(ring, f2d(x0));
+	OUT_RING(ring, f2d(y0));
+	OUT_RING(ring, f2d(x1));
+	OUT_RING(ring, f2d(y0));
+	OUT_RING(ring, f2d(x0));
+	OUT_RING(ring, f2d(y1));
+	OUT_RING(ring, f2d(x1));
+	OUT_RING(ring, f2d(y1));
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_VGT_INDX_OFFSET));
+	OUT_RING(ring, 0);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL));
+	OUT_RING(ring, 0x0000003b);
+
+	fd_program_emit(ring, &ctx->blit_prog);
+
+	OUT_PKT0(ring, REG_TC_CNTL_STATUS, 1);
+	OUT_RING(ring, TC_CNTL_STATUS_L2_INVALIDATE);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_RB_DEPTHCONTROL));
+	OUT_RING(ring, RB_DEPTHCONTROL_EARLY_Z_ENABLE);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_PA_SU_SC_MODE_CNTL));
+	OUT_RING(ring, PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |
+			PA_SU_SC_MODE_CNTL_POLYMODE_FRONT_PTYPE(DRAW_TRIANGLES) |
+			PA_SU_SC_MODE_CNTL_POLYMODE_BACK_PTYPE(DRAW_TRIANGLES));
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_PA_SC_AA_MASK));
+	OUT_RING(ring, 0x0000ffff);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_RB_COLORCONTROL));
+	OUT_RING(ring, RB_COLORCONTROL_ALPHA_FUNC(PIPE_FUNC_ALWAYS) |
+			RB_COLORCONTROL_BLEND_DISABLE |
+			RB_COLORCONTROL_ROP_CODE(12) |
+			RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
+			RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_RB_BLEND_CONTROL));
+	OUT_RING(ring, RB_BLENDCONTROL_COLOR_SRCBLEND(RB_BLEND_ONE) |
+			RB_BLENDCONTROL_COLOR_COMB_FCN(COMB_DST_PLUS_SRC) |
+			RB_BLENDCONTROL_COLOR_DESTBLEND(RB_BLEND_ZERO) |
+			RB_BLENDCONTROL_ALPHA_SRCBLEND(RB_BLEND_ONE) |
+			RB_BLENDCONTROL_ALPHA_COMB_FCN(COMB_DST_PLUS_SRC) |
+			RB_BLENDCONTROL_ALPHA_DESTBLEND(RB_BLEND_ZERO));
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+	OUT_RING(ring, CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL));
+	OUT_RING(ring, PA_SC_WINDOW_OFFSET_DISABLE |
+			xy2d(0,0));                     /* PA_SC_WINDOW_SCISSOR_TL */
+	OUT_RING(ring, xy2d(bin_w, bin_h));     /* PA_SC_WINDOW_SCISSOR_BR */
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+	OUT_RING(ring, CP_REG(REG_PA_CL_VPORT_XSCALE));
+	OUT_RING(ring, f2d((float)bin_w/2.0));  /* PA_CL_VPORT_XSCALE */
+	OUT_RING(ring, f2d((float)bin_w/2.0));  /* PA_CL_VPORT_XOFFSET */
+	OUT_RING(ring, f2d(-(float)bin_h/2.0)); /* PA_CL_VPORT_YSCALE */
+	OUT_RING(ring, f2d((float)bin_h/2.0));  /* PA_CL_VPORT_YOFFSET */
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_PA_CL_VTE_CNTL));
+	OUT_RING(ring, PA_CL_VTE_CNTL_VTX_XY_FMT |
+			PA_CL_VTE_CNTL_VTX_Z_FMT |       // XXX check this???
+			PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
+			PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
+			PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
+			PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_PA_CL_CLIP_CNTL));
+	OUT_RING(ring, 0x00000000);
+
+	if (ctx->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
+		emit_mem2gmem_surf(ring, 0, bin_w * bin_h, pfb->zsbuf);
+
+	if (ctx->restore & FD_BUFFER_COLOR) /* mem->gmem is gated on restore, not resolve */
+		emit_mem2gmem_surf(ring, 1, 0, pfb->cbufs[0]);
+
+	/* TODO blob driver seems to toss in a CACHE_FLUSH after each DRAW_INDX.. */
+}
+
+void
+fd_gmem_render_tiles(struct pipe_context *pctx)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	struct fd_framebuffer_stateobj *fb = &ctx->framebuffer;
+	struct pipe_framebuffer_state *pfb = &fb->base;
+	struct fd_ringbuffer *ring;
+	uint32_t i, yoff = 0;
+	ring = ctx->ring;
+
+	DBG("rendering %dx%d tiles (%s/%s)", fb->nbins_x, fb->nbins_y,
+			util_format_name(pfb->cbufs[0]->format),
+			pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none");
+
+	/* mark the end of the clear/draw cmds before emitting per-tile cmds: */
+	fd_ringmarker_mark(ctx->draw_end);
+
+	for (i = 0; i < fb->nbins_y; i++) {
+		uint32_t j, xoff = 0;
+		uint32_t bin_h = fb->bin_h;
+
+		/* clip bin height: */
+		bin_h = min(bin_h, pfb->height - yoff);
+
+		for (j = 0; j < fb->nbins_x; j++) {
+			uint32_t bin_w = fb->bin_w;
+
+			/* clip bin width: */
+			bin_w = min(bin_w, pfb->width - xoff);
+
+			DBG("bin_h=%d, yoff=%d, bin_w=%d, xoff=%d",
+					bin_h, yoff, bin_w, xoff);
+
+			fd_emit_framebuffer_state(ring, &ctx->framebuffer, ctx->zsa);
+
+			/* setup screen scissor for current tile (same for mem2gmem): */
+			OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+			OUT_RING(ring, CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL));
+			OUT_RING(ring, xy2d(0,0));           /* PA_SC_SCREEN_SCISSOR_TL */
+			OUT_RING(ring, xy2d(bin_w, bin_h));  /* PA_SC_SCREEN_SCISSOR_BR */
+
+			if (ctx->restore)
+				emit_mem2gmem(ctx, ring, xoff, yoff, bin_w, bin_h);
+
+			/* setup window scissor and offset for current tile (different
+			 * from mem2gmem):
+			 */
+			OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+			OUT_RING(ring, CP_REG(REG_PA_SC_WINDOW_OFFSET));
+			OUT_RING(ring, PA_SC_WINDOW_OFFSET_X(-xoff) |
+					PA_SC_WINDOW_OFFSET_Y(-yoff));/* PA_SC_WINDOW_OFFSET */
+
+			/* emit IB to drawcmds: */
+			OUT_IB  (ring, ctx->draw_start, ctx->draw_end);
+
+			OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+			OUT_RING(ring, CP_REG(REG_PA_SC_WINDOW_OFFSET));
+			OUT_RING(ring, 0x00000000);          /* PA_SC_WINDOW_OFFSET */
+
+			/* emit gmem2mem to transfer tile back to system memory: */
+			emit_gmem2mem(ctx, ring, xoff, yoff, bin_w, bin_h);
+
+			xoff += bin_w;
+		}
+
+		yoff += bin_h;
+	}
+
+	/* GPU executes starting from tile cmds, which IB back to draw cmds: */
+	fd_ringmarker_flush(ctx->draw_end);
+
+	/* mark start for next draw cmds: */
+	fd_ringmarker_mark(ctx->draw_start);
+
+	/* Note that because the per-tile setup and mem2gmem/gmem2mem are emitted
+	 * after the draw/clear calls, but executed before, we need to preemptively
+	 * flag some state as dirty before the first draw/clear call.
+	 *
+	 * TODO maybe we need to mark all state as dirty to not worry about state
+	 * being clobbered by other contexts?
+	 */
+	ctx->dirty |= FD_DIRTY_ZSA |
+			FD_DIRTY_RASTERIZER |
+			FD_DIRTY_FRAMEBUFFER |
+			FD_DIRTY_SAMPLE_MASK |
+			FD_DIRTY_VIEWPORT |
+			FD_DIRTY_CONSTBUF |
+			FD_DIRTY_PROG |
+			/* probably only needed if we need to mem2gmem on the next
+			 * draw..  but not sure if there is a good way to know?
+			 */
+			FD_DIRTY_SCISSOR |
+			FD_DIRTY_VERTTEX |
+			FD_DIRTY_FRAGTEX |
+			FD_DIRTY_BLEND;
+}
+
+void
+fd_gmem_calculate_tiles(struct pipe_context *pctx)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	struct fd_framebuffer_stateobj *fb = &ctx->framebuffer;
+	struct pipe_framebuffer_state *pfb = &fb->base;
+	uint32_t nbins_x = 1, nbins_y = 1;
+	uint32_t bin_w, bin_h;
+	uint32_t cpp = util_format_get_blocksize(pfb->cbufs[0]->format);
+	uint32_t gmem_size = ctx->screen->gmemsize_bytes;
+	uint32_t max_width = 992;
+
+// TODO we probably could optimize this a bit if we know that
+// Z or stencil is not enabled for any of the draw calls..
+//	if (fd_stencil_enabled(ctx->zsa) || fd_depth_enabled(ctx->zsa)) {
+		gmem_size /= 2;
+		max_width = 256;
+//	}
+
+	bin_w = ALIGN(pfb->width, 32);
+	bin_h = ALIGN(pfb->height, 32);
+
+	/* first, find a bin width that satisfies the maximum width
+	 * restrictions (divide rounding up, so nbins_x bins cover the width):
+	 */
+	while (bin_w > max_width) {
+		nbins_x++;
+		bin_w = ALIGN((pfb->width + nbins_x - 1) / nbins_x, 32);
+	}
+
+	/* then find a bin height that satisfies the memory constraints:
+	 */
+	while ((bin_w * bin_h * cpp) > gmem_size) {
+		nbins_y++;
+		bin_h = ALIGN((pfb->height + nbins_y - 1) / nbins_y, 32);
+	}
+
+	if ((nbins_x > 1) || (nbins_y > 1)) {
+		fb->pa_su_sc_mode_cntl |= PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE;
+	} else {
+		fb->pa_su_sc_mode_cntl &= ~PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE;
+	}
+
+	DBG("using %d bins of size %dx%d", nbins_x*nbins_y, bin_w, bin_h);
+
+//if we use hw binning, tile sizes (in multiple of 32) need to
+//fit in 5 bits.. for now don't care because we aren't using
+//that:
+//	assert(!(bin_h/32 & ~0x1f));
+//	assert(!(bin_w/32 & ~0x1f));
+
+	fb->nbins_x = nbins_x;
+	fb->nbins_y = nbins_y;
+	fb->bin_w = bin_w;
+	fb->bin_h = bin_h;
+
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.h b/src/gallium/drivers/freedreno/freedreno_gmem.h
new file mode 100644
index 0000000..7b46f6b
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.h
@@ -0,0 +1,37 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#ifndef FREEDRENO_GMEM_H_
+#define FREEDRENO_GMEM_H_
+
+#include "pipe/p_context.h"
+
+void fd_gmem_render_tiles(struct pipe_context *pctx);
+void fd_gmem_calculate_tiles(struct pipe_context *pctx);
+
+#endif /* FREEDRENO_GMEM_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_pm4.h b/src/gallium/drivers/freedreno/freedreno_pm4.h
new file mode 100644
index 0000000..a536f9c
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_pm4.h
@@ -0,0 +1,86 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#ifndef FREEDRENO_PM4_H_
+#define FREEDRENO_PM4_H_
+
+#define CP_TYPE0_PKT              (0 << 30)
+#define CP_TYPE1_PKT              (1 << 30)
+#define CP_TYPE2_PKT              (2 << 30)
+#define CP_TYPE3_PKT              (3 << 30)
+
+
+#define CP_ME_INIT                0x48
+#define CP_NOP                    0x10
+#define CP_INDIRECT_BUFFER        0x3f
+#define CP_INDIRECT_BUFFER_PFD    0x37
+#define CP_WAIT_FOR_IDLE          0x26
+#define CP_WAIT_REG_MEM           0x3c
+#define CP_WAIT_REG_EQ            0x52
+#define CP_WAT_REG_GTE            0x53      /* NOTE(review): "WAT" is presumably a typo for "WAIT" -- confirm before renaming */
+#define CP_WAIT_UNTIL_READ        0x5c
+#define CP_WAIT_IB_PFD_COMPLETE   0x5d
+#define CP_REG_RMW                0x21
+#define CP_REG_TO_MEM             0x3e
+#define CP_MEM_WRITE              0x3d
+#define CP_MEM_WRITE_CNTR         0x4f
+#define CP_COND_EXEC              0x44
+#define CP_COND_WRITE             0x45
+#define CP_EVENT_WRITE            0x46
+#define CP_EVENT_WRITE_SHD        0x58
+#define CP_EVENT_WRITE_CFL        0x59
+#define CP_EVENT_WRITE_ZPD        0x5b
+#define CP_DRAW_INDX              0x22
+#define CP_DRAW_INDX_2            0x36
+#define CP_DRAW_INDX_BIN          0x34
+#define CP_DRAW_INDX_2_BIN        0x35
+#define CP_VIZ_QUERY              0x23
+#define CP_SET_STATE              0x25
+#define CP_SET_CONSTANT           0x2d
+#define CP_IM_LOAD                0x27
+#define CP_IM_LOAD_IMMEDIATE      0x2b
+#define CP_LOAD_CONSTANT_CONTEXT  0x2e
+#define CP_INVALIDATE_STATE       0x3b
+#define CP_SET_SHADER_BASES       0x4a
+#define CP_SET_BIN_MASK           0x50
+#define CP_SET_BIN_SELECT         0x51
+#define CP_CONTEXT_UPDATE         0x5e
+#define CP_INTERRUPT              0x40
+#define CP_IM_STORE               0x2c
+#define CP_SET_BIN_BASE_OFFSET    0x4b      /* for a20x */
+#define CP_SET_DRAW_INIT_FLAGS    0x4b      /* for a22x */
+#define CP_SET_PROTECTED_MODE     0x5f
+#define CP_LOAD_STATE             0x30
+#define CP_COND_INDIRECT_BUFFER_PFE 0x3a
+#define CP_COND_INDIRECT_BUFFER_PFD 0x32
+
+
+#define CP_REG(reg) ((0x4 << 16) | ((reg) - 0x2000))
+
+
+#endif	/* FREEDRENO_PM4_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_program.c b/src/gallium/drivers/freedreno/freedreno_program.c
new file mode 100644
index 0000000..e57aff1
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_program.c
@@ -0,0 +1,503 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "freedreno_program.h"
+#include "freedreno_compiler.h"
+#include "freedreno_vbo.h"
+#include "freedreno_texture.h"
+#include "freedreno_util.h"
+
+/* Allocate a shader state object with CALLOC_STRUCT and tag it with the
+ * given shader type.  Returns NULL when the allocation fails.
+ */
+static struct fd_shader_stateobj *
+create_shader(enum shader_t type)
+{
+	struct fd_shader_stateobj *shader = CALLOC_STRUCT(fd_shader_stateobj);
+
+	if (shader)
+		shader->type = type;
+
+	return shader;
+}
+
+/* Release a shader state object together with everything it owns: the
+ * IR, the duplicated TGSI tokens and the assembled binary.
+ *
+ * Passing NULL is a no-op, so teardown paths can call this for shaders
+ * whose creation failed.
+ */
+static void
+delete_shader(struct fd_shader_stateobj *so)
+{
+	if (!so)
+		return;
+
+	/* shaders made by the state-create hooks have no IR until compiled */
+	if (so->ir)
+		ir_shader_destroy(so->ir);
+	FREE(so->tokens);
+	/* assemble() releases the binary with free(), so do the same here
+	 * instead of leaking it:
+	 */
+	free(so->bin);
+	FREE(so);
+}
+
+/* Assemble so->ir into so->bin, replacing any previous binary and letting
+ * the assembler fill in so->info.
+ *
+ * Returns 0 on success and -1 on failure.  On failure the shader object
+ * itself is left alive (with no binary and sizedwords reset to 0), so
+ * callers that do not own the shader can keep referencing it.
+ */
+static int
+assemble_in_place(struct fd_shader_stateobj *so)
+{
+	free(so->bin);
+	/* a shader whose compile failed may have no IR at all */
+	so->bin = so->ir ? ir_shader_assemble(so->ir, &so->info) : NULL;
+	if (!so->bin) {
+		debug_error("assemble failed!");
+		so->info.sizedwords = 0;
+		return -1;
+	}
+
+	if (fd_mesa_debug & FD_DBG_DISASM) {
+		DBG("disassemble: type=%d", so->type);
+		disasm(so->bin, so->info.sizedwords, 0, so->type);
+	}
+
+	return 0;
+}
+
+/* Assemble a shader that is exclusively owned by the caller (the built-in
+ * solid/blit shaders).  Returns the shader on success; on failure the
+ * shader is destroyed and NULL is returned.
+ */
+static struct fd_shader_stateobj *
+assemble(struct fd_shader_stateobj *so)
+{
+	if (assemble_in_place(so)) {
+		delete_shader(so);
+		return NULL;
+	}
+
+	return so;
+}
+
+/* Compile the TGSI tokens of 'so' into IR via fd_compile_shader().
+ *
+ * Returns the shader on success and NULL on failure.  The shader is NOT
+ * freed on failure: it is still referenced by 'prog' and will be released
+ * through the delete_{fs,vs}_state hooks, so freeing it here would leave a
+ * dangling pointer behind and lead to a double free.
+ */
+static struct fd_shader_stateobj *
+compile(struct fd_program_stateobj *prog, struct fd_shader_stateobj *so)
+{
+	int ret;
+
+	if (fd_mesa_debug & FD_DBG_DISASM) {
+		DBG("dump tgsi: type=%d", so->type);
+		tgsi_dump(so->tokens, 0);
+	}
+
+	ret = fd_compile_shader(prog, so);
+	if (ret) {
+		debug_error("compile failed!");
+		return NULL;
+	}
+
+	/* NOTE: we don't assemble yet because for VS we don't know the
+	 * type information for vertex fetch yet.. so those need to be
+	 * patched up later before assembling.
+	 */
+
+	/* sizedwords == 0 marks the binary as stale, see emit() */
+	so->info.sizedwords = 0;
+
+	return so;
+}
+
+/* Write the shader binary into the ringbuffer with a
+ * CP_IM_LOAD_IMMEDIATE packet, (re)assembling it first if it is stale.
+ * If assembly fails nothing is emitted for this shader.
+ */
+static void
+emit(struct fd_ringbuffer *ring, struct fd_shader_stateobj *so)
+{
+	unsigned i;
+
+	/* the shader is bound in the context, so it must survive a failed
+	 * assemble; bail out rather than emit from a missing binary:
+	 */
+	if (so->info.sizedwords == 0 && assemble_in_place(so))
+		return;
+
+	OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + so->info.sizedwords);
+	OUT_RING(ring, (so->type == SHADER_VERTEX) ? 0 : 1);
+	OUT_RING(ring, so->info.sizedwords);
+	for (i = 0; i < so->info.sizedwords; i++)
+		OUT_RING(ring, so->bin[i]);
+}
+
+/* pipe_context::create_fs_state hook: create a fragment shader object
+ * holding a private copy of the TGSI tokens.  Compilation is deferred.
+ * Returns NULL if either allocation fails.
+ */
+static void *
+fd_fp_state_create(struct pipe_context *pctx,
+		const struct pipe_shader_state *cso)
+{
+	struct fd_shader_stateobj *so = create_shader(SHADER_FRAGMENT);
+	if (!so)
+		return NULL;
+	so->tokens = tgsi_dup_tokens(cso->tokens);
+	if (!so->tokens) {
+		/* nothing else is owned yet, so a plain FREE() is enough */
+		FREE(so);
+		return NULL;
+	}
+	return so;
+}
+
+/* pipe_context::delete_fs_state hook: destroy a fragment shader object. */
+static void
+fd_fp_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+	delete_shader((struct fd_shader_stateobj *)hwcso);
+}
+
+/* pipe_context::bind_fs_state hook: make 'hwcso' the current fragment
+ * shader and flag both the program and the context as dirty.
+ */
+static void
+fd_fp_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+	struct fd_context *fdctx = fd_context(pctx);
+
+	fdctx->dirty |= FD_DIRTY_PROG;
+	fdctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+	fdctx->prog.fp = hwcso;
+}
+
+/* pipe_context::create_vs_state hook: create a vertex shader object
+ * holding a private copy of the TGSI tokens.  Compilation is deferred.
+ * Returns NULL if either allocation fails.
+ */
+static void *
+fd_vp_state_create(struct pipe_context *pctx,
+		const struct pipe_shader_state *cso)
+{
+	struct fd_shader_stateobj *so = create_shader(SHADER_VERTEX);
+	if (!so)
+		return NULL;
+	so->tokens = tgsi_dup_tokens(cso->tokens);
+	if (!so->tokens) {
+		/* nothing else is owned yet, so a plain FREE() is enough */
+		FREE(so);
+		return NULL;
+	}
+	return so;
+}
+
+/* pipe_context::delete_vs_state hook: destroy a vertex shader object. */
+static void
+fd_vp_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+	delete_shader((struct fd_shader_stateobj *)hwcso);
+}
+
+/* pipe_context::bind_vs_state hook: make 'hwcso' the current vertex
+ * shader and flag both the program and the context as dirty.
+ */
+static void
+fd_vp_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+	struct fd_context *fdctx = fd_context(pctx);
+
+	fdctx->dirty |= FD_DIRTY_PROG;
+	fdctx->prog.dirty |= FD_SHADER_DIRTY_VP;
+	fdctx->prog.vp = hwcso;
+}
+
+/* Fill in the vertex-fetch instructions recorded in so->vfetch_instrs
+ * from the currently bound vertex elements and vertex buffers: fetch
+ * constant slot, surface format, normalized/signed flags, stride, offset
+ * and destination swizzle.  Afterwards the shader is marked for
+ * re-assembly by resetting so->info.sizedwords.
+ */
+static void
+patch_vtx_fetches(struct fd_context *ctx, struct fd_shader_stateobj *so,
+		struct fd_vertex_stateobj *vtx)
+{
+	unsigned i;
+
+	assert(so->num_vfetch_instrs == vtx->num_elements);
+
+	/* update vtx fetch instructions: */
+	for (i = 0; i < so->num_vfetch_instrs; i++) {
+		struct ir_instruction *instr = so->vfetch_instrs[i];
+		struct pipe_vertex_element *elem = &vtx->pipe[i];
+		struct pipe_vertex_buffer *vb =
+				&ctx->vertexbuf.vb[elem->vertex_buffer_index];
+		enum pipe_format format = elem->src_format;
+		const struct util_format_description *desc =
+				util_format_description(format);
+		unsigned j;
+
+		/* Find the first non-VOID channel. */
+		for (j = 0; j < 4; j++)
+			if (desc->channel[j].type != UTIL_FORMAT_TYPE_VOID)
+				break;
+
+		/* CI/CIS can probably be set in compiler instead: */
+		instr->fetch.const_idx = 20 + (i / 3);
+		instr->fetch.const_idx_sel = i % 3;
+
+		instr->fetch.fmt = fd_pipe2surface(format);
+		if (j < 4) {
+			instr->fetch.is_normalized = desc->channel[j].normalized;
+			instr->fetch.is_signed =
+					desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED;
+		} else {
+			/* every channel is VOID: indexing channel[4] would read
+			 * past the four-entry channel array, so fall back to
+			 * plain unsigned, non-normalized:
+			 */
+			instr->fetch.is_normalized = false;
+			instr->fetch.is_signed = false;
+		}
+		/* a zero stride is replaced by 1 (standard ternary rather than
+		 * the GNU "?:" extension):
+		 */
+		instr->fetch.stride = vb->stride ? vb->stride : 1;
+		instr->fetch.offset = elem->src_offset;
+
+		for (j = 0; j < 4; j++)
+			instr->regs[0]->swizzle[j] = "xyzw01__"[desc->swizzle[j]];
+
+		assert(instr->fetch.fmt != FMT_INVALID);
+
+		DBG("vtx[%d]: %s (%d), ci=%d, cis=%d, id=%d, swizzle=%s",
+				i, util_format_name(format),
+				instr->fetch.fmt,
+				instr->fetch.const_idx,
+				instr->fetch.const_idx_sel,
+				elem->instance_divisor,
+				instr->regs[0]->swizzle);
+	}
+
+	/* trigger re-assemble: */
+	so->info.sizedwords = 0;
+}
+
+/* Re-resolve the fetch constant index of every texture-fetch instruction
+ * recorded in so->tfetch_instrs against the given texture state.  The
+ * shader is marked for re-assembly only if an index actually changed.
+ */
+static void
+patch_tex_fetches(struct fd_context *ctx, struct fd_shader_stateobj *so,
+		struct fd_texture_stateobj *tex)
+{
+	unsigned n;
+
+	for (n = 0; n < so->num_tfetch_instrs; n++) {
+		struct ir_instruction *fetch = so->tfetch_instrs[n].instr;
+		unsigned sampler = so->tfetch_instrs[n].samp_id;
+		unsigned idx = fd_get_const_idx(ctx, tex, sampler);
+
+		/* nothing to do when the instruction is already up to date */
+		if (idx == fetch->fetch.const_idx)
+			continue;
+
+		fetch->fetch.const_idx = idx;
+		/* trigger re-assemble: */
+		so->info.sizedwords = 0;
+	}
+}
+
+/* Bring the bound vertex/fragment shaders up to date before emitting:
+ * recompile whatever the program dirty bits require, then re-patch the
+ * vertex and texture fetch instructions if the related context state
+ * changed.  The results of compile() are not checked here.
+ */
+void
+fd_program_validate(struct fd_context *ctx)
+{
+	struct fd_program_stateobj *prog = &ctx->prog;
+
+	/* if vertex or frag shader is dirty, we may need to recompile. Compile
+	 * frag shader first, as that assigns the register slots for exports
+	 * from the vertex shader.  And therefore if frag shader has changed we
+	 * need to recompile both vert and frag shader.
+	 */
+	if (prog->dirty & FD_SHADER_DIRTY_FP)
+		compile(prog, prog->fp);
+
+	if (prog->dirty & (FD_SHADER_DIRTY_FP | FD_SHADER_DIRTY_VP))
+		compile(prog, prog->vp);
+
+	/* propagate any shader change to the context-wide dirty mask so the
+	 * fetch patching below runs, then clear the per-program bits:
+	 */
+	if (prog->dirty)
+		ctx->dirty |= FD_DIRTY_PROG;
+
+	prog->dirty = 0;
+
+	/* if necessary, fix up vertex fetch instructions: */
+	if (ctx->dirty & (FD_DIRTY_VTX | FD_DIRTY_VERTEXBUF | FD_DIRTY_PROG))
+		patch_vtx_fetches(ctx, prog->vp, ctx->vtx);
+
+	/* if necessary, fix up texture fetch instructions: */
+	if (ctx->dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX | FD_DIRTY_PROG)) {
+		patch_tex_fetches(ctx, prog->vp, &ctx->verttex);
+		patch_tex_fetches(ctx, prog->fp, &ctx->fragtex);
+	}
+}
+
+/* Emit both shaders of 'prog' into the ringbuffer, followed by the
+ * SQ_PROGRAM_CNTL register describing their register usage and the
+ * vertex shader export count.
+ */
+void
+fd_program_emit(struct fd_ringbuffer *ring,
+		struct fd_program_stateobj *prog)
+{
+	struct ir_shader_info *vs_info = &prog->vp->info;
+	struct ir_shader_info *fs_info = &prog->fp->info;
+	uint8_t vs_gprs, fs_gprs, vs_export;
+
+	/* emit() may (re)assemble the shader, which passes the info struct
+	 * to the assembler, so read max_reg only afterwards:
+	 */
+	emit(ring, prog->vp);
+	emit(ring, prog->fp);
+
+	/* a negative max_reg is encoded as 0x80 */
+	if (vs_info->max_reg < 0)
+		vs_gprs = 0x80;
+	else
+		vs_gprs = vs_info->max_reg;
+
+	if (fs_info->max_reg < 0)
+		fs_gprs = 0x80;
+	else
+		fs_gprs = fs_info->max_reg;
+
+	/* the export count is programmed as (count - 1), never below zero */
+	vs_export = max(1, prog->num_exports) - 1;
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_SQ_PROGRAM_CNTL));
+	OUT_RING(ring, SQ_PROGRAM_CNTL_PS_EXPORT_MODE(POSITION_2_VECTORS_SPRITE) |
+			SQ_PROGRAM_CNTL_VS_RESOURCE |
+			SQ_PROGRAM_CNTL_PS_RESOURCE |
+			SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) |
+			SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) |
+			SQ_PROGRAM_CNTL_VS_REGS(vs_gprs));
+}
+
+/* Builds and assembles the fragment shader used for blits.  It samples
+ * one texture and exports the result as the fragment color:
+ *
+ *    EXEC ADDR(0x2) CNT(0x1)
+ *       (S)FETCH:	SAMPLE	R0.xyzw = R0.xyx CONST(0) LOCATION(CENTER)
+ *    ALLOC PARAM/PIXEL SIZE(0x0)
+ *    EXEC_END ADDR(0x3) CNT(0x1)
+ *          ALU:	MAXv	export0 = R0, R0	; gl_FragColor
+ *    NOP
+ *
+ * Returns NULL if the shader object cannot be allocated or assembled.
+ */
+static struct fd_shader_stateobj *
+create_blit_fp(void)
+{
+	struct fd_shader_stateobj *so = create_shader(SHADER_FRAGMENT);
+	struct ir_instruction *fetch, *alu;
+	struct ir_cf *cf;
+
+	if (!so)
+		return NULL;
+
+	so->ir = ir_shader_create();
+
+	/* texture fetch: R0.xyzw = sample(R0.xyx) */
+	cf = ir_cf_create(so->ir, EXEC);
+	fetch = ir_instr_create_tex_fetch(cf, 0);
+	ir_reg_create(fetch, 0, "xyzw", 0);
+	ir_reg_create(fetch, 0, "xyx", 0);
+	fetch->sync = true;
+
+	/* color export: export0 = max(R0, R0) */
+	ir_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
+	cf = ir_cf_create(so->ir, EXEC_END);
+	alu = ir_instr_create_alu(cf, MAXv, ~0);
+	ir_reg_create(alu, 0, NULL, IR_REG_EXPORT);
+	ir_reg_create(alu, 0, NULL, 0);
+	ir_reg_create(alu, 0, NULL, 0);
+
+	return assemble(so);
+}
+
+/* Builds and assembles the vertex shader used for blits.  It fetches a
+ * two-component attribute into R1 and a three-component attribute into
+ * R2, exports R2 as the position and R1 as export0:
+ *
+ *    EXEC ADDR(0x3) CNT(0x2)
+ *          FETCH:	VERTEX	R1.xy01 = R0.x FMT_32_32_FLOAT UNSIGNED STRIDE(8) CONST(26, 1)
+ *          FETCH:	VERTEX	R2.xyz1 = R0.x FMT_32_32_32_FLOAT UNSIGNED STRIDE(12) CONST(26, 0)
+ *    ALLOC POSITION SIZE(0x0)
+ *    EXEC ADDR(0x5) CNT(0x1)
+ *          ALU:	MAXv	export62 = R2, R2	; gl_Position
+ *    ALLOC PARAM/PIXEL SIZE(0x0)
+ *    EXEC_END ADDR(0x6) CNT(0x1)
+ *          ALU:	MAXv	export0 = R1, R1
+ *    NOP
+ *
+ * Returns NULL if the shader object cannot be allocated or assembled.
+ */
+static struct fd_shader_stateobj *
+create_blit_vp(void)
+{
+	struct fd_shader_stateobj *so = create_shader(SHADER_VERTEX);
+	struct ir_instruction *fetch, *alu;
+	struct ir_cf *cf;
+
+	if (!so)
+		return NULL;
+
+	so->ir = ir_shader_create();
+
+	/* both vertex fetches live in the first EXEC block */
+	cf = ir_cf_create(so->ir, EXEC);
+
+	fetch = ir_instr_create_vtx_fetch(cf, 26, 1, FMT_32_32_FLOAT, false, 8);
+	fetch->fetch.is_normalized = true;
+	ir_reg_create(fetch, 1, "xy01", 0);
+	ir_reg_create(fetch, 0, "x", 0);
+
+	fetch = ir_instr_create_vtx_fetch(cf, 26, 0, FMT_32_32_32_FLOAT, false, 12);
+	fetch->fetch.is_normalized = true;
+	ir_reg_create(fetch, 2, "xyz1", 0);
+	ir_reg_create(fetch, 0, "x", 0);
+
+	/* position export: export62 = max(R2, R2) */
+	ir_cf_create_alloc(so->ir, SQ_POSITION, 0);
+	cf = ir_cf_create(so->ir, EXEC);
+	alu = ir_instr_create_alu(cf, MAXv, ~0);
+	ir_reg_create(alu, 62, NULL, IR_REG_EXPORT);
+	ir_reg_create(alu, 2, NULL, 0);
+	ir_reg_create(alu, 2, NULL, 0);
+
+	/* parameter export: export0 = max(R1, R1) */
+	ir_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
+	cf = ir_cf_create(so->ir, EXEC_END);
+	alu = ir_instr_create_alu(cf, MAXv, ~0);
+	ir_reg_create(alu, 0, NULL, IR_REG_EXPORT);
+	ir_reg_create(alu, 1, NULL, 0);
+	ir_reg_create(alu, 1, NULL, 0);
+
+	return assemble(so);
+}
+
+/* Builds and assembles the solid-fill fragment shader, which exports
+ * constant C0 as the fragment color:
+ *
+ *    ALLOC PARAM/PIXEL SIZE(0x0)
+ *    EXEC_END ADDR(0x1) CNT(0x1)
+ *          ALU:	MAXv	export0 = C0, C0	; gl_FragColor
+ *
+ * Returns NULL if the shader object cannot be allocated or assembled.
+ */
+static struct fd_shader_stateobj *
+create_solid_fp(void)
+{
+	struct fd_shader_stateobj *so = create_shader(SHADER_FRAGMENT);
+	struct ir_instruction *alu;
+	struct ir_cf *cf;
+
+	if (!so)
+		return NULL;
+
+	so->ir = ir_shader_create();
+
+	/* color export: export0 = max(C0, C0) */
+	ir_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
+	cf = ir_cf_create(so->ir, EXEC_END);
+	alu = ir_instr_create_alu(cf, MAXv, ~0);
+	ir_reg_create(alu, 0, NULL, IR_REG_EXPORT);
+	ir_reg_create(alu, 0, NULL, IR_REG_CONST);
+	ir_reg_create(alu, 0, NULL, IR_REG_CONST);
+
+	return assemble(so);
+}
+
+/* Builds and assembles the solid-fill vertex shader, which fetches one
+ * three-component attribute into R1 and exports it as the position:
+ *
+ *    EXEC ADDR(0x3) CNT(0x1)
+ *       (S)FETCH:	VERTEX	R1.xyz1 = R0.x FMT_32_32_32_FLOAT
+ *                           UNSIGNED STRIDE(12) CONST(26, 0)
+ *    ALLOC POSITION SIZE(0x0)
+ *    EXEC ADDR(0x4) CNT(0x1)
+ *          ALU:	MAXv	export62 = R1, R1	; gl_Position
+ *    ALLOC PARAM/PIXEL SIZE(0x0)
+ *    EXEC_END ADDR(0x5) CNT(0x0)
+ *
+ * Returns NULL if the shader object cannot be allocated or assembled.
+ */
+static struct fd_shader_stateobj *
+create_solid_vp(void)
+{
+	struct fd_shader_stateobj *so = create_shader(SHADER_VERTEX);
+	struct ir_instruction *fetch, *alu;
+	struct ir_cf *cf;
+
+	if (!so)
+		return NULL;
+
+	so->ir = ir_shader_create();
+
+	/* vertex fetch: R1.xyz1 = fetch(R0.x) */
+	cf = ir_cf_create(so->ir, EXEC);
+	fetch = ir_instr_create_vtx_fetch(cf, 26, 0, FMT_32_32_32_FLOAT, false, 12);
+	ir_reg_create(fetch, 1, "xyz1", 0);
+	ir_reg_create(fetch, 0, "x", 0);
+
+	/* position export: export62 = max(R1, R1) */
+	ir_cf_create_alloc(so->ir, SQ_POSITION, 0);
+	cf = ir_cf_create(so->ir, EXEC);
+	alu = ir_instr_create_alu(cf, MAXv, ~0);
+	ir_reg_create(alu, 62, NULL, IR_REG_EXPORT);
+	ir_reg_create(alu, 1, NULL, 0);
+	ir_reg_create(alu, 1, NULL, 0);
+
+	/* empty terminating block, no parameter exports */
+	ir_cf_create_alloc(so->ir, SQ_PARAMETER_PIXEL, 0);
+	ir_cf_create(so->ir, EXEC_END);
+
+	return assemble(so);
+}
+
+/* Install the shader state hooks on the pipe context and build the
+ * driver's built-in solid-fill and blit programs.
+ */
+void
+fd_prog_init(struct pipe_context *pctx)
+{
+	struct fd_context *fdctx = fd_context(pctx);
+
+	/* vertex shader CSO hooks */
+	pctx->create_vs_state = fd_vp_state_create;
+	pctx->bind_vs_state = fd_vp_state_bind;
+	pctx->delete_vs_state = fd_vp_state_delete;
+
+	/* fragment shader CSO hooks */
+	pctx->create_fs_state = fd_fp_state_create;
+	pctx->bind_fs_state = fd_fp_state_bind;
+	pctx->delete_fs_state = fd_fp_state_delete;
+
+	/* built-in programs; each entry is NULL if its creation failed */
+	fdctx->solid_prog.fp = create_solid_fp();
+	fdctx->solid_prog.vp = create_solid_vp();
+	fdctx->blit_prog.fp = create_blit_fp();
+	fdctx->blit_prog.vp = create_blit_vp();
+}
+
+/* Destroy the built-in solid-fill and blit programs created by
+ * fd_prog_init().  Any of them may be NULL if its creation failed, so
+ * each one is checked before being deleted.
+ */
+void
+fd_prog_fini(struct pipe_context *pctx)
+{
+	struct fd_context *ctx = fd_context(pctx);
+
+	if (ctx->solid_prog.vp)
+		delete_shader(ctx->solid_prog.vp);
+	if (ctx->solid_prog.fp)
+		delete_shader(ctx->solid_prog.fp);
+	if (ctx->blit_prog.vp)
+		delete_shader(ctx->blit_prog.vp);
+	if (ctx->blit_prog.fp)
+		delete_shader(ctx->blit_prog.fp);
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_program.h b/src/gallium/drivers/freedreno/freedreno_program.h
new file mode 100644
index 0000000..e73cf1b
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_program.h
@@ -0,0 +1,82 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#ifndef FREEDRENO_PROGRAM_H_
+#define FREEDRENO_PROGRAM_H_
+
+#include "pipe/p_context.h"
+
+#include "freedreno_context.h"
+
+#include "ir.h"
+#include "disasm.h"
+
+/* Driver-side state for one vertex or fragment shader, from the TGSI
+ * tokens handed in by the state tracker through to the assembled binary.
+ */
+struct fd_shader_stateobj {
+	/* SHADER_VERTEX or SHADER_FRAGMENT, set once at creation */
+	enum shader_t type;
+
+	/* assembled shader binary, info.sizedwords dwords long; rebuilt
+	 * whenever info.sizedwords has been reset to 0
+	 */
+	uint32_t *bin;
+
+	/* private copy of the TGSI tokens (tgsi_dup_tokens) */
+	struct tgsi_token *tokens;
+
+	/* note that we defer compiling shader until we know both vs and ps..
+	 * and if one changes, we potentially need to recompile in order to
+	 * get varying linkages correct:
+	 */
+	struct ir_shader_info info;
+	struct ir_shader *ir;
+
+	/* for vertex shaders, the fetch instructions which need to be
+	 * patched up before assembly:
+	 */
+	unsigned num_vfetch_instrs;
+	struct ir_instruction *vfetch_instrs[64];
+
+	/* for all shaders, any tex fetch instructions which need to be
+	 * patched before assembly:
+	 */
+	unsigned num_tfetch_instrs;
+	struct {
+		unsigned samp_id;
+		struct ir_instruction *instr;
+	} tfetch_instrs[64];
+
+	/* immediate values used by the shader, four dwords per entry */
+	unsigned first_immediate;     /* const reg # of first immediate */
+	unsigned num_immediates;
+	struct {
+		uint32_t val[4];
+	} immediates[64];
+};
+
+void fd_program_emit(struct fd_ringbuffer *ring,
+		struct fd_program_stateobj *prog);
+void fd_program_validate(struct fd_context *ctx);
+
+void fd_prog_init(struct pipe_context *pctx);
+void fd_prog_fini(struct pipe_context *pctx);
+
+#endif /* FREEDRENO_PROGRAM_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_rasterizer.c b/src/gallium/drivers/freedreno/freedreno_rasterizer.c
new file mode 100644
index 0000000..2d69133
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_rasterizer.c
@@ -0,0 +1,151 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "freedreno_rasterizer.h"
+#include "freedreno_context.h"
+#include "freedreno_util.h"
+
+
+/* Translate a PIPE_POLYGON_MODE_x value into the matching hardware draw
+ * type.  Unknown modes are logged and mapped to 0.
+ */
+static enum pa_su_sc_draw
+polygon_mode(unsigned mode)
+{
+	if (mode == PIPE_POLYGON_MODE_POINT)
+		return DRAW_POINTS;
+	if (mode == PIPE_POLYGON_MODE_LINE)
+		return DRAW_LINES;
+	if (mode == PIPE_POLYGON_MODE_FILL)
+		return DRAW_TRIANGLES;
+
+	DBG("invalid polygon mode: %u", mode);
+	return 0;
+}
+
+/* pipe_context::create_rasterizer_state hook: allocate a rasterizer state
+ * object, keep a copy of the gallium state in it and precompute the
+ * register values derived from that state.  Returns NULL if the
+ * allocation fails.
+ */
+static void *
+fd_rasterizer_state_create(struct pipe_context *pctx,
+		const struct pipe_rasterizer_state *cso)
+{
+	struct fd_rasterizer_stateobj *so = CALLOC_STRUCT(fd_rasterizer_stateobj);
+	float psize_min, psize_max;
+	uint32_t mode_cntl;
+
+	if (!so)
+		return NULL;
+
+	so->base = *cso;
+
+	if (!cso->point_size_per_vertex) {
+		/* Force the point size to be as if the vertex output was disabled. */
+		psize_min = cso->point_size;
+		psize_max = cso->point_size;
+	} else {
+		psize_min = util_get_min_point_size(cso);
+		psize_max = 8192;
+	}
+
+	so->pa_sc_line_stipple = 0;
+	if (cso->line_stipple_enable)
+		so->pa_sc_line_stipple =
+			PA_SC_LINE_STIPPLE_LINE_PATTERN(cso->line_stipple_pattern) |
+			PA_SC_LINE_STIPPLE_REPEAT_COUNT(cso->line_stipple_factor);
+
+	so->pa_cl_clip_cntl = 0; // TODO
+
+	so->pa_su_vtx_cntl =
+		PA_SU_VTX_CNTL_PIX_CENTER(cso->gl_rasterization_rules ? PIXCENTER_OGL : PIXCENTER_D3D) |
+		PA_SU_VTX_CNTL_QUANT_MODE(ONE_SIXTEENTH);
+
+	/* sizes and widths are passed to the register macros halved */
+	so->pa_su_point_size =
+		PA_SU_POINT_SIZE_HEIGHT(cso->point_size/2) |
+		PA_SU_POINT_SIZE_WIDTH(cso->point_size/2);
+
+	so->pa_su_point_minmax =
+		PA_SU_POINT_MINMAX_MIN_SIZE(psize_min/2) |
+		PA_SU_POINT_MINMAX_MAX_SIZE(psize_max/2);
+
+	so->pa_su_line_cntl =
+		PA_SU_LINE_CNTL_WIDTH(cso->line_width/2);
+
+	/* accumulate the mode control bits locally, store them once at the end */
+	mode_cntl =
+		PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE |
+		PA_SU_SC_MODE_CNTL_POLYMODE_FRONT_PTYPE(polygon_mode(cso->fill_front)) |
+		PA_SU_SC_MODE_CNTL_POLYMODE_BACK_PTYPE(polygon_mode(cso->fill_back));
+
+	if (cso->cull_face & PIPE_FACE_FRONT)
+		mode_cntl |= PA_SU_SC_MODE_CNTL_CULL_FRONT;
+	if (cso->cull_face & PIPE_FACE_BACK)
+		mode_cntl |= PA_SU_SC_MODE_CNTL_CULL_BACK;
+	if (!cso->flatshade_first)
+		mode_cntl |= PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST;
+	if (!cso->front_ccw)
+		mode_cntl |= PA_SU_SC_MODE_CNTL_FACE;
+	if (cso->line_stipple_enable)
+		mode_cntl |= PA_SU_SC_MODE_CNTL_LINE_STIPPLE_ENABLE;
+	if (cso->multisample)
+		mode_cntl |= PA_SU_SC_MODE_CNTL_MSAA_ENABLE;
+
+	/* polygon mode is only enabled when a face is not plainly filled */
+	if (cso->fill_front == PIPE_POLYGON_MODE_FILL &&
+			cso->fill_back == PIPE_POLYGON_MODE_FILL)
+		mode_cntl |= PA_SU_SC_MODE_CNTL_POLYMODE(POLY_DISABLED);
+	else
+		mode_cntl |= PA_SU_SC_MODE_CNTL_POLYMODE(POLY_DUALMODE);
+
+	if (cso->offset_tri)
+		mode_cntl |=
+			PA_SU_SC_MODE_CNTL_POLY_OFFSET_FRONT_ENABLE |
+			PA_SU_SC_MODE_CNTL_POLY_OFFSET_BACK_ENABLE |
+			PA_SU_SC_MODE_CNTL_POLY_OFFSET_PARA_ENABLE;
+
+	so->pa_su_sc_mode_cntl = mode_cntl;
+
+	return so;
+}
+
+/* pipe_context::bind_rasterizer_state hook: make 'hwcso' the current
+ * rasterizer state and flag it dirty.
+ */
+static void
+fd_rasterizer_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+	struct fd_context *fdctx = fd_context(pctx);
+
+	fdctx->dirty |= FD_DIRTY_RASTERIZER;
+	fdctx->rasterizer = hwcso;
+}
+
+/* pipe_context::delete_rasterizer_state hook: the state object is a
+ * single allocation, so releasing it is all that is needed.
+ */
+static void
+fd_rasterizer_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+	struct fd_rasterizer_stateobj *so = hwcso;
+
+	FREE(so);
+}
+
+/* Install the rasterizer state hooks on the pipe context. */
+void
+fd_rasterizer_init(struct pipe_context *pctx)
+{
+	pctx->delete_rasterizer_state = fd_rasterizer_state_delete;
+	pctx->bind_rasterizer_state = fd_rasterizer_state_bind;
+	pctx->create_rasterizer_state = fd_rasterizer_state_create;
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_rasterizer.h b/src/gallium/drivers/freedreno/freedreno_rasterizer.h
new file mode 100644
index 0000000..519a05e
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_rasterizer.h
@@ -0,0 +1,48 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#ifndef FREEDRENO_RASTERIZER_H_
+#define FREEDRENO_RASTERIZER_H_
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+/* Rasterizer state object: a copy of the gallium rasterizer state plus
+ * the values precomputed from it in fd_rasterizer_state_create().
+ * NOTE(review): each uint32_t is presumably the value for the hardware
+ * register of the same name -- confirm against the register header.
+ */
+struct fd_rasterizer_stateobj {
+	struct pipe_rasterizer_state base;   /* copy of the state passed at creation */
+	uint32_t pa_sc_line_stipple;         /* 0 unless line stipple is enabled */
+	uint32_t pa_cl_clip_cntl;            /* currently always 0 (TODO in the creator) */
+	uint32_t pa_su_vtx_cntl;
+	uint32_t pa_su_point_size;
+	uint32_t pa_su_point_minmax;
+	uint32_t pa_su_line_cntl;
+	uint32_t pa_su_sc_mode_cntl;
+};
+
+void fd_rasterizer_init(struct pipe_context *pctx);
+
+#endif /* FREEDRENO_RASTERIZER_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
new file mode 100644
index 0000000..53446ff
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -0,0 +1,248 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_transfer.h"
+#include "util/u_string.h"
+
+#include "freedreno_resource.h"
+#include "freedreno_screen.h"
+#include "freedreno_surface.h"
+#include "freedreno_context.h"
+#include "freedreno_util.h"
+
+/* Allocate a pipe_transfer from the context's transfer pool and fill it
+ * in for the requested box of 'prsc'.  The stride is the resource pitch
+ * times its bytes-per-block value, and the layer stride is set to the
+ * same value.  Returns NULL if the pool allocation fails.
+ */
+static struct pipe_transfer *
+fd_resource_transfer_get(struct pipe_context *pctx,
+		struct pipe_resource *prsc,
+		unsigned level, unsigned usage,
+		const struct pipe_box *box)
+{
+	struct fd_resource *rsc = fd_resource(prsc);
+	struct pipe_transfer *xfer =
+			util_slab_alloc(&fd_context(pctx)->transfer_pool);
+
+	if (xfer) {
+		xfer->resource = prsc;
+		xfer->level = level;
+		xfer->usage = usage;
+		xfer->box = *box;
+		xfer->stride = rsc->pitch * rsc->cpp;
+		xfer->layer_stride = xfer->stride;
+	}
+
+	return xfer;
+}
+
+/* Map the requested box of 'prsc' for CPU access.
+ *
+ * On success the new transfer is stored in *pptrans and a pointer to the
+ * first byte of the box (its x/y origin) inside the mapped buffer is
+ * returned.  On failure -- the transfer cannot be allocated or the buffer
+ * cannot be mapped -- *pptrans is set to NULL and NULL is returned.
+ */
+static void *
+fd_resource_transfer_map(struct pipe_context *pctx,
+		struct pipe_resource *prsc,
+		unsigned level, unsigned usage,
+		const struct pipe_box *box,
+		struct pipe_transfer **pptrans)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	struct fd_resource *rsc = fd_resource(prsc);
+	struct pipe_transfer *ptrans;
+	char *buf;
+
+	*pptrans = NULL;
+
+	ptrans = fd_resource_transfer_get(pctx, prsc, level, usage, box);
+	if (!ptrans)
+		return NULL;
+
+	buf = fd_bo_map(rsc->bo);
+	if (!buf) {
+		/* give the transfer back rather than leaking it */
+		util_slab_free(&ctx->transfer_pool, ptrans);
+		return NULL;
+	}
+
+	*pptrans = ptrans;
+
+	return buf + (rsc->cpp * ptrans->box.x) +
+			(rsc->cpp * rsc->pitch * ptrans->box.y);
+}
+
+/* Return a transfer object to the context's transfer pool. */
+static void
+fd_resource_transfer_destroy(struct pipe_context *pctx,
+		struct pipe_transfer *ptrans)
+{
+	util_slab_free(&fd_context(pctx)->transfer_pool, ptrans);
+}
+
+/* Unmap hook: the only thing done here is releasing the transfer
+ * object.
+ */
+static void
+fd_resource_transfer_unmap(struct pipe_context *pctx,
+		struct pipe_transfer *ptrans)
+{
+	struct pipe_transfer *xfer = ptrans;
+
+	fd_resource_transfer_destroy(pctx, xfer);
+}
+
+/* Destroy a resource: drop its buffer object, then free the resource. */
+static void
+fd_resource_destroy(struct pipe_screen *pscreen,
+		struct pipe_resource *prsc)
+{
+	struct fd_resource *res = fd_resource(prsc);
+
+	fd_bo_del(res->bo);
+	FREE(res);
+}
+
+/* Export the resource's buffer object as a winsys handle, passing the
+ * resource pitch along.  Returns whatever fd_screen_bo_get_handle()
+ * reports.
+ */
+static boolean
+fd_resource_get_handle(struct pipe_screen *pscreen,
+		struct pipe_resource *prsc,
+		struct winsys_handle *handle)
+{
+	struct fd_resource *res = fd_resource(prsc);
+	boolean ok;
+
+	ok = fd_screen_bo_get_handle(pscreen, res->bo, res->pitch, handle);
+
+	return ok;
+}
+
+
+/* Per-resource function table stored in every fd_resource's base.vtbl.
+ * Handle export, destruction, map and unmap are implemented by the
+ * driver; flush_region and inline_write use the u_default_* helpers.
+ */
+const struct u_resource_vtbl fd_resource_vtbl = {
+		.resource_get_handle      = fd_resource_get_handle,
+		.resource_destroy         = fd_resource_destroy,
+		.transfer_map             = fd_resource_transfer_map,
+		.transfer_flush_region    = u_default_transfer_flush_region,
+		.transfer_unmap           = fd_resource_transfer_unmap,
+		.transfer_inline_write    = u_default_transfer_inline_write,
+};
+
+/**
+ * Create a new texture object, using the given template info.
+ *
+ * The pitch is the template width aligned up to 32, and the backing
+ * buffer is sized as pitch * height0 * bytes-per-block.  Returns NULL if
+ * either the resource or its backing buffer object cannot be allocated.
+ */
+static struct pipe_resource *
+fd_resource_create(struct pipe_screen *pscreen,
+		const struct pipe_resource *tmpl)
+{
+	struct fd_screen *screen = fd_screen(pscreen);
+	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
+	struct pipe_resource *prsc = &rsc->base.b;
+	uint32_t flags, size;
+
+	DBG("target=%d, format=%s, %ux%u@%u, array_size=%u, last_level=%u, "
+			"nr_samples=%u, usage=%u, bind=%x, flags=%x",
+			tmpl->target, util_format_name(tmpl->format),
+			tmpl->width0, tmpl->height0, tmpl->depth0,
+			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
+			tmpl->usage, tmpl->bind, tmpl->flags);
+
+	if (!rsc)
+		return NULL;
+
+	*prsc = *tmpl;
+
+	pipe_reference_init(&prsc->reference, 1);
+	prsc->screen = pscreen;
+
+	rsc->base.vtbl = &fd_resource_vtbl;
+	rsc->pitch = ALIGN(tmpl->width0, 32);
+	rsc->cpp = util_format_get_blocksize(tmpl->format);
+
+	size = rsc->pitch * tmpl->height0 * rsc->cpp;
+	flags = DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
+
+	rsc->bo = fd_bo_new(screen->dev, size, flags);
+	if (!rsc->bo) {
+		/* never hand out a resource without backing storage: every
+		 * later map/destroy would operate on a NULL bo
+		 */
+		FREE(rsc);
+		return NULL;
+	}
+
+	return prsc;
+}
+
+/**
+ * Create a texture from a winsys_handle. The handle is often created in
+ * another process by first creating a pipe texture and then calling
+ * resource_get_handle.
+ *
+ * Returns NULL if the resource cannot be allocated or the handle cannot
+ * be imported.
+ */
+static struct pipe_resource *
+fd_resource_from_handle(struct pipe_screen *pscreen,
+		const struct pipe_resource *tmpl,
+		struct winsys_handle *handle)
+{
+	struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
+	struct pipe_resource *prsc = &rsc->base.b;
+
+	DBG("target=%d, format=%s, %ux%u@%u, array_size=%u, last_level=%u, "
+			"nr_samples=%u, usage=%u, bind=%x, flags=%x",
+			tmpl->target, util_format_name(tmpl->format),
+			tmpl->width0, tmpl->height0, tmpl->depth0,
+			tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
+			tmpl->usage, tmpl->bind, tmpl->flags);
+
+	if (!rsc)
+		return NULL;
+
+	*prsc = *tmpl;
+
+	pipe_reference_init(&prsc->reference, 1);
+	prsc->screen = pscreen;
+
+	rsc->bo = fd_screen_bo_from_handle(pscreen, handle, &rsc->pitch);
+	if (!rsc->bo) {
+		FREE(rsc);
+		return NULL;
+	}
+
+	rsc->base.vtbl = &fd_resource_vtbl;
+	/* NOTE(review): this overwrites the pitch that
+	 * fd_screen_bo_from_handle() just stored through &rsc->pitch.  Kept
+	 * as-is because the units of the imported value are not visible
+	 * here -- confirm which of the two values is intended.
+	 */
+	rsc->pitch = ALIGN(tmpl->width0, 32);
+	/* cpp feeds the transfer stride and map offsets; leaving it at zero
+	 * would make every transfer of an imported resource use stride 0
+	 */
+	rsc->cpp = util_format_get_blocksize(tmpl->format);
+
+	return prsc;
+}
+
+/**
+ * Copy a block of pixels from one resource to another.
+ * The resource must be of the same format.
+ * Resources with nr_samples > 1 are not allowed.
+ *
+ * Not implemented yet: the function only logs a debug message and
+ * copies nothing.
+ */
+static void
+fd_resource_copy_region(struct pipe_context *pctx,
+		struct pipe_resource *dst,
+		unsigned dst_level,
+		unsigned dstx, unsigned dsty, unsigned dstz,
+		struct pipe_resource *src,
+		unsigned src_level,
+		const struct pipe_box *src_box)
+{
+	DBG("TODO: ");
+	// TODO
+}
+
+/* Optimal hardware path for blitting pixels.
+ * Scaling, format conversion, up- and downsampling (resolve) are allowed.
+ *
+ * Not implemented yet: the function only logs a debug message and
+ * performs no blit.
+ */
+static void
+fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
+{
+	DBG("TODO: ");
+	// TODO
+}
+
+/* Install the resource hooks on the screen.  Creation and import are
+ * driver functions; get_handle and destroy go through the u_resource
+ * vtbl dispatchers.
+ */
+void
+fd_resource_screen_init(struct pipe_screen *pscreen)
+{
+	/* dispatched through each resource's vtbl */
+	pscreen->resource_destroy = u_resource_destroy_vtbl;
+	pscreen->resource_get_handle = u_resource_get_handle_vtbl;
+
+	/* implemented directly by the driver */
+	pscreen->resource_from_handle = fd_resource_from_handle;
+	pscreen->resource_create = fd_resource_create;
+}
+
+/* Install the resource-related hooks on the pipe context: transfers go
+ * through the u_transfer vtbl dispatchers, the surface, copy and blit
+ * hooks are driver functions.
+ */
+void
+fd_resource_context_init(struct pipe_context *pctx)
+{
+	/* surface, copy and blit hooks */
+	pctx->create_surface = fd_create_surface;
+	pctx->surface_destroy = fd_surface_destroy;
+	pctx->resource_copy_region = fd_resource_copy_region;
+	pctx->blit = fd_blit;
+
+	/* transfer hooks, dispatched through each resource's vtbl */
+	pctx->transfer_map = u_transfer_map_vtbl;
+	pctx->transfer_unmap = u_transfer_unmap_vtbl;
+	pctx->transfer_flush_region = u_transfer_flush_region_vtbl;
+	pctx->transfer_inline_write = u_transfer_inline_write_vtbl;
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h
new file mode 100644
index 0000000..875cffa
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_resource.h
@@ -0,0 +1,49 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#ifndef FREEDRENO_RESOURCE_H_
+#define FREEDRENO_RESOURCE_H_
+
+#include "util/u_transfer.h"
+
+struct fd_resource {
+	struct u_resource base;  /* kept first: fd_resource() casts a pipe_resource pointer straight to this type */
+	struct fd_bo *bo;        /* backing buffer object */
+	uint32_t pitch, cpp;     /* NOTE(review): pitch looks like pixels (ALIGN(width0, 32)); cpp presumably bytes per pixel -- confirm */
+};
+
+static INLINE struct fd_resource *
+fd_resource(struct pipe_resource *ptex)
+{
+	return (struct fd_resource *)ptex;  /* unchecked downcast: ptex must really point at an fd_resource */
+}
+
+void fd_resource_screen_init(struct pipe_screen *pscreen);
+void fd_resource_context_init(struct pipe_context *pctx);
+
+#endif /* FREEDRENO_RESOURCE_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
new file mode 100644
index 0000000..5310fc7
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -0,0 +1,471 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_state.h"
+
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "util/u_format_s3tc.h"
+#include "util/u_string.h"
+
+#include "os/os_time.h"
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+
+#include "freedreno_context.h"
+#include "freedreno_screen.h"
+#include "freedreno_resource.h"
+#include "freedreno_fence.h"
+#include "freedreno_util.h"
+
+/* XXX this should go away */
+#include "state_tracker/drm_driver.h"
+
+int fd_mesa_debug = 0;
+
+static const char *
+fd_screen_get_name(struct pipe_screen *pscreen)
+{
+	static char buffer[128];  /* returned to the caller; shared storage, so not reentrant */
+	util_snprintf(buffer, sizeof(buffer), "FD%03u",  /* %u: device_id is an unsigned uint32_t */
+			fd_screen(pscreen)->device_id);
+	return buffer;
+}
+
+static const char *
+fd_screen_get_vendor(struct pipe_screen *pscreen)
+{
+	return "freedreno";  /* string literal: callers must not free or modify it */
+}
+
+static uint64_t
+fd_screen_get_timestamp(struct pipe_screen *pscreen)
+{
+	int64_t cpu_time = os_time_get() * 1000;  /* presumably usec -> nsec -- confirm os_time_get() units */
+	return cpu_time + fd_screen(pscreen)->cpu_gpu_time_delta;  /* shift CPU time by the stored CPU/GPU offset */
+}
+
+static void
+fd_screen_fence_ref(struct pipe_screen *pscreen,
+		struct pipe_fence_handle **ptr,
+		struct pipe_fence_handle *pfence)
+{
+	fd_fence_ref(fd_fence(pfence), (struct fd_fence **)ptr);  /* note the argument order: (new fence, slot to update) */
+}
+
+static boolean
+fd_screen_fence_signalled(struct pipe_screen *screen,
+		struct pipe_fence_handle *pfence)
+{
+	return fd_fence_signalled(fd_fence(pfence));  /* thin wrapper; 'screen' is unused */
+}
+
+static boolean
+fd_screen_fence_finish(struct pipe_screen *screen,
+		struct pipe_fence_handle *pfence,
+		uint64_t timeout)
+{
+	return fd_fence_wait(fd_fence(pfence));  /* NOTE: 'timeout' is not forwarded to fd_fence_wait() */
+}
+
+static void
+fd_screen_destroy(struct pipe_screen *pscreen)
+{
+	// TODO: nothing is released yet (neither the fd_screen allocation nor screen->pipe)
+	DBG("TODO");
+}
+
+/*
+EGL Version 1.4
+EGL Vendor Qualcomm, Inc
+EGL Extensions EGL_QUALCOMM_shared_image EGL_KHR_image EGL_AMD_create_image EGL_KHR_lock_surface EGL_KHR_lock_surface2 EGL_KHR_fence_sync EGL_IMG_context_priority EGL_ANDROID_image_native_buffer
+GL extensions: GL_AMD_compressed_ATC_texture GL_AMD_performance_monitor GL_AMD_program_binary_Z400 GL_EXT_texture_filter_anisotropic GL_EXT_texture_format_BGRA8888 GL_EXT_texture_type_2_10_10_10_REV GL_NV_fence GL_OES_compressed_ETC1_RGB8_texture GL_OES_depth_texture GL_OES_depth24 GL_OES_EGL_image GL_OES_EGL_image_external GL_OES_element_index_uint GL_OES_fbo_render_mipmap GL_OES_fragment_precision_high GL_OES_get_program_binary GL_OES_packed_depth_stencil GL_OES_rgb8_rgba8 GL_OES_standard_derivatives GL_OES_texture_3D GL_OES_texture_float GL_OES_texture_half_float GL_OES_texture_half_float_linear GL_OES_texture_npot GL_OES_vertex_half_float GL_OES_vertex_type_10_10_10_2 GL_QCOM_alpha_test GL_QCOM_binning_control GL_QCOM_driver_control GL_QCOM_perfmon_global_mode GL_QCOM_extended_get GL_QCOM_extended_get2 GL_QCOM_tiled_rendering GL_QCOM_writeonly_rendering GL_AMD_compressed_3DC_texture
+GL_MAX_3D_TEXTURE_SIZE_OES: 1024 0 0 0
+no GL_MAX_SAMPLES_ANGLE: GL_INVALID_ENUM
+no GL_MAX_SAMPLES_APPLE: GL_INVALID_ENUM
+GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT: 16 0 0 0
+no GL_MAX_SAMPLES_IMG: GL_INVALID_ENUM
+GL_MAX_TEXTURE_SIZE: 4096 0 0 0
+GL_MAX_VIEWPORT_DIMS: 4096 4096 0 0
+GL_MAX_VERTEX_ATTRIBS: 16 0 0 0
+GL_MAX_VERTEX_UNIFORM_VECTORS: 251 0 0 0
+GL_MAX_VARYING_VECTORS: 8 0 0 0
+GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS: 20 0 0 0
+GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS: 4 0 0 0
+GL_MAX_TEXTURE_IMAGE_UNITS: 16 0 0 0
+GL_MAX_FRAGMENT_UNIFORM_VECTORS: 221 0 0 0
+GL_MAX_CUBE_MAP_TEXTURE_SIZE: 4096 0 0 0
+GL_MAX_RENDERBUFFER_SIZE: 4096 0 0 0
+no GL_TEXTURE_NUM_LEVELS_QCOM: GL_INVALID_ENUM
+ */
+static int
+fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
+{
+	/* this is probably not totally correct.. but it's a start: */
+	switch (param) {
+	/* Supported features (boolean caps). */
+	case PIPE_CAP_NPOT_TEXTURES:
+	case PIPE_CAP_TWO_SIDED_STENCIL:
+	case PIPE_CAP_ANISOTROPIC_FILTER:
+	case PIPE_CAP_POINT_SPRITE:
+	case PIPE_CAP_TEXTURE_SHADOW_MAP:
+	case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+	case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+	case PIPE_CAP_TEXTURE_SWIZZLE:
+	case PIPE_CAP_SHADER_STENCIL_EXPORT:
+	case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+	case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+	case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+	case PIPE_CAP_SM3:
+	case PIPE_CAP_SEAMLESS_CUBE_MAP:
+	case PIPE_CAP_PRIMITIVE_RESTART:
+	case PIPE_CAP_CONDITIONAL_RENDER:
+	case PIPE_CAP_TEXTURE_BARRIER:
+	case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+	case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+	case PIPE_CAP_TGSI_INSTANCEID:
+	case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+	case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+	case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+	case PIPE_CAP_COMPUTE:  /* NOTE(review): reported as 1, yet get_shader_param() returns 0 for PIPE_SHADER_COMPUTE */
+	case PIPE_CAP_START_INSTANCE:
+	case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:  /* a count rather than a boolean; reported as 1 */
+	case PIPE_CAP_TEXTURE_MULTISAMPLE:  /* NOTE(review): is_format_supported() rejects sample_count > 1 */
+	case PIPE_CAP_USER_CONSTANT_BUFFERS:
+		return 1;
+
+	case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+		return 256;
+
+	case PIPE_CAP_GLSL_FEATURE_LEVEL:
+		return 120;
+
+	/* Unsupported features. */
+	case PIPE_CAP_INDEP_BLEND_ENABLE:
+	case PIPE_CAP_INDEP_BLEND_FUNC:
+	case PIPE_CAP_DEPTH_CLIP_DISABLE:
+	case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+	case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+	case PIPE_CAP_SCALED_RESOLVE:
+	case PIPE_CAP_TGSI_CAN_COMPACT_VARYINGS:
+	case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+	case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+	case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+	case PIPE_CAP_USER_VERTEX_BUFFERS:
+	case PIPE_CAP_USER_INDEX_BUFFERS:
+		return 0;
+
+	/* Stream output. */
+	case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+	case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+	case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+	case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+		return 0;
+
+	/* Texturing. */
+	case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+	case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+	case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+		return 14;
+	case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+		return 9192;  /* NOTE(review): 9192 looks like a typo for 8192 -- confirm */
+	case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+		return 20;
+
+	/* Render targets. */
+	case PIPE_CAP_MAX_RENDER_TARGETS:
+		return 1;
+
+	/* Queries (timer and occlusion): none advertised yet. */
+	case PIPE_CAP_QUERY_TIME_ELAPSED:
+	case PIPE_CAP_OCCLUSION_QUERY:
+	case PIPE_CAP_QUERY_TIMESTAMP:
+		return 0;
+
+	case PIPE_CAP_MIN_TEXEL_OFFSET:
+		return -8;
+
+	case PIPE_CAP_MAX_TEXEL_OFFSET:
+		return 7;
+
+	default:  /* caps not listed above are logged and reported as 0 */
+		DBG("unknown param %d", param);
+		return 0;
+	}
+}
+
+static float
+fd_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
+{
+	switch (param) {
+	case PIPE_CAPF_MAX_LINE_WIDTH:
+	case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+	case PIPE_CAPF_MAX_POINT_WIDTH:
+	case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+		return 8192.0f;  /* one shared limit for line and point widths, AA or not */
+	case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+		return 16.0f;
+	case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+		return 16.0f;
+	case PIPE_CAPF_GUARD_BAND_LEFT:
+	case PIPE_CAPF_GUARD_BAND_TOP:
+	case PIPE_CAPF_GUARD_BAND_RIGHT:
+	case PIPE_CAPF_GUARD_BAND_BOTTOM:
+		return 0.0f;
+	default:  /* caps not listed above are logged and reported as 0 */
+		DBG("unknown paramf %d", param);
+		return 0;
+	}
+}
+
+static int
+fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
+		enum pipe_shader_cap param)
+{
+	switch(shader)
+	{
+	case PIPE_SHADER_FRAGMENT:
+	case PIPE_SHADER_VERTEX:
+		break;  /* both stages share the limits reported below */
+	case PIPE_SHADER_COMPUTE:
+	case PIPE_SHADER_GEOMETRY:
+		/* maybe we could emulate.. */
+		return 0;
+	default:
+		DBG("unknown shader type %d", shader);
+		return 0;
+	}
+
+	/* this is probably not totally correct.. but it's a start: */
+	switch (param) {
+	case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+	case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+	case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+	case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+		return 16384;
+	case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+		return 8; /* XXX */
+	case PIPE_SHADER_CAP_MAX_INPUTS:
+		return 32;
+	case PIPE_SHADER_CAP_MAX_TEMPS:
+		return 256; /* Max native temporaries. */
+	case PIPE_SHADER_CAP_MAX_ADDRS:
+		/* XXX Isn't this equal to TEMPS? */
+		return 1; /* Max native address registers */
+	case PIPE_SHADER_CAP_MAX_CONSTS:
+	case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+		return 64;
+	case PIPE_SHADER_CAP_MAX_PREDS:
+		return 0; /* nothing uses this */
+	case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+		return 1;
+	case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+	case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+	case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+	case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+		return 1;
+	case PIPE_SHADER_CAP_SUBROUTINES:
+		return 0;
+	case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+	case PIPE_SHADER_CAP_INTEGERS:
+		return 0;
+	case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+		return 16;
+	case PIPE_SHADER_CAP_PREFERRED_IR:
+		return PIPE_SHADER_IR_TGSI;
+	default:
+		DBG("unknown shader param %d", param);
+		return 0;
+	}
+	return 0;  /* not reached: every case above returns */
+}
+
+static boolean
+fd_screen_is_format_supported(struct pipe_screen *pscreen,
+		enum pipe_format format,
+		enum pipe_texture_target target,
+		unsigned sample_count,
+		unsigned usage)
+{
+	unsigned retval = 0;  /* collects the subset of 'usage' bits this format can serve */
+
+	if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
+			(sample_count > 1) || /* TODO add MSAA */
+			!util_format_is_supported(format, usage)) {
+		DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
+				util_format_name(format), target, sample_count, usage);
+		return FALSE;
+	}
+
+	/* TODO figure out how to render to other formats.. */
+	if ((usage & PIPE_BIND_RENDER_TARGET) &&
+			((format != PIPE_FORMAT_B8G8R8A8_UNORM) &&
+			 (format != PIPE_FORMAT_B8G8R8X8_UNORM))) {
+		DBG("not supported render target: format=%s, target=%d, sample_count=%d, usage=%x",
+				util_format_name(format), target, sample_count, usage);
+		return FALSE;
+	}
+
+	if ((usage & (PIPE_BIND_SAMPLER_VIEW |
+				PIPE_BIND_VERTEX_BUFFER)) &&
+			(fd_pipe2surface(format) != FMT_INVALID)) {
+		retval |= usage & (PIPE_BIND_SAMPLER_VIEW |
+				PIPE_BIND_VERTEX_BUFFER);
+	}
+
+	if ((usage & (PIPE_BIND_RENDER_TARGET |
+				PIPE_BIND_DISPLAY_TARGET |
+				PIPE_BIND_SCANOUT |
+				PIPE_BIND_SHARED)) &&
+			(fd_pipe2color(format) != COLORX_INVALID)) {
+		retval |= usage & (PIPE_BIND_RENDER_TARGET |
+				PIPE_BIND_DISPLAY_TARGET |
+				PIPE_BIND_SCANOUT |
+				PIPE_BIND_SHARED);
+	}
+
+	if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
+			(fd_pipe2depth(format) != DEPTHX_INVALID)) {
+		retval |= PIPE_BIND_DEPTH_STENCIL;
+	}
+
+	if ((usage & PIPE_BIND_INDEX_BUFFER) &&
+			(fd_pipe2index(format) != INDEX_SIZE_INVALID)) {
+		retval |= PIPE_BIND_INDEX_BUFFER;
+	}
+
+	if (usage & PIPE_BIND_TRANSFER_READ)  /* transfer bits are accepted for any format that got this far */
+		retval |= PIPE_BIND_TRANSFER_READ;
+	if (usage & PIPE_BIND_TRANSFER_WRITE)
+		retval |= PIPE_BIND_TRANSFER_WRITE;
+
+	if (retval != usage) {
+		DBG("not supported: format=%s, target=%d, sample_count=%d, "
+				"usage=%x, retval=%x", util_format_name(format),
+				target, sample_count, usage, retval);
+	}
+
+	return retval == usage;  /* supported only if every requested bind flag was accepted */
+}
+
+boolean
+fd_screen_bo_get_handle(struct pipe_screen *pscreen,
+		struct fd_bo *bo,
+		unsigned stride,
+		struct winsys_handle *whandle)
+{
+	whandle->stride = stride;	/* stored for every handle type, even unsupported ones */
+	switch (whandle->type) {
+	case DRM_API_HANDLE_TYPE_SHARED:
+		return fd_bo_get_name(bo, &whandle->handle) == 0;
+	case DRM_API_HANDLE_TYPE_KMS:
+		whandle->handle = fd_bo_handle(bo);
+		return TRUE;
+	default:
+		return FALSE;
+	}
+}
+
+struct fd_bo *
+fd_screen_bo_from_handle(struct pipe_screen *pscreen,
+		struct winsys_handle *whandle,
+		unsigned *out_stride)
+{
+	struct fd_screen *screen = fd_screen(pscreen);
+	struct fd_bo *bo;
+
+	bo = fd_bo_from_name(screen->dev, whandle->handle);  /* handle is used as a name whatever whandle->type says */
+	if (!bo) {
+		DBG("ref name 0x%08x failed", whandle->handle);
+		return NULL;  /* *out_stride is left untouched on failure */
+	}
+
+	*out_stride = whandle->stride;
+
+	return bo;
+}
+
+struct pipe_screen *
+fd_screen_create(struct fd_device *dev)
+{
+	struct fd_screen *screen = CALLOC_STRUCT(fd_screen);
+	struct pipe_screen *pscreen;
+	uint64_t val = 0;	/* never read uninitialized if a param query fails */
+
+	char *fd_dbg = getenv("FD_MESA_DEBUG");
+	if (fd_dbg)
+		fd_mesa_debug = atoi(fd_dbg);
+
+	if (!screen)
+		return NULL;
+
+	DBG("");
+	screen->dev = dev;
+
+	// maybe this should be in context?
+	screen->pipe = fd_pipe_new(screen->dev, FD_PIPE_3D);
+	if (!screen->pipe) {
+		/* don't pass a NULL pipe to fd_pipe_get_param() or hand it to callers */
+		FREE(screen);
+		return NULL;
+	}
+
+	fd_pipe_get_param(screen->pipe, FD_GMEM_SIZE, &val);
+	screen->gmemsize_bytes = val;
+	fd_pipe_get_param(screen->pipe, FD_DEVICE_ID, &val);
+	screen->device_id = val;
+
+	pscreen = &screen->base;
+	pscreen->destroy = fd_screen_destroy;
+	pscreen->get_param = fd_screen_get_param;
+	pscreen->get_paramf = fd_screen_get_paramf;
+	pscreen->get_shader_param = fd_screen_get_shader_param;
+	pscreen->context_create = fd_context_create;
+	pscreen->is_format_supported = fd_screen_is_format_supported;
+	pscreen->get_name = fd_screen_get_name;
+	pscreen->get_vendor = fd_screen_get_vendor;
+	pscreen->get_timestamp = fd_screen_get_timestamp;
+
+	fd_resource_screen_init(pscreen);	/* installs the resource_* hooks */
+
+	pscreen->fence_reference = fd_screen_fence_ref;
+	pscreen->fence_signalled = fd_screen_fence_signalled;
+	pscreen->fence_finish = fd_screen_fence_finish;
+
+	util_format_s3tc_init();
+
+	return pscreen;
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h
new file mode 100644
index 0000000..720ee05
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_screen.h
@@ -0,0 +1,70 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#ifndef FREEDRENO_SCREEN_H_
+#define FREEDRENO_SCREEN_H_
+
+#include <freedreno_drmif.h>
+#include <freedreno_ringbuffer.h>
+
+#include "pipe/p_screen.h"
+#include "util/u_memory.h"
+
+typedef uint32_t u32;
+
+struct fd_bo;
+
+struct fd_screen {
+	struct pipe_screen base;     /* kept first: fd_screen() casts pipe_screen* to fd_screen* */
+
+	uint32_t gmemsize_bytes;     /* FD_GMEM_SIZE as returned by fd_pipe_get_param() */
+	uint32_t device_id;          /* FD_DEVICE_ID as returned by fd_pipe_get_param() */
+
+	struct fd_device *dev;       /* device passed to fd_screen_create() */
+	struct fd_pipe *pipe;        /* FD_PIPE_3D pipe created in fd_screen_create() */
+
+	int64_t cpu_gpu_time_delta;  /* added to the CPU time in fd_screen_get_timestamp() */
+};
+
+static INLINE struct fd_screen *
+fd_screen(struct pipe_screen *pscreen)
+{
+	return (struct fd_screen *)pscreen;  /* unchecked downcast; relies on 'base' being the first member */
+}
+
+boolean fd_screen_bo_get_handle(struct pipe_screen *pscreen,
+		struct fd_bo *bo,
+		unsigned stride,
+		struct winsys_handle *whandle);
+struct fd_bo * fd_screen_bo_from_handle(struct pipe_screen *pscreen,
+		struct winsys_handle *whandle,
+		unsigned *out_stride);
+
+struct pipe_screen * fd_screen_create(struct fd_device *dev);
+
+#endif /* FREEDRENO_SCREEN_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c
new file mode 100644
index 0000000..fe8389f
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -0,0 +1,639 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_helpers.h"
+
+#include "freedreno_state.h"
+#include "freedreno_context.h"
+#include "freedreno_zsa.h"
+#include "freedreno_rasterizer.h"
+#include "freedreno_blend.h"
+#include "freedreno_program.h"
+#include "freedreno_resource.h"
+#include "freedreno_texture.h"
+#include "freedreno_gmem.h"
+#include "freedreno_util.h"
+
+static void
+fd_set_blend_color(struct pipe_context *pctx,
+		const struct pipe_blend_color *blend_color)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	ctx->blend_color = *blend_color;  /* copied by value; the caller's struct is not retained */
+	ctx->dirty |= FD_DIRTY_BLEND_COLOR;
+}
+
+static void
+fd_set_stencil_ref(struct pipe_context *pctx,
+		const struct pipe_stencil_ref *stencil_ref)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	ctx->stencil_ref = *stencil_ref;  /* copied by value */
+	ctx->dirty |= FD_DIRTY_STENCIL_REF;  /* NOTE(review): fd_state_emit() reads stencil_ref under FD_DIRTY_ZSA -- confirm this bit is honoured too */
+}
+
+static void
+fd_set_clip_state(struct pipe_context *pctx,
+		const struct pipe_clip_state *clip)
+{
+	DBG("TODO: ");  /* not implemented: 'clip' is ignored and no dirty bit is set */
+}
+
+static void
+fd_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	ctx->sample_mask = (uint16_t)sample_mask;  /* only the low 16 bits are kept */
+	ctx->dirty |= FD_DIRTY_SAMPLE_MASK;
+}
+
+/* notes from calim on #dri-devel:
+ * index==0 will be non-UBO (ie. glUniformXYZ()) all packed together padded
+ * out to vec4's
+ * I should be able to consider that I own the user_ptr until the next
+ * set_constant_buffer() call, at which point I don't really care about the
+ * previous values.
+ * index>0 will be UBO's.. well, I'll worry about that later
+ */
+static void
+fd_set_constant_buffer(struct pipe_context *pctx, uint shader, uint index,
+		struct pipe_constant_buffer *cb)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	struct fd_constbuf_stateobj *so = &ctx->constbuf[shader];  /* 'shader' and 'index' are not range-checked here */
+
+	/* Note that the state tracker can unbind constant buffers by
+	 * passing NULL here.
+	 */
+	if (unlikely(!cb)) {
+		so->enabled_mask &= ~(1 << index);
+		so->dirty_mask &= ~(1 << index);
+		pipe_resource_reference(&so->cb[index].buffer, NULL);
+		return;  /* the unbind path does not raise FD_DIRTY_CONSTBUF */
+	}
+
+	pipe_resource_reference(&so->cb[index].buffer, cb->buffer);
+	so->cb[index].buffer_offset = cb->buffer_offset;
+	so->cb[index].buffer_size   = cb->buffer_size;
+	so->cb[index].user_buffer   = cb->user_buffer;  /* the pointer is stored, the data is not copied */
+
+	so->enabled_mask |= 1 << index;
+	so->dirty_mask |= 1 << index;
+	ctx->dirty |= FD_DIRTY_CONSTBUF;
+}
+
+static void
+fd_set_framebuffer_state(struct pipe_context *pctx,
+		const struct pipe_framebuffer_state *framebuffer)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	struct pipe_framebuffer_state *cso = &ctx->framebuffer.base;
+	unsigned i;
+
+	DBG("%d: cbufs[0]=%p, zsbuf=%p", ctx->needs_flush,
+			cso->cbufs[0], cso->zsbuf);  /* logs the outgoing (still bound) surfaces */
+
+	fd_context_render(pctx);  /* presumably flushes rendering queued against the old framebuffer -- confirm */
+
+	for (i = 0; i < framebuffer->nr_cbufs; i++)
+		pipe_surface_reference(&cso->cbufs[i], framebuffer->cbufs[i]);
+	for (; i < ctx->framebuffer.base.nr_cbufs; i++)  /* drop refs on slots the new state no longer uses */
+		pipe_surface_reference(&cso->cbufs[i], NULL);
+
+	cso->nr_cbufs = framebuffer->nr_cbufs;
+	cso->width = framebuffer->width;
+	cso->height = framebuffer->height;
+
+	pipe_surface_reference(&cso->zsbuf, framebuffer->zsbuf);
+
+	if (cso->nr_cbufs > 0)
+		fd_gmem_calculate_tiles(pctx);  /* skipped when no color buffer is bound */
+
+	ctx->dirty |= FD_DIRTY_FRAMEBUFFER;
+}
+
+static void
+fd_set_polygon_stipple(struct pipe_context *pctx,
+		const struct pipe_poly_stipple *stipple)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	ctx->stipple = *stipple;  /* copied by value; the caller's struct is not retained */
+	ctx->dirty |= FD_DIRTY_STIPPLE;
+}
+
+static void
+fd_set_scissor_state(struct pipe_context *pctx,
+		const struct pipe_scissor_state *scissor)
+{
+	struct fd_context *ctx = fd_context(pctx);
+
+	ctx->scissor = *scissor;  /* copied by value; emitted later as the window scissor in fd_state_emit() */
+	ctx->dirty |= FD_DIRTY_SCISSOR;
+}
+
+static void
+fd_set_viewport_state(struct pipe_context *pctx,
+		const struct pipe_viewport_state *viewport)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	ctx->viewport = *viewport;  /* copied by value; scale/translate are emitted in fd_state_emit() */
+	ctx->dirty |= FD_DIRTY_VIEWPORT;
+}
+
+static void
+fd_set_vertex_buffers(struct pipe_context *pctx,
+		unsigned start_slot, unsigned count,
+		const struct pipe_vertex_buffer *vb)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	struct fd_vertexbuf_stateobj *so = &ctx->vertexbuf;
+
+	util_set_vertex_buffers_mask(so->vb, &so->enabled_mask, vb, start_slot, count);
+	so->count = util_last_bit(so->enabled_mask);  /* presumably highest enabled slot + 1 -- confirm util_last_bit() */
+
+	ctx->dirty |= FD_DIRTY_VERTEXBUF;
+}
+
+static void
+fd_set_index_buffer(struct pipe_context *pctx,
+		const struct pipe_index_buffer *ib)
+{
+	struct fd_context *ctx = fd_context(pctx);
+
+	if (ib) {
+		pipe_resource_reference(&ctx->indexbuf.buffer, ib->buffer);
+		ctx->indexbuf.index_size = ib->index_size;
+		ctx->indexbuf.offset = ib->offset;
+		ctx->indexbuf.user_buffer = ib->user_buffer;
+	} else {
+		pipe_resource_reference(&ctx->indexbuf.buffer, NULL);  /* index_size/offset/user_buffer keep their old values */
+	}
+
+	ctx->dirty |= FD_DIRTY_INDEXBUF;  /* raised for both bind and unbind */
+}
+
+void
+fd_state_init(struct pipe_context *pctx)
+{
+	pctx->set_vertex_buffers = fd_set_vertex_buffers;
+	pctx->set_index_buffer = fd_set_index_buffer;
+	pctx->set_constant_buffer = fd_set_constant_buffer;
+
+	pctx->set_framebuffer_state = fd_set_framebuffer_state;
+	pctx->set_viewport_state = fd_set_viewport_state;
+	pctx->set_scissor_state = fd_set_scissor_state;
+	pctx->set_polygon_stipple = fd_set_polygon_stipple;
+	pctx->set_clip_state = fd_set_clip_state;
+	pctx->set_sample_mask = fd_set_sample_mask;
+	pctx->set_blend_color = fd_set_blend_color;
+	pctx->set_stencil_ref = fd_set_stencil_ref;
+}
+
+/* NOTE: just define the position for const regs statically.. the blob
+ * driver doesn't seem to change these dynamically, and I can't really
+ * think of a good reason to do so..
+ */
+#define VS_CONST_BASE 0x20
+#define PS_CONST_BASE 0x120
+
+static void
+emit_constants(struct fd_ringbuffer *ring, uint32_t base,
+		struct fd_constbuf_stateobj *constbuf,
+		struct fd_shader_stateobj *shader)
+{
+	uint32_t enabled_mask = constbuf->enabled_mask;
+	uint32_t start_base = base;
+	unsigned i;
+
+	// XXX TODO only emit dirty consts.. but we need to keep track if
+	// they are clobbered by a clear, gmem2mem, or mem2gmem..
+	constbuf->dirty_mask = enabled_mask;  /* marks every enabled buffer dirty, so all of them are re-emitted */
+
+	/* emit user constants: */
+	while (enabled_mask) {
+		unsigned index = ffs(enabled_mask) - 1;  /* lowest enabled slot still to process */
+		struct pipe_constant_buffer *cb = &constbuf->cb[index];
+		unsigned size = ALIGN(cb->buffer_size, 4) / 4; /* size in dwords */
+
+		// I expect that size should be a multiple of vec4's:
+		assert(size == ALIGN(size, 4));
+
+		/* hmm, sometimes we still seem to end up with consts bound,
+		 * even if shader isn't using them, which ends up overwriting
+		 * const reg's used for immediates.. this is a hack to work
+		 * around that:
+		 */
+		if (shader && ((base - start_base) >= (shader->first_immediate * 4)))
+			break;
+
+		if (constbuf->dirty_mask & (1 << index)) {
+			const uint32_t *dwords;
+
+			if (cb->user_buffer) {
+				dwords = cb->user_buffer;
+			} else {
+				struct fd_resource *rsc = fd_resource(cb->buffer);
+				dwords = fd_bo_map(rsc->bo);  /* NOTE(review): the mapping is not checked for NULL */
+			}
+
+			dwords = (uint32_t *)(((uint8_t *)dwords) + cb->buffer_offset);  /* byte offset; NOTE(review): casts drop const */
+
+			OUT_PKT3(ring, CP_SET_CONSTANT, size + 1);  /* 1 dword for 'base' + 'size' payload dwords */
+			OUT_RING(ring, base);
+			for (i = 0; i < size; i++)
+				OUT_RING(ring, *(dwords++));
+
+			constbuf->dirty_mask &= ~(1 << index);
+		}
+
+		base += size;
+		enabled_mask &= ~(1 << index);
+	}
+
+	/* emit shader immediates: */
+	if (shader) {
+		for (i = 0; i < shader->num_immediates; i++) {
+			OUT_PKT3(ring, CP_SET_CONSTANT, 5);  /* 'base' + one vec4 */
+			OUT_RING(ring, base);
+			OUT_RING(ring, shader->immediates[i].val[0]);
+			OUT_RING(ring, shader->immediates[i].val[1]);
+			OUT_RING(ring, shader->immediates[i].val[2]);
+			OUT_RING(ring, shader->immediates[i].val[3]);
+			base += 4;
+		}
+	}
+}
+
+/* this works at least for a220 and earlier.. if later gpu's gain more than
+ * 32 texture units, might need to bump this up to uint64_t
+ */
+typedef uint32_t texmask;
+
+static texmask
+emit_texture(struct fd_ringbuffer *ring, struct fd_context *ctx,
+		struct fd_texture_stateobj *tex, unsigned samp_id, texmask emitted)
+{
+	unsigned const_idx = fd_get_const_idx(ctx, tex, samp_id);
+	struct fd_sampler_stateobj *sampler;
+	struct fd_pipe_sampler_view *view;
+
+	if (emitted & (1 << const_idx))  /* NOTE(review): int shift; const_idx == 31 would overflow a signed int */
+		return 0;  /* slot already written: contribute no new bits */
+
+	sampler = tex->samplers[samp_id];
+	view = fd_pipe_sampler_view(tex->textures[samp_id]);  /* NOTE(review): assumes a view is bound for this sampler -- confirm */
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 7);
+	OUT_RING(ring, 0x00010000 + (0x6 * const_idx));  /* each slot takes the 6 dwords written below */
+
+	OUT_RING(ring, sampler->tex0 | view->tex0);
+	OUT_RELOC(ring, view->tex_resource->bo, 0, view->fmt);
+	OUT_RING(ring, view->tex2);
+	OUT_RING(ring, sampler->tex3 | view->tex3);
+	OUT_RING(ring, sampler->tex4);
+	OUT_RING(ring, sampler->tex5);
+
+	return (1 << const_idx);  /* bit for the caller to OR into its 'emitted' mask */
+}
+
+static void
+emit_textures(struct fd_ringbuffer *ring, struct fd_context *ctx)
+{
+	texmask emitted = 0;  /* one bit per texture-constant slot already written */
+	unsigned i;
+
+	for (i = 0; i < ctx->verttex.num_samplers; i++)
+		if (ctx->verttex.samplers[i])  /* unbound sampler slots are skipped */
+			emitted |= emit_texture(ring, ctx, &ctx->verttex, i, emitted);
+
+	for (i = 0; i < ctx->fragtex.num_samplers; i++)
+		if (ctx->fragtex.samplers[i])
+			emitted |= emit_texture(ring, ctx, &ctx->fragtex, i, emitted);  /* slots already in 'emitted' are not written twice */
+}
+
+void
+fd_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
+		struct fd_vertex_buf *vbufs, uint32_t n)
+{
+	unsigned i;
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 1 + (2 * n));  /* 1 dword below + 2 dwords per buffer */
+	OUT_RING(ring, (0x1 << 16) | (val & 0xffff));  /* only the low 16 bits of 'val' are used */
+	for (i = 0; i < n; i++) {
+		struct fd_resource *rsc = fd_resource(vbufs[i].prsc);
+		OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 3);
+		OUT_RING (ring, vbufs[i].size);
+	}
+}
+
+void
+fd_emit_framebuffer_state(struct fd_ringbuffer *ring,
+		struct fd_framebuffer_stateobj *fb, struct fd_zsa_stateobj *zsa)
+{
+	uint32_t reg, base;
+
+	/* this should be true because bin_w/bin_h should be multiples of 32: */
+	assert(((fb->bin_w * fb->bin_h) % 1024) == 0);
+
+	/* depth/stencil starts after color buffer in GMEM: */
+	base = (fb->bin_w * fb->bin_h) / 1024;
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 4);
+	OUT_RING(ring, CP_REG(REG_RB_SURFACE_INFO));
+	OUT_RING(ring, fb->bin_w);                   /* RB_SURFACE_INFO */
+	OUT_RING(ring, RB_COLOR_INFO_COLOR_SWAP(1) | /* RB_COLOR_INFO */
+			RB_COLOR_INFO_COLOR_FORMAT(fd_pipe2color(fb->base.cbufs[0]->format)));  /* NOTE(review): dereferences cbufs[0] unconditionally */
+	reg = RB_DEPTH_INFO_DEPTH_BASE(ALIGN(base, 4));
+	if (fd_depth_enabled(zsa)) {
+		reg |= RB_DEPTH_INFO_DEPTH_FORMAT(DEPTHX_24_8);
+	} else if (fd_stencil_enabled(zsa)) {
+		reg |= RB_DEPTH_INFO_DEPTH_FORMAT(DEPTHX_16);  /* NOTE(review): stencil-only selects the 16-bit depth format -- confirm intended */
+	}
+	OUT_RING(ring, reg);                         /* RB_DEPTH_INFO */
+}
+
+void
+fd_state_emit(struct pipe_context *pctx, uint32_t dirty)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	struct fd_ringbuffer *ring = ctx->ring;
+
+	/* NOTE: we probably want to eventually refactor this so each state
+	 * object handles emitting its own state..  although the mapping of
+	 * state to registers is not always orthogonal, sometimes a single
+	 * register contains bitfields coming from multiple state objects,
+	 * so not sure the best way to deal with that yet.
+	 */
+
+	if (dirty & FD_DIRTY_SAMPLE_MASK) {
+		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+		OUT_RING(ring, CP_REG(REG_PA_SC_AA_MASK));
+		OUT_RING(ring, ctx->sample_mask);
+	}
+
+	if (dirty & FD_DIRTY_ZSA) {
+		struct pipe_stencil_ref *sr = &ctx->stencil_ref;
+
+		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+		OUT_RING(ring, CP_REG(REG_RB_DEPTHCONTROL));
+		OUT_RING(ring, ctx->zsa->rb_depthcontrol);
+
+		OUT_PKT3(ring, CP_SET_CONSTANT, 4);
+		OUT_RING(ring, CP_REG(REG_RB_STENCILREFMASK_BF));
+		OUT_RING(ring, ctx->zsa->rb_stencilrefmask_bf |
+				RB_STENCILREFMASK_STENCILREF(sr->ref_value[1]));
+		OUT_RING(ring, ctx->zsa->rb_stencilrefmask |
+				RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
+		OUT_RING(ring, ctx->zsa->rb_alpha_ref);
+	}
+
+	if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) {
+		OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+		OUT_RING(ring, CP_REG(REG_PA_CL_CLIP_CNTL));
+		OUT_RING(ring, ctx->rasterizer->pa_cl_clip_cntl);
+		OUT_RING(ring, ctx->rasterizer->pa_su_sc_mode_cntl |
+				ctx->framebuffer.pa_su_sc_mode_cntl);
+
+		OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+		OUT_RING(ring, CP_REG(REG_PA_SU_POINT_SIZE));
+		OUT_RING(ring, ctx->rasterizer->pa_su_point_size);
+		OUT_RING(ring, ctx->rasterizer->pa_su_point_minmax);
+		OUT_RING(ring, ctx->rasterizer->pa_su_line_cntl);
+		OUT_RING(ring, ctx->rasterizer->pa_sc_line_stipple);
+
+		OUT_PKT3(ring, CP_SET_CONSTANT, 6);
+		OUT_RING(ring, CP_REG(REG_PA_SU_VTX_CNTL));
+		OUT_RING(ring, ctx->rasterizer->pa_su_vtx_cntl);
+		OUT_RING(ring, f2d(1.0));                /* PA_CL_GB_VERT_CLIP_ADJ */
+		OUT_RING(ring, f2d(1.0));                /* PA_CL_GB_VERT_DISC_ADJ */
+		OUT_RING(ring, f2d(1.0));                /* PA_CL_GB_HORZ_CLIP_ADJ */
+		OUT_RING(ring, f2d(1.0));                /* PA_CL_GB_HORZ_DISC_ADJ */
+	}
+
+	if (dirty & FD_DIRTY_FRAMEBUFFER)
+		fd_emit_framebuffer_state(ring, &ctx->framebuffer, ctx->zsa);
+
+	if (dirty & FD_DIRTY_SCISSOR) {
+		OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+		OUT_RING(ring, CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL));
+		OUT_RING(ring, xy2d(ctx->scissor.minx,   /* PA_SC_WINDOW_SCISSOR_TL */
+				ctx->scissor.miny));
+		OUT_RING(ring, xy2d(ctx->scissor.maxx,   /* PA_SC_WINDOW_SCISSOR_BR */
+				ctx->scissor.maxy));
+	}
+
+	if (dirty & FD_DIRTY_VIEWPORT) {
+		OUT_PKT3(ring, CP_SET_CONSTANT, 7);
+		OUT_RING(ring, CP_REG(REG_PA_CL_VPORT_XSCALE));
+		OUT_RING(ring, f2d(ctx->viewport.scale[0]));       /* PA_CL_VPORT_XSCALE */
+		OUT_RING(ring, f2d(ctx->viewport.translate[0]));   /* PA_CL_VPORT_XOFFSET */
+		OUT_RING(ring, f2d(ctx->viewport.scale[1]));       /* PA_CL_VPORT_YSCALE */
+		OUT_RING(ring, f2d(ctx->viewport.translate[1]));   /* PA_CL_VPORT_YOFFSET */
+		OUT_RING(ring, f2d(ctx->viewport.scale[2]));       /* PA_CL_VPORT_ZSCALE */
+		OUT_RING(ring, f2d(ctx->viewport.translate[2]));   /* PA_CL_VPORT_ZOFFSET */
+
+		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+		OUT_RING(ring, CP_REG(REG_PA_CL_VTE_CNTL));
+		OUT_RING(ring, PA_CL_VTE_CNTL_VTX_W0_FMT |
+				PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
+				PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
+				PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
+				PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
+				PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
+				PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);
+	}
+
+	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTX | FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX)) {
+		fd_program_validate(ctx);
+		fd_program_emit(ring, &ctx->prog);
+	}
+
+	if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) {
+		emit_constants(ring,  VS_CONST_BASE * 4,
+				&ctx->constbuf[PIPE_SHADER_VERTEX],
+				(dirty & FD_DIRTY_PROG) ? ctx->prog.vp : NULL);
+		emit_constants(ring, PS_CONST_BASE * 4,
+				&ctx->constbuf[PIPE_SHADER_FRAGMENT],
+				(dirty & FD_DIRTY_PROG) ? ctx->prog.fp : NULL);
+	}
+
+	if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_ZSA)) {
+		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+		OUT_RING(ring, CP_REG(REG_RB_COLORCONTROL));
+		OUT_RING(ring, ctx->zsa->rb_colorcontrol | ctx->blend->rb_colorcontrol);
+	}
+
+	if (dirty & FD_DIRTY_BLEND) {
+		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+		OUT_RING(ring, CP_REG(REG_RB_BLEND_CONTROL));
+		OUT_RING(ring, ctx->blend->rb_blendcontrol);
+	}
+
+	if (dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX | FD_DIRTY_PROG))
+		emit_textures(ring, ctx);
+
+	ctx->dirty &= ~dirty;
+}
+
+/* emit per-context initialization, flush it, and mark ctx->draw_start:
+ */
+void
+fd_state_emit_setup(struct pipe_context *pctx)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	struct fd_ringbuffer *ring = ctx->ring;
+
+	OUT_PKT0(ring, REG_TP0_CHICKEN, 1);
+	OUT_RING(ring, 0x00000002);
+
+	OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
+	OUT_RING(ring, 0x00007fff);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_SQ_VS_CONST));
+	OUT_RING(ring, SQ_VS_CONST_BASE(VS_CONST_BASE) |
+			SQ_VS_CONST_SIZE(0x100));
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_SQ_PS_CONST));
+	OUT_RING(ring, SQ_PS_CONST_BASE(PS_CONST_BASE) |
+			SQ_PS_CONST_SIZE(0xe0));
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+	OUT_RING(ring, CP_REG(REG_VGT_MAX_VTX_INDX));
+	OUT_RING(ring, 0xffffffff);        /* VGT_MAX_VTX_INDX */
+	OUT_RING(ring, 0x00000000);        /* VGT_MIN_VTX_INDX */
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_VGT_INDX_OFFSET));
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL));
+	OUT_RING(ring, 0x0000003b);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_SQ_CONTEXT_MISC));
+	OUT_RING(ring, SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY));
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_SQ_INTERPOLATOR_CNTL));
+	OUT_RING(ring, 0xffffffff);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_PA_SC_AA_CONFIG));
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_PA_SC_LINE_CNTL));
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_PA_SC_WINDOW_OFFSET));
+	OUT_RING(ring, 0x00000000);
+
+	// XXX we change this dynamically for draw/clear.. vs gmem<->mem..
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_RB_MODECONTROL));
+	OUT_RING(ring, RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_RB_SAMPLE_POS));
+	OUT_RING(ring, 0x88888888);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_RB_COLOR_DEST_MASK));
+	OUT_RING(ring, 0xffffffff);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_RB_COPY_DEST_INFO));
+	OUT_RING(ring, RB_COPY_DEST_INFO_FORMAT(COLORX_4_4_4_4) |
+			RB_COPY_DEST_INFO_WRITE_RED |
+			RB_COPY_DEST_INFO_WRITE_GREEN |
+			RB_COPY_DEST_INFO_WRITE_BLUE |
+			RB_COPY_DEST_INFO_WRITE_ALPHA);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+	OUT_RING(ring, CP_REG(REG_SQ_WRAPPING_0));
+	OUT_RING(ring, 0x00000000);        /* SQ_WRAPPING_0 */
+	OUT_RING(ring, 0x00000000);        /* SQ_WRAPPING_1 */
+
+	OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);
+	OUT_RING(ring, 0x000005d0);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x5f601000);
+	OUT_RING(ring, 0x00000001);
+
+	OUT_PKT0(ring, REG_SQ_INST_STORE_MANAGMENT, 1);
+	OUT_RING(ring, 0x00000180);
+
+	OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
+	OUT_RING(ring, 0x00000300);
+
+	OUT_PKT3(ring, CP_SET_SHADER_BASES, 1);
+	OUT_RING(ring, 0x80000180);
+
+	/* not sure what this form of CP_SET_CONSTANT is.. */
+	OUT_PKT3(ring, CP_SET_CONSTANT, 13);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x469c4000);        /* 20000.0 if read as an IEEE-754 float */
+	OUT_RING(ring, 0x3f800000);        /* 1.0   (same assumption) */
+	OUT_RING(ring, 0x3f000000);        /* 0.5 */
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x40000000);        /* 2.0 */
+	OUT_RING(ring, 0x3f400000);        /* 0.75 */
+	OUT_RING(ring, 0x3ec00000);        /* 0.375 */
+	OUT_RING(ring, 0x3e800000);        /* 0.25 */
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_RB_COLOR_MASK));
+	OUT_RING(ring, RB_COLOR_MASK_WRITE_RED |
+			RB_COLOR_MASK_WRITE_GREEN |
+			RB_COLOR_MASK_WRITE_BLUE |
+			RB_COLOR_MASK_WRITE_ALPHA);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+	OUT_RING(ring, CP_REG(REG_RB_BLEND_RED));
+	OUT_RING(ring, 0x00000000);        /* RB_BLEND_RED */
+	OUT_RING(ring, 0x00000000);        /* RB_BLEND_GREEN */
+	OUT_RING(ring, 0x00000000);        /* RB_BLEND_BLUE */
+	OUT_RING(ring, 0x000000ff);        /* RB_BLEND_ALPHA */
+
+	fd_ringbuffer_flush(ring);
+	fd_ringmarker_mark(ctx->draw_start);
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_state.h b/src/gallium/drivers/freedreno/freedreno_state.h
new file mode 100644
index 0000000..73328eb
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_state.h
@@ -0,0 +1,53 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#ifndef FREEDRENO_STATE_H_
+#define FREEDRENO_STATE_H_
+
+#include "pipe/p_context.h"
+
+struct fd_vertexbuf_stateobj;
+struct fd_zsa_stateobj;
+struct fd_framebuffer_stateobj;
+struct fd_ringbuffer;
+
+void fd_state_init(struct pipe_context *pctx);
+
+struct fd_vertex_buf {	/* one entry of the array passed to fd_emit_vertex_bufs() */
+	unsigned offset, size;	/* offset goes into the reloc, size is emitted after it */
+	struct pipe_resource *prsc;	/* presumably the buffer whose bo is relocated -- confirm */
+};
+
+void fd_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
+		struct fd_vertex_buf *vbufs, uint32_t n);
+void fd_emit_framebuffer_state(struct fd_ringbuffer *ring,
+		struct fd_framebuffer_stateobj *fb, struct fd_zsa_stateobj *zsa);
+void fd_state_emit(struct pipe_context *pctx, uint32_t dirty);
+void fd_state_emit_setup(struct pipe_context *pctx);
+
+#endif /* FREEDRENO_STATE_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_surface.c b/src/gallium/drivers/freedreno/freedreno_surface.c
new file mode 100644
index 0000000..250fe4b
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_surface.c
@@ -0,0 +1,73 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#include "freedreno_surface.h"
+#include "freedreno_resource.h"
+#include "freedreno_util.h"
+
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+
+/* Create a surface for one mip level of ptex; takes a reference on ptex.
+ * Returns NULL (not a pointer derived from NULL) if the allocation fails. */
+struct pipe_surface *
+fd_create_surface(struct pipe_context *pctx,
+		struct pipe_resource *ptex,
+		const struct pipe_surface *surf_tmpl)
+{
+	struct fd_surface *surface = CALLOC_STRUCT(fd_surface);
+	struct pipe_surface *psurf;
+	unsigned level = surf_tmpl->u.tex.level;
+
+	assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
+
+	if (!surface)
+		return NULL;
+
+	psurf = &surface->base;
+	pipe_reference_init(&psurf->reference, 1);
+	pipe_resource_reference(&psurf->texture, ptex);
+
+	psurf->context = pctx;
+	psurf->format = surf_tmpl->format;
+	psurf->width = u_minify(ptex->width0, level);
+	psurf->height = u_minify(ptex->height0, level);
+	psurf->u.tex.level = level;
+	psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+	psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+
+	DBG("TODO: %ux%u", psurf->width, psurf->height);	/* TODO */
+	return psurf;
+}
+
+void
+fd_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf)
+{
+	pipe_resource_reference(&psurf->texture, NULL);	/* drop the texture ref */
+	FREE(fd_surface(psurf));	/* base is the first member: same pointer */
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_surface.h b/src/gallium/drivers/freedreno/freedreno_surface.h
new file mode 100644
index 0000000..3293f33
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_surface.h
@@ -0,0 +1,54 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#ifndef FREEDRENO_SURFACE_H_
+#define FREEDRENO_SURFACE_H_
+
+#include "pipe/p_state.h"
+
+struct fd_surface {
+	struct pipe_surface base;	/* must stay first: fd_surface() casts from it */
+	uint32_t offset;	/* NOTE(review): offset..depth are not written by */
+	uint32_t pitch;		/* fd_create_surface(); presumably placeholders */
+	uint32_t width;		/* NOTE(review): 32-bit, while height/depth are 16-bit */
+	uint16_t height;
+	uint16_t depth;
+};
+
+static INLINE struct fd_surface *
+fd_surface(struct pipe_surface *psurf)	/* downcast from the embedded base */
+{
+	return (struct fd_surface *)psurf;	/* relies on base being the first member */
+}
+
+struct pipe_surface* fd_create_surface(struct pipe_context *pctx,
+		struct pipe_resource *ptex,
+		const struct pipe_surface *surf_tmpl);
+void fd_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf);
+
+#endif /* FREEDRENO_SURFACE_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_texture.c b/src/gallium/drivers/freedreno/freedreno_texture.c
new file mode 100644
index 0000000..07bfbd3
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_texture.c
@@ -0,0 +1,286 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+
+#include "freedreno_texture.h"
+#include "freedreno_util.h"
+
+/* Translate a gallium PIPE_TEX_WRAP_x mode into the matching SQ_TEX_x
+ * clamp mode.  An unrecognized mode is logged with DBG() and mapped
+ * to 0.
+ */
+static enum sq_tex_clamp
+tex_clamp(unsigned wrap)
+{
+	enum sq_tex_clamp hw_clamp = 0;
+
+	switch (wrap) {
+	case PIPE_TEX_WRAP_REPEAT:                 hw_clamp = SQ_TEX_WRAP; break;
+	case PIPE_TEX_WRAP_CLAMP:                  hw_clamp = SQ_TEX_CLAMP_HALF_BORDER; break;
+	case PIPE_TEX_WRAP_CLAMP_TO_EDGE:          hw_clamp = SQ_TEX_CLAMP_LAST_TEXEL; break;
+	case PIPE_TEX_WRAP_CLAMP_TO_BORDER:        hw_clamp = SQ_TEX_CLAMP_BORDER; break;
+	case PIPE_TEX_WRAP_MIRROR_REPEAT:          hw_clamp = SQ_TEX_MIRROR; break;
+	case PIPE_TEX_WRAP_MIRROR_CLAMP:           hw_clamp = SQ_TEX_MIRROR_ONCE_HALF_BORDER; break;
+	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:   hw_clamp = SQ_TEX_MIRROR_ONCE_LAST_TEXEL; break;
+	case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: hw_clamp = SQ_TEX_MIRROR_ONCE_BORDER; break;
+	default:
+		DBG("invalid wrap: %u", wrap);
+		break;
+	}
+
+	return hw_clamp;
+}
+
+/* Translate a gallium PIPE_TEX_FILTER_x into the SQ_TEX_FILTER_x value;
+ * an unrecognized filter is logged with DBG() and mapped to 0. */
+static enum sq_tex_filter
+tex_filter(unsigned filter)
+{
+	if (filter == PIPE_TEX_FILTER_NEAREST)
+		return SQ_TEX_FILTER_POINT;
+	if (filter == PIPE_TEX_FILTER_LINEAR)
+		return SQ_TEX_FILTER_BILINEAR;
+
+	DBG("invalid filter: %u", filter);
+	return 0;
+}
+
+/* Build the sampler CSO: a copy of the gallium state plus the hw words
+ * derived from it.  Returns NULL if the allocation fails. */
+static void *
+fd_sampler_state_create(struct pipe_context *pctx,
+		const struct pipe_sampler_state *cso)
+{
+	struct fd_sampler_stateobj *sampler = CALLOC_STRUCT(fd_sampler_stateobj);
+
+	if (sampler == NULL)
+		return NULL;
+
+	sampler->base = *cso;
+
+	/* SQ_TEX0_PITCH() must be OR'd in later when we know the bound texture: */
+	sampler->tex0  = SQ_TEX0_CLAMP_X(tex_clamp(cso->wrap_s));
+	sampler->tex0 |= SQ_TEX0_CLAMP_Y(tex_clamp(cso->wrap_t));
+	sampler->tex0 |= SQ_TEX0_CLAMP_Z(tex_clamp(cso->wrap_r));
+
+	sampler->tex3  = SQ_TEX3_XY_MAG_FILTER(tex_filter(cso->mag_img_filter));
+	sampler->tex3 |= SQ_TEX3_XY_MIN_FILTER(tex_filter(cso->min_img_filter));
+
+	sampler->tex4 = 0x00000000; /* ??? */
+	sampler->tex5 = 0x00000200; /* ??? */
+
+	return sampler;
+}
+
+static void
+fd_sampler_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+	FREE(hwcso);	/* hwcso is the object fd_sampler_state_create() returned */
+}
+
+/* Create a sampler view of prsc from the template cso, taking a reference
+ * on prsc and precomputing the tex0/tex2/tex3 words that depend on the
+ * resource and format.  Returns NULL if the allocation fails. */
+static struct pipe_sampler_view *
+fd_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
+		const struct pipe_sampler_view *cso)
+{
+	struct fd_pipe_sampler_view *view = CALLOC_STRUCT(fd_pipe_sampler_view);
+	struct fd_resource *rsc = fd_resource(prsc);
+
+	if (view == NULL)
+		return NULL;
+
+	/* start from the template, then fix up the fields this view owns: */
+	view->base = *cso;
+	pipe_reference(NULL, &prsc->reference);
+	view->base.texture = prsc;
+	view->base.reference.count = 1;
+	view->base.context = pctx;
+
+	view->tex_resource = rsc;
+	view->fmt = fd_pipe2surface(cso->format);
+	view->tex0 = SQ_TEX0_PITCH(rsc->pitch);
+	view->tex2 = SQ_TEX2_HEIGHT(prsc->height0) | SQ_TEX2_WIDTH(prsc->width0);
+	view->tex3 = fd_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
+			cso->swizzle_b, cso->swizzle_a);
+	return &view->base;
+}
+
+static void
+fd_sampler_view_destroy(struct pipe_context *pctx,
+		struct pipe_sampler_view *view)
+{
+	pipe_resource_reference(&view->texture, NULL);	/* drop the ref taken at create */
+	FREE(view);	/* view is the base embedded in the fd_pipe_sampler_view */
+}
+
+/* Bind nr sampler CSOs to slots [0, nr) and clear any previously bound
+ * slots at or above nr.  Every slot written is flagged in dirty_samplers.
+ */
+static void bind_sampler_states(struct fd_texture_stateobj *prog,
+		unsigned nr, void **hwcso)
+{
+	unsigned slot;
+	unsigned end = (nr > prog->num_samplers) ? nr : prog->num_samplers;
+
+	for (slot = 0; slot < end; slot++) {
+		/* slots past nr only exist when fewer samplers are bound now */
+		prog->samplers[slot] = (slot < nr) ? hwcso[slot] : NULL;
+		prog->dirty_samplers |= (1 << slot);
+	}
+
+	prog->num_samplers = nr;
+}
+
+/* Reference nr sampler views into slots [0, nr) and unreference any
+ * previously set slots at or above nr.  NOTE(review): touched slots are
+ * flagged in dirty_samplers, the same mask bind_sampler_states() uses. */
+static void set_sampler_views(struct fd_texture_stateobj *prog,
+		unsigned nr, struct pipe_sampler_view **views)
+{
+	unsigned slot;
+	unsigned end = (nr > prog->num_textures) ? nr : prog->num_textures;
+
+	for (slot = 0; slot < end; slot++) {
+		struct pipe_sampler_view *view = (slot < nr) ? views[slot] : NULL;
+		pipe_sampler_view_reference(&prog->textures[slot], view);
+		prog->dirty_samplers |= (1 << slot);
+	}
+
+	prog->num_textures = nr;
+}
+
+static void
+fd_fragtex_sampler_states_bind(struct pipe_context *pctx,
+		unsigned nr, void **hwcso)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	bind_sampler_states(&ctx->fragtex, nr, hwcso);	/* fragment-stage samplers */
+	ctx->dirty |= FD_DIRTY_FRAGTEX;	/* checked by fd_state_emit() */
+}
+
+
+static void
+fd_fragtex_set_sampler_views(struct pipe_context *pctx, unsigned nr,
+		struct pipe_sampler_view **views)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	set_sampler_views(&ctx->fragtex, nr, views);	/* fragment-stage views */
+	ctx->dirty |= FD_DIRTY_FRAGTEX;	/* checked by fd_state_emit() */
+}
+
+static void
+fd_verttex_sampler_states_bind(struct pipe_context *pctx,
+		unsigned nr, void **hwcso)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	bind_sampler_states(&ctx->verttex, nr, hwcso);	/* vertex-stage samplers */
+	ctx->dirty |= FD_DIRTY_VERTTEX;	/* checked by fd_state_emit() */
+}
+
+
+static void
+fd_verttex_set_sampler_views(struct pipe_context *pctx, unsigned nr,
+		struct pipe_sampler_view **views)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	set_sampler_views(&ctx->verttex, nr, views);	/* vertex-stage views */
+	ctx->dirty |= FD_DIRTY_VERTTEX;	/* checked by fd_state_emit() */
+}
+
+/* true if both slots are bound and use the same resource and an identical
+ * sampler state; an out-of-range or unbound (NULL) slot matches only itself. */
+static bool
+tex_cmp(struct fd_texture_stateobj *tex1, unsigned samp_id1,
+		struct fd_texture_stateobj *tex2, unsigned samp_id2)
+{
+	if ((samp_id1 >= tex1->num_samplers) || (samp_id2 >= tex2->num_samplers))
+		return false;
+	if ((tex1 == tex2) && (samp_id1 == samp_id2))
+		return true;
+
+	/* a slot can hold a NULL view or sampler; never dereference those: */
+	if (!tex1->textures[samp_id1] || !tex2->textures[samp_id2] ||
+			!tex1->samplers[samp_id1] || !tex2->samplers[samp_id2])
+		return false;
+	if (tex1->textures[samp_id1]->texture != tex2->textures[samp_id2]->texture)
+		return false;
+	return !memcmp(&tex1->samplers[samp_id1]->base, &tex2->samplers[samp_id2]->base,
+			sizeof(tex1->samplers[samp_id1]->base));
+}
+
+/* map gallium sampler-id to hw const-idx.. adreno uses a flat address
+ * space of samplers (const-idx), so we need to map the gallium sampler-id
+ * which is per-shader to a global const-idx space.
+ *
+ * Vertex samplers are numbered first, then fragment samplers; a slot that
+ * tex_cmp() matches against an earlier one shares that earlier index.
+ */
+unsigned
+fd_get_const_idx(struct fd_context *ctx, struct fd_texture_stateobj *tex,
+		unsigned samp_id)
+{
+	struct fd_texture_stateobj *stages[] = { &ctx->verttex, &ctx->fragtex };
+	unsigned s, i;
+	unsigned const_idx = 0;
+
+	/* TODO maybe worth having some sort of cache, because we need to
+	 * do this loop thru all the samplers both when patching shaders
+	 * and also when emitting sampler state..
+	 */
+	for (s = 0; s < 2; s++) {
+		for (i = 0; i < stages[s]->num_samplers; i++, const_idx++) {
+			if (tex_cmp(stages[s], i, tex, samp_id))
+				return const_idx;
+		}
+	}
+
+	/* no match: the total sampler count of both stages */
+	return const_idx;
+}
+
+void
+fd_texture_init(struct pipe_context *pctx)	/* install the sampler hooks on pctx */
+{
+	pctx->create_sampler_state = fd_sampler_state_create;
+	pctx->delete_sampler_state = fd_sampler_state_delete;
+
+	pctx->create_sampler_view = fd_sampler_view_create;
+	pctx->sampler_view_destroy = fd_sampler_view_destroy;
+
+	pctx->bind_fragment_sampler_states = fd_fragtex_sampler_states_bind;
+	pctx->set_fragment_sampler_views = fd_fragtex_set_sampler_views;
+
+	pctx->bind_vertex_sampler_states = fd_verttex_sampler_states_bind;
+	pctx->set_vertex_sampler_views = fd_verttex_set_sampler_views;
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_texture.h b/src/gallium/drivers/freedreno/freedreno_texture.h
new file mode 100644
index 0000000..32bdb03
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_texture.h
@@ -0,0 +1,61 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#ifndef FREEDRENO_TEXTURE_H_
+#define FREEDRENO_TEXTURE_H_
+
+#include "pipe/p_context.h"
+
+#include "freedreno_context.h"
+#include "freedreno_resource.h"
+#include "freedreno_util.h"
+
+struct fd_sampler_stateobj {	/* object returned by fd_sampler_state_create() */
+	struct pipe_sampler_state base;	/* copy of the gallium state; compared by tex_cmp() */
+	uint32_t tex0, tex3, tex4, tex5;	/* hw words precomputed at create time */
+};
+
+struct fd_pipe_sampler_view {
+	struct pipe_sampler_view base;	/* must stay first: fd_pipe_sampler_view() casts */
+	struct fd_resource *tex_resource;	/* fd_resource() of base.texture */
+	enum sq_surfaceformat fmt;	/* fd_pipe2surface() of the view format */
+	uint32_t tex0, tex2, tex3;	/* pitch, width/height and swizzle words */
+};
+
+static INLINE struct fd_pipe_sampler_view *
+fd_pipe_sampler_view(struct pipe_sampler_view *pview)	/* downcast from base */
+{
+	return (struct fd_pipe_sampler_view *)pview;	/* relies on base being first */
+}
+
+unsigned fd_get_const_idx(struct fd_context *ctx,
+		struct fd_texture_stateobj *tex, unsigned samp_id);
+
+void fd_texture_init(struct pipe_context *pctx);
+
+#endif /* FREEDRENO_TEXTURE_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_util.c b/src/gallium/drivers/freedreno/freedreno_util.c
new file mode 100644
index 0000000..3bc3e79
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_util.c
@@ -0,0 +1,351 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_format.h"
+
+#include "freedreno_util.h"
+
+enum sq_surfaceformat
+fd_pipe2surface(enum pipe_format format)	/* gallium format -> FMT_x surface format */
+{
+	switch (format) {
+	/* 8-bit buffers. */
+	case PIPE_FORMAT_A8_UNORM:
+	case PIPE_FORMAT_A8_SNORM:
+	case PIPE_FORMAT_A8_UINT:
+	case PIPE_FORMAT_A8_SINT:
+	case PIPE_FORMAT_I8_UNORM:
+	case PIPE_FORMAT_I8_SNORM:
+	case PIPE_FORMAT_I8_UINT:
+	case PIPE_FORMAT_I8_SINT:
+	case PIPE_FORMAT_L8_UNORM:
+	case PIPE_FORMAT_L8_SNORM:
+	case PIPE_FORMAT_L8_UINT:
+	case PIPE_FORMAT_L8_SINT:
+	case PIPE_FORMAT_L8_SRGB:
+	case PIPE_FORMAT_R8_UNORM:
+	case PIPE_FORMAT_R8_SNORM:
+	case PIPE_FORMAT_R8_UINT:
+	case PIPE_FORMAT_R8_SINT:
+		return FMT_8;
+
+	/* 16-bit buffers. */
+	case PIPE_FORMAT_B5G6R5_UNORM:
+		return FMT_5_6_5;
+	case PIPE_FORMAT_B5G5R5A1_UNORM:
+	case PIPE_FORMAT_B5G5R5X1_UNORM:
+		return FMT_1_5_5_5;
+	case PIPE_FORMAT_B4G4R4A4_UNORM:
+	case PIPE_FORMAT_B4G4R4X4_UNORM:
+		return FMT_4_4_4_4;
+	case PIPE_FORMAT_Z16_UNORM:
+		return FMT_16;
+	case PIPE_FORMAT_L8A8_UNORM:
+	case PIPE_FORMAT_L8A8_SNORM:
+	case PIPE_FORMAT_L8A8_UINT:
+	case PIPE_FORMAT_L8A8_SINT:
+	case PIPE_FORMAT_L8A8_SRGB:
+	case PIPE_FORMAT_R8G8_UNORM:
+	case PIPE_FORMAT_R8G8_SNORM:
+	case PIPE_FORMAT_R8G8_UINT:
+	case PIPE_FORMAT_R8G8_SINT:
+		return FMT_8_8;
+	case PIPE_FORMAT_R16_UNORM:
+	case PIPE_FORMAT_R16_SNORM:
+	case PIPE_FORMAT_R16_UINT:
+	case PIPE_FORMAT_R16_SINT:
+	case PIPE_FORMAT_A16_UNORM:
+	case PIPE_FORMAT_A16_SNORM:
+	case PIPE_FORMAT_A16_UINT:
+	case PIPE_FORMAT_A16_SINT:
+	case PIPE_FORMAT_L16_UNORM:
+	case PIPE_FORMAT_L16_SNORM:
+	case PIPE_FORMAT_L16_UINT:
+	case PIPE_FORMAT_L16_SINT:
+	case PIPE_FORMAT_I16_UNORM:
+	case PIPE_FORMAT_I16_SNORM:
+	case PIPE_FORMAT_I16_UINT:
+	case PIPE_FORMAT_I16_SINT:
+		return FMT_16;
+	case PIPE_FORMAT_R16_FLOAT:
+	case PIPE_FORMAT_A16_FLOAT:
+	case PIPE_FORMAT_L16_FLOAT:
+	case PIPE_FORMAT_I16_FLOAT:
+		return FMT_16_FLOAT;
+
+	/* 32-bit buffers. */
+	case PIPE_FORMAT_A8B8G8R8_SRGB:
+	case PIPE_FORMAT_A8B8G8R8_UNORM:
+	case PIPE_FORMAT_A8R8G8B8_UNORM:
+	case PIPE_FORMAT_B8G8R8A8_SRGB:
+	case PIPE_FORMAT_B8G8R8A8_UNORM:
+	case PIPE_FORMAT_B8G8R8X8_UNORM:
+	case PIPE_FORMAT_R8G8B8A8_SNORM:
+	case PIPE_FORMAT_R8G8B8A8_UNORM:
+	case PIPE_FORMAT_R8G8B8X8_UNORM:
+	case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
+	case PIPE_FORMAT_X8B8G8R8_UNORM:
+	case PIPE_FORMAT_X8R8G8B8_UNORM:
+	case PIPE_FORMAT_R8G8B8_UNORM:	/* NOTE(review): 3-channel format in the 32-bit group -- confirm */
+	case PIPE_FORMAT_R8G8B8A8_SINT:
+	case PIPE_FORMAT_R8G8B8A8_UINT:
+		return FMT_8_8_8_8;
+	case PIPE_FORMAT_R10G10B10A2_UNORM:
+	case PIPE_FORMAT_R10G10B10X2_SNORM:
+	case PIPE_FORMAT_B10G10R10A2_UNORM:
+	case PIPE_FORMAT_B10G10R10A2_UINT:
+	case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
+		return FMT_2_10_10_10;
+	case PIPE_FORMAT_Z24X8_UNORM:
+	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+		return FMT_24_8;
+	case PIPE_FORMAT_R32_UINT:
+	case PIPE_FORMAT_R32_SINT:
+	case PIPE_FORMAT_A32_UINT:
+	case PIPE_FORMAT_A32_SINT:
+	case PIPE_FORMAT_L32_UINT:
+	case PIPE_FORMAT_L32_SINT:
+	case PIPE_FORMAT_I32_UINT:
+	case PIPE_FORMAT_I32_SINT:
+		return FMT_32;
+	case PIPE_FORMAT_R32_FLOAT:
+	case PIPE_FORMAT_A32_FLOAT:
+	case PIPE_FORMAT_L32_FLOAT:
+	case PIPE_FORMAT_I32_FLOAT:
+	case PIPE_FORMAT_Z32_FLOAT:
+		return FMT_32_FLOAT;
+	case PIPE_FORMAT_R16G16_FLOAT:
+	case PIPE_FORMAT_L16A16_FLOAT:
+		return FMT_16_16_FLOAT;
+	case PIPE_FORMAT_R16G16_UNORM:
+	case PIPE_FORMAT_R16G16_SNORM:
+	case PIPE_FORMAT_R16G16_UINT:
+	case PIPE_FORMAT_R16G16_SINT:
+	case PIPE_FORMAT_L16A16_UNORM:
+	case PIPE_FORMAT_L16A16_SNORM:
+	case PIPE_FORMAT_L16A16_UINT:
+	case PIPE_FORMAT_L16A16_SINT:
+		return FMT_16_16;
+
+	/* 64-bit buffers. */
+	case PIPE_FORMAT_R16G16B16A16_UINT:
+	case PIPE_FORMAT_R16G16B16A16_SINT:
+	case PIPE_FORMAT_R16G16B16A16_UNORM:
+	case PIPE_FORMAT_R16G16B16A16_SNORM:
+		return FMT_16_16_16_16;
+	case PIPE_FORMAT_R16G16B16A16_FLOAT:
+		return FMT_16_16_16_16_FLOAT;
+	case PIPE_FORMAT_R32G32_FLOAT:
+	case PIPE_FORMAT_L32A32_FLOAT:
+		return FMT_32_32_FLOAT;
+	case PIPE_FORMAT_R32G32_SINT:
+	case PIPE_FORMAT_R32G32_UINT:
+	case PIPE_FORMAT_L32A32_UINT:
+	case PIPE_FORMAT_L32A32_SINT:
+		return FMT_32_32;
+
+	/* 96-bit buffers. */
+	case PIPE_FORMAT_R32G32B32_FLOAT:
+		return FMT_32_32_32_FLOAT;
+
+	/* 128-bit buffers. */
+	case PIPE_FORMAT_R32G32B32A32_SNORM:
+	case PIPE_FORMAT_R32G32B32A32_UNORM:
+	case PIPE_FORMAT_R32G32B32A32_SINT:
+	case PIPE_FORMAT_R32G32B32A32_UINT:
+		return FMT_32_32_32_32;
+	case PIPE_FORMAT_R32G32B32A32_FLOAT:
+		return FMT_32_32_32_32_FLOAT;
+
+	/* YUV buffers. */
+	case PIPE_FORMAT_UYVY:
+		return FMT_Cr_Y1_Cb_Y0;
+	case PIPE_FORMAT_YUYV:
+		return FMT_Y1_Cr_Y0_Cb;
+
+	default:
+		return FMT_INVALID;	/* format has no entry in this table */
+	}
+}
+
+/*
+ * Translate a gallium pipe_format into the enum rb_colorformatx value used
+ * when the surface is treated as a color buffer.
+ *
+ * Only the bit layout selects the result: formats that differ solely in
+ * signedness, normalization, sRGB-ness or channel order share one COLORX_*
+ * value.  The depth/stencil formats Z16, Z24X8, Z24S8 and Z32_FLOAT are
+ * listed as well and map to color formats of the same size.
+ *
+ * Returns COLORX_INVALID for any format not listed below.
+ */
+enum rb_colorformatx
+fd_pipe2color(enum pipe_format format)
+{
+	switch (format) {
+	/* 8-bit buffers. */
+	case PIPE_FORMAT_A8_UNORM:
+	case PIPE_FORMAT_A8_SNORM:
+	case PIPE_FORMAT_A8_UINT:
+	case PIPE_FORMAT_A8_SINT:
+	case PIPE_FORMAT_I8_UNORM:
+	case PIPE_FORMAT_I8_SNORM:
+	case PIPE_FORMAT_I8_UINT:
+	case PIPE_FORMAT_I8_SINT:
+	case PIPE_FORMAT_L8_UNORM:
+	case PIPE_FORMAT_L8_SNORM:
+	case PIPE_FORMAT_L8_UINT:
+	case PIPE_FORMAT_L8_SINT:
+	case PIPE_FORMAT_L8_SRGB:
+	case PIPE_FORMAT_R8_UNORM:
+	case PIPE_FORMAT_R8_SNORM:
+	case PIPE_FORMAT_R8_UINT:
+	case PIPE_FORMAT_R8_SINT:
+		return COLORX_8;
+
+	/* 16-bit buffers. */
+	case PIPE_FORMAT_B5G6R5_UNORM:
+		return COLORX_5_6_5;
+	case PIPE_FORMAT_B5G5R5A1_UNORM:
+	case PIPE_FORMAT_B5G5R5X1_UNORM:
+		return COLORX_1_5_5_5;
+	case PIPE_FORMAT_B4G4R4A4_UNORM:
+	case PIPE_FORMAT_B4G4R4X4_UNORM:
+		return COLORX_4_4_4_4;
+	case PIPE_FORMAT_L8A8_UNORM:
+	case PIPE_FORMAT_L8A8_SNORM:
+	case PIPE_FORMAT_L8A8_UINT:
+	case PIPE_FORMAT_L8A8_SINT:
+	case PIPE_FORMAT_L8A8_SRGB:
+	case PIPE_FORMAT_R8G8_UNORM:
+	case PIPE_FORMAT_R8G8_SNORM:
+	case PIPE_FORMAT_R8G8_UINT:
+	case PIPE_FORMAT_R8G8_SINT:
+	case PIPE_FORMAT_Z16_UNORM:
+		return COLORX_8_8;
+	case PIPE_FORMAT_R16_FLOAT:
+	case PIPE_FORMAT_A16_FLOAT:
+	case PIPE_FORMAT_L16_FLOAT:
+	case PIPE_FORMAT_I16_FLOAT:
+		return COLORX_16_FLOAT;
+
+	/* 32-bit buffers. */
+	case PIPE_FORMAT_A8B8G8R8_SRGB:
+	case PIPE_FORMAT_A8B8G8R8_UNORM:
+	case PIPE_FORMAT_A8R8G8B8_UNORM:
+	case PIPE_FORMAT_B8G8R8A8_SRGB:
+	case PIPE_FORMAT_B8G8R8A8_UNORM:
+	case PIPE_FORMAT_B8G8R8X8_UNORM:
+	case PIPE_FORMAT_R8G8B8A8_SNORM:
+	case PIPE_FORMAT_R8G8B8A8_UNORM:
+	case PIPE_FORMAT_R8G8B8X8_UNORM:
+	case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
+	case PIPE_FORMAT_X8B8G8R8_UNORM:
+	case PIPE_FORMAT_X8R8G8B8_UNORM:
+	/* NOTE(review): R8G8B8_UNORM is three-channel by name yet grouped with
+	 * the 32-bit formats -- confirm this is intended.
+	 */
+	case PIPE_FORMAT_R8G8B8_UNORM:
+	case PIPE_FORMAT_R8G8B8A8_SINT:
+	case PIPE_FORMAT_R8G8B8A8_UINT:
+	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+	case PIPE_FORMAT_Z24X8_UNORM:
+		return COLORX_8_8_8_8;
+	case PIPE_FORMAT_R32_FLOAT:
+	case PIPE_FORMAT_A32_FLOAT:
+	case PIPE_FORMAT_L32_FLOAT:
+	case PIPE_FORMAT_I32_FLOAT:
+	case PIPE_FORMAT_Z32_FLOAT:
+		return COLORX_32_FLOAT;
+	case PIPE_FORMAT_R16G16_FLOAT:
+	case PIPE_FORMAT_L16A16_FLOAT:
+		return COLORX_16_16_FLOAT;
+
+	/* 64-bit buffers. */
+	case PIPE_FORMAT_R16G16B16A16_FLOAT:
+		return COLORX_16_16_16_16_FLOAT;
+	case PIPE_FORMAT_R32G32_FLOAT:
+	case PIPE_FORMAT_L32A32_FLOAT:
+		return COLORX_32_32_FLOAT;
+
+	/* 128-bit buffers. */
+	case PIPE_FORMAT_R32G32B32A32_FLOAT:
+		return COLORX_32_32_32_32_FLOAT;
+
+	default:
+		return COLORX_INVALID;
+	}
+}
+
+/*
+ * Translate a gallium depth/stencil pipe_format into the matching
+ * enum rb_depth_format value.
+ *
+ * Returns DEPTHX_INVALID for every format other than Z16_UNORM,
+ * Z24X8_UNORM and Z24_UNORM_S8_UINT.
+ */
+enum rb_depth_format
+fd_pipe2depth(enum pipe_format format)
+{
+	if (format == PIPE_FORMAT_Z16_UNORM)
+		return DEPTHX_16;
+
+	/* both 24-bit depth layouts share one hardware format */
+	if (format == PIPE_FORMAT_Z24X8_UNORM ||
+			format == PIPE_FORMAT_Z24_UNORM_S8_UINT)
+		return DEPTHX_24_8;
+
+	return DEPTHX_INVALID;
+}
+
+/*
+ * Translate an index-buffer pipe_format (I8/I16/I32 UINT) into the
+ * enum pc_di_index_size value.  Any other format yields
+ * INDEX_SIZE_INVALID.
+ */
+enum pc_di_index_size
+fd_pipe2index(enum pipe_format format)
+{
+	enum pc_di_index_size size = INDEX_SIZE_INVALID;
+
+	switch (format) {
+	case PIPE_FORMAT_I8_UINT:
+		size = INDEX_SIZE_8_BIT;
+		break;
+	case PIPE_FORMAT_I16_UINT:
+		size = INDEX_SIZE_16_BIT;
+		break;
+	case PIPE_FORMAT_I32_UINT:
+		size = INDEX_SIZE_32_BIT;
+		break;
+	default:
+		break;
+	}
+
+	return size;
+}
+
+/*
+ * Map one PIPE_SWIZZLE_* selector to the corresponding SQ_TEX_* value.
+ * Unrecognized selectors are treated like PIPE_SWIZZLE_RED.
+ */
+static inline enum sq_tex_swiz
+tex_swiz(unsigned swiz)
+{
+	if (swiz == PIPE_SWIZZLE_GREEN)
+		return SQ_TEX_Y;
+	if (swiz == PIPE_SWIZZLE_BLUE)
+		return SQ_TEX_Z;
+	if (swiz == PIPE_SWIZZLE_ALPHA)
+		return SQ_TEX_W;
+	if (swiz == PIPE_SWIZZLE_ZERO)
+		return SQ_TEX_ZERO;
+	if (swiz == PIPE_SWIZZLE_ONE)
+		return SQ_TEX_ONE;
+
+	/* PIPE_SWIZZLE_RED and everything else */
+	return SQ_TEX_X;
+}
+
+/*
+ * Compose the swizzle described by the format with the caller-supplied
+ * per-channel swizzle and return it packed with the SQ_TEX3_SWIZ_{X,Y,Z,W}
+ * macros.
+ *
+ * Each of the format description's four swizzle entries is used as an
+ * index into an eight-entry table: entries 0-3 are swizzle_r/g/b/a,
+ * entry 4 is PIPE_SWIZZLE_ZERO and entries 5-7 are PIPE_SWIZZLE_ONE.
+ * NOTE(review): presumably the indices are the UTIL_FORMAT_SWIZZLE_*
+ * values -- confirm against util_format_description.
+ */
+uint32_t
+fd_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
+		unsigned swizzle_b, unsigned swizzle_a)
+{
+	const struct util_format_description *desc =
+			util_format_description(format);
+	const uint8_t lut[8] = {
+			swizzle_r, swizzle_g, swizzle_b, swizzle_a,
+			PIPE_SWIZZLE_ZERO, PIPE_SWIZZLE_ONE,
+			PIPE_SWIZZLE_ONE, PIPE_SWIZZLE_ONE,
+	};
+	enum sq_tex_swiz x = tex_swiz(lut[desc->swizzle[0]]);
+	enum sq_tex_swiz y = tex_swiz(lut[desc->swizzle[1]]);
+	enum sq_tex_swiz z = tex_swiz(lut[desc->swizzle[2]]);
+	enum sq_tex_swiz w = tex_swiz(lut[desc->swizzle[3]]);
+
+	return SQ_TEX3_SWIZ_X(x) | SQ_TEX3_SWIZ_Y(y) |
+			SQ_TEX3_SWIZ_Z(z) | SQ_TEX3_SWIZ_W(w);
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
new file mode 100644
index 0000000..fb1e392
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_util.h
@@ -0,0 +1,124 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#ifndef FREEDRENO_UTIL_H_
+#define FREEDRENO_UTIL_H_
+
+#include <freedreno_drmif.h>
+#include <freedreno_ringbuffer.h>
+
+#include "pipe/p_format.h"
+#include "util/u_debug.h"
+
+#include "freedreno_pm4.h"
+#include "freedreno_a2xx_reg.h"
+
+/* pipe_format -> hardware enum translation helpers. */
+enum sq_surfaceformat fd_pipe2surface(enum pipe_format format);
+enum rb_colorformatx fd_pipe2color(enum pipe_format format);
+enum rb_depth_format fd_pipe2depth(enum pipe_format format);
+enum pc_di_index_size fd_pipe2index(enum pipe_format format);
+uint32_t fd_tex_swiz(enum pipe_format format, unsigned swizzle_r,
+		unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
+
+
+/* Bit flags tested against fd_mesa_debug. */
+#define FD_DBG_MSGS   0x1
+#define FD_DBG_DISASM 0x2
+extern int fd_mesa_debug;
+
+/*
+ * printf-style debug message, prefixed with the calling function name and
+ * line number and terminated with a newline.  Only printed when the
+ * FD_DBG_MSGS bit is set in fd_mesa_debug.  'fmt' must be a string literal
+ * because it is concatenated with the prefix and suffix.
+ */
+#define DBG(fmt, ...) \
+		do { if (fd_mesa_debug & FD_DBG_MSGS) \
+			debug_printf("%s:%d: "fmt "\n", \
+				__FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0)
+
+/* Round v up to a multiple of a; the mask trick requires a to be a power of two. */
+#define ALIGN(v,a) (((v) + (a) - 1) & ~((a) - 1))
+/* Element count of a true array (not a pointer). */
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+
+/* NOTE: both macros evaluate their arguments twice; avoid side effects. */
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+#define max(a, b) (((a) > (b)) ? (a) : (b))
+
+
+/* Set to non-zero to have OUT_RING()/OUT_RELOC() log every dword via DBG(). */
+#define LOG_DWORDS 0
+
+
+/*
+ * Append one dword to the ringbuffer and advance its write pointer.
+ * No bounds check is done here.
+ */
+static inline void
+OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
+{
+	if (LOG_DWORDS) {
+		uint32_t pos = (uint32_t)(ring->cur - ring->last_start);
+
+		DBG("ring[%p]: OUT_RING   %04x:  %08x", ring, pos, data);
+	}
+	ring->cur[0] = data;
+	ring->cur++;
+}
+
+/*
+ * Emit a relocation for 'bo' at 'offset' into the ringbuffer by forwarding
+ * to fd_ringbuffer_emit_reloc(); 'or' is passed through unchanged.
+ */
+static inline void
+OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo,
+		uint32_t offset, uint32_t or)
+{
+	if (LOG_DWORDS) {
+		uint32_t pos = (uint32_t)(ring->cur - ring->last_start);
+
+		DBG("ring[%p]: OUT_RELOC  %04x:  %p+%u", ring, pos, bo, offset);
+	}
+	fd_ringbuffer_emit_reloc(ring, bo, offset, or);
+}
+
+/*
+ * Check that 'ndwords' more dwords fit before the end of the ringbuffer.
+ * Currently this only logs a debug message when they do not; nothing is
+ * flushed or reallocated.
+ */
+static inline void
+BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
+{
+	if ((ring->cur + ndwords) < ring->end)
+		return;
+
+	/* this probably won't really work if we have multiple tiles..
+	 * but it is ok for 2d..  we might need different behavior
+	 * depending on 2d or 3d pipe.
+	 */
+	DBG("uh oh..");
+}
+
+/*
+ * Emit the header dword of a type-0 packet: (cnt - 1) goes in bits 16 and
+ * up, the low 15 bits of 'regindx' in bits 0-14.  Callers follow it with
+ * 'cnt' payload dwords.
+ */
+static inline void
+OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
+{
+	uint32_t hdr = CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF);
+
+	BEGIN_RING(ring, cnt+1);
+	OUT_RING(ring, hdr);
+}
+
+/*
+ * Emit the header dword of a type-3 packet: (cnt - 1) goes in bits 16 and
+ * up, 'opcode' in bits 8-15.  Callers follow it with 'cnt' payload dwords.
+ */
+static inline void
+OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
+{
+	uint32_t hdr = CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8);
+
+	BEGIN_RING(ring, cnt+1);
+	OUT_RING(ring, hdr);
+}
+
+/*
+ * Emit a CP_INDIRECT_BUFFER_PFD packet covering the commands between the
+ * 'start' and 'end' markers: a relocation to 'start' followed by the dword
+ * count reported by fd_ringmarker_dwords().
+ */
+static inline void
+OUT_IB(struct fd_ringbuffer *ring, struct fd_ringmarker *start,
+		struct fd_ringmarker *end)
+{
+	uint32_t ndwords;
+
+	OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
+	fd_ringbuffer_emit_reloc_ring(ring, start);
+
+	/* queried after the reloc, matching the emission order */
+	ndwords = fd_ringmarker_dwords(start, end);
+	OUT_RING(ring, ndwords);
+}
+
+#endif /* FREEDRENO_UTIL_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_vbo.c b/src/gallium/drivers/freedreno/freedreno_vbo.c
new file mode 100644
index 0000000..3003b28
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_vbo.c
@@ -0,0 +1,226 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_prim.h"
+
+#include "freedreno_vbo.h"
+#include "freedreno_context.h"
+#include "freedreno_state.h"
+#include "freedreno_zsa.h"
+#include "freedreno_resource.h"
+#include "freedreno_util.h"
+
+
+/*
+ * create_vertex_elements_state hook.
+ *
+ * Returns a newly allocated fd_vertex_stateobj holding a copy of the
+ * 'num_elements' entries of 'elements', or NULL if the allocation fails
+ * or if num_elements exceeds the PIPE_MAX_ATTRIBS entries the state
+ * object can hold.
+ */
+static void *
+fd_vertex_state_create(struct pipe_context *pctx, unsigned num_elements,
+		const struct pipe_vertex_element *elements)
+{
+	struct fd_vertex_stateobj *so;
+
+	/* so->pipe[] is a fixed-size array; never copy past its end */
+	if (num_elements > PIPE_MAX_ATTRIBS) {
+		DBG("too many vertex elements: %u", num_elements);
+		return NULL;
+	}
+
+	so = CALLOC_STRUCT(fd_vertex_stateobj);
+	if (!so)
+		return NULL;
+
+	memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
+	so->num_elements = num_elements;
+
+	return so;
+}
+
+/*
+ * delete_vertex_elements_state hook: releases the state object with FREE().
+ * 'pctx' is unused.
+ */
+static void
+fd_vertex_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+	FREE(hwcso);
+}
+
+/*
+ * bind_vertex_elements_state hook: makes 'hwcso' the context's current
+ * vertex state and marks it dirty (FD_DIRTY_VTX).
+ */
+static void
+fd_vertex_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+	struct fd_context *ctx = fd_context(pctx);
+
+	ctx->dirty |= FD_DIRTY_VTX;
+	ctx->vtx = hwcso;
+}
+
+/*
+ * Emit twelve CP_EVENT_WRITE packets, each carrying a CACHE_FLUSH event.
+ * NOTE(review): the reason for the count of 12 is not documented here.
+ */
+static void
+emit_cacheflush(struct fd_ringbuffer *ring)
+{
+	unsigned remaining = 12;
+
+	while (remaining--) {
+		OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+		OUT_RING(ring, CACHE_FLUSH);
+	}
+}
+
+/*
+ * Map a PIPE_PRIM_* primitive mode to the enum pc_di_primtype value.
+ * An unsupported mode is logged, trips assert(0) and, when asserts are
+ * compiled out, yields DI_PT_NONE.
+ */
+static enum pc_di_primtype
+mode2primtype(unsigned mode)
+{
+	enum pc_di_primtype prim;
+
+	switch (mode) {
+	case PIPE_PRIM_POINTS:         prim = DI_PT_POINTLIST; break;
+	case PIPE_PRIM_LINES:          prim = DI_PT_LINELIST;  break;
+	case PIPE_PRIM_LINE_STRIP:     prim = DI_PT_LINESTRIP; break;
+	case PIPE_PRIM_TRIANGLES:      prim = DI_PT_TRILIST;   break;
+	case PIPE_PRIM_TRIANGLE_STRIP: prim = DI_PT_TRISTRIP;  break;
+	case PIPE_PRIM_TRIANGLE_FAN:   prim = DI_PT_TRIFAN;    break;
+	case PIPE_PRIM_QUADS:          prim = DI_PT_QUADLIST;  break;
+	case PIPE_PRIM_QUAD_STRIP:     prim = DI_PT_QUADSTRIP; break;
+	case PIPE_PRIM_POLYGON:        prim = DI_PT_POLYGON;   break;
+	default:
+		DBG("unsupported mode: (%s) %d", u_prim_name(mode), mode);
+		assert(0);
+		prim = DI_PT_NONE;
+		break;
+	}
+
+	return prim;
+}
+
+/*
+ * Map an index size in bytes (1, 2 or 4) to the enum pc_di_index_size
+ * value.  Any other size is logged, trips assert(0) and, when asserts are
+ * compiled out, yields INDEX_SIZE_IGN.
+ */
+static enum pc_di_index_size
+size2indextype(unsigned index_size)
+{
+	if (index_size == 1)
+		return INDEX_SIZE_8_BIT;
+	if (index_size == 2)
+		return INDEX_SIZE_16_BIT;
+	if (index_size == 4)
+		return INDEX_SIZE_32_BIT;
+
+	DBG("unsupported index size: %d", index_size);
+	assert(0);
+	return INDEX_SIZE_IGN;
+}
+
+/*
+ * Build one fd_vertex_buf descriptor per element of the bound vertex
+ * state and hand the array to fd_emit_vertex_bufs().  Does nothing when
+ * the bound vertex state has no elements.
+ *
+ * Each descriptor's size is 'count' times the stride of the vertex buffer
+ * the element reads from; the caller passes the draw's info->count.
+ */
+static void
+emit_vertexbufs(struct fd_context *ctx, unsigned count)
+{
+	struct fd_vertex_stateobj *vtx = ctx->vtx;
+	struct fd_vertex_buf bufs[PIPE_MAX_ATTRIBS];
+	unsigned n = vtx->num_elements;
+	unsigned i;
+
+	if (n == 0)
+		return;
+
+	for (i = 0; i < n; i++) {
+		unsigned vbi = vtx->pipe[i].vertex_buffer_index;
+		struct pipe_vertex_buffer *vb = &ctx->vertexbuf.vb[vbi];
+		struct fd_vertex_buf *buf = &bufs[i];
+
+		buf->prsc = vb->buffer;
+		buf->offset = vb->buffer_offset;
+		buf->size = count * vb->stride;
+	}
+
+	/* NOTE I believe the 0x78 (or 0x9c in solid_vp) relates to the
+	 * CONST(20,0) (or CONST(26,0) in solid_vp)
+	 */
+	fd_emit_vertex_bufs(ctx->ring, 0x78, bufs, n);
+}
+
+/*
+ * draw_vbo hook: emit the state and packets for one draw call.
+ *
+ * Records which buffers the draw touches (so they get restored if not
+ * cleared, and resolved afterwards), emits dirty state and the vertex
+ * buffers, then the CP_DRAW_INDX packet.  For indexed draws the indices
+ * are read from the context's bound index buffer; user-pointer index
+ * buffers are not supported (asserted).
+ */
+static void
+fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	struct fd_ringbuffer *ring = ctx->ring;
+	/* defaults describe a non-indexed (auto-index) draw: */
+	struct fd_bo *idx_bo = NULL;
+	enum pc_di_index_size idx_type = INDEX_SIZE_IGN;
+	enum pc_di_src_sel src_sel = DI_SRC_SEL_AUTO_INDEX;
+	uint32_t idx_size = 0, idx_offset = 0;
+	unsigned buffers;
+
+	ctx->needs_flush = true;
+
+	if (info->indexed) {
+		struct pipe_index_buffer *idx = &ctx->indexbuf;
+
+		assert(!idx->user_buffer);
+
+		idx_bo = fd_resource(idx->buffer)->bo;
+		idx_type = size2indextype(idx->index_size);
+		idx_size = idx->index_size * info->count;
+		idx_offset = idx->offset;
+		src_sel = DI_SRC_SEL_DMA;
+	}
+
+	/* figure out the buffers we need: */
+	buffers = FD_BUFFER_COLOR;
+	if (fd_depth_enabled(ctx->zsa))
+		buffers |= FD_BUFFER_DEPTH;
+	/* depth and stencil tests can be enabled together, so the stencil
+	 * check must not be skipped when depth is enabled:
+	 */
+	if (fd_stencil_enabled(ctx->zsa))
+		buffers |= FD_BUFFER_STENCIL;
+
+	/* any buffers that haven't been cleared, we need to restore: */
+	ctx->restore |= buffers & (FD_BUFFER_ALL & ~ctx->cleared);
+	/* and any buffers used, need to be resolved: */
+	ctx->resolve |= buffers;
+
+	fd_state_emit(pctx, ctx->dirty);
+
+	emit_vertexbufs(ctx, info->count);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_VGT_INDX_OFFSET));
+	OUT_RING(ring, info->start);
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL));
+	OUT_RING(ring, 0x0000003b);
+
+	OUT_PKT0(ring, REG_TC_CNTL_STATUS, 1);
+	OUT_RING(ring, TC_CNTL_STATUS_L2_INVALIDATE);
+
+	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
+	OUT_RING(ring, 0x00000000);
+
+	/* indexed draws carry two extra dwords: index buffer address + size */
+	OUT_PKT3(ring, CP_DRAW_INDX, info->indexed ? 5 : 3);
+	OUT_RING(ring, 0x00000000);        /* viz query info. */
+	OUT_RING(ring, DRAW(mode2primtype(info->mode),
+			src_sel, idx_type, IGNORE_VISIBILITY));
+	OUT_RING(ring, info->count);       /* NumIndices */
+	if (info->indexed) {
+		OUT_RELOC(ring, idx_bo, idx_offset, 0);
+		OUT_RING (ring, idx_size);
+	}
+
+	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+	OUT_RING(ring, CP_REG(REG_2010));
+	OUT_RING(ring, 0x00000000);
+
+	emit_cacheflush(ring);
+}
+
+/*
+ * Install this file's vertex-element state and draw hooks into the
+ * pipe_context function table.
+ */
+void
+fd_vbo_init(struct pipe_context *pctx)
+{
+	/* draw entry point */
+	pctx->draw_vbo = fd_draw_vbo;
+
+	/* vertex-element state object lifecycle */
+	pctx->create_vertex_elements_state = fd_vertex_state_create;
+	pctx->bind_vertex_elements_state = fd_vertex_state_bind;
+	pctx->delete_vertex_elements_state = fd_vertex_state_delete;
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_vbo.h b/src/gallium/drivers/freedreno/freedreno_vbo.h
new file mode 100644
index 0000000..081edf5
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_vbo.h
@@ -0,0 +1,42 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#ifndef FREEDRENO_VBO_H_
+#define FREEDRENO_VBO_H_
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+/*
+ * Driver-side vertex-elements state object: a copy of the
+ * pipe_vertex_element array handed to the create hook.
+ */
+struct fd_vertex_stateobj {
+	/* fixed capacity; only the first num_elements entries are valid */
+	struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
+	unsigned num_elements;
+};
+
+void fd_vbo_init(struct pipe_context *pctx);
+
+#endif /* FREEDRENO_VBO_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_zsa.c b/src/gallium/drivers/freedreno/freedreno_zsa.c
new file mode 100644
index 0000000..e8daa37
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_zsa.c
@@ -0,0 +1,144 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "freedreno_zsa.h"
+#include "freedreno_context.h"
+#include "freedreno_util.h"
+
+/*
+ * Map a PIPE_STENCIL_OP_* value to the enum rb_stencil_op value.
+ * An unknown op is logged and mapped to 0.
+ */
+static enum rb_stencil_op
+stencil_op(unsigned op)
+{
+	switch (op) {
+	case PIPE_STENCIL_OP_KEEP:      return STENCIL_KEEP;
+	case PIPE_STENCIL_OP_ZERO:      return STENCIL_ZERO;
+	case PIPE_STENCIL_OP_REPLACE:   return STENCIL_REPLACE;
+	case PIPE_STENCIL_OP_INCR:      return STENCIL_INCR_CLAMP;
+	case PIPE_STENCIL_OP_DECR:      return STENCIL_DECR_CLAMP;
+	case PIPE_STENCIL_OP_INCR_WRAP: return STENCIL_INCR_WRAP;
+	case PIPE_STENCIL_OP_DECR_WRAP: return STENCIL_DECR_WRAP;
+	case PIPE_STENCIL_OP_INVERT:    return STENCIL_INVERT;
+	}
+
+	DBG("invalid stencil op: %u", op);
+	return 0;
+}
+
+/*
+ * create_depth_stencil_alpha_state hook.
+ *
+ * Allocates a zeroed fd_zsa_stateobj, keeps a copy of 'cso' in it and
+ * precomputes the register values derived from it:
+ *  - rb_depthcontrol: depth func/enable/write plus front and back stencil
+ *    func and ops (back-face bits only when both stencil[0] and
+ *    stencil[1] are enabled);
+ *  - rb_stencilrefmask / rb_stencilrefmask_bf: stencil write and value
+ *    masks;
+ *  - rb_colorcontrol / rb_alpha_ref: alpha test, only when enabled.
+ *
+ * Returns NULL if the allocation fails.
+ */
+static void *
+fd_zsa_state_create(struct pipe_context *pctx,
+		const struct pipe_depth_stencil_alpha_state *cso)
+{
+	const struct pipe_stencil_state *front = &cso->stencil[0];
+	const struct pipe_stencil_state *back = &cso->stencil[1];
+	struct fd_zsa_stateobj *so = CALLOC_STRUCT(fd_zsa_stateobj);
+	uint32_t depthcontrol;
+
+	if (!so)
+		return NULL;
+
+	so->base = *cso;
+
+	/* the depth func is programmed even when the depth test is off */
+	depthcontrol = RB_DEPTHCONTROL_ZFUNC(cso->depth.func); /* maps 1:1 */
+	if (cso->depth.enabled)
+		depthcontrol |= RB_DEPTHCONTROL_Z_ENABLE;
+	if (cso->depth.writemask)
+		depthcontrol |= RB_DEPTHCONTROL_Z_WRITE_ENABLE;
+
+	if (front->enabled) {
+		depthcontrol |=
+			RB_DEPTHCONTROL_STENCIL_ENABLE |
+			RB_DEPTHCONTROL_STENCILFUNC(front->func) | /* maps 1:1 */
+			RB_DEPTHCONTROL_STENCILFAIL(stencil_op(front->fail_op)) |
+			RB_DEPTHCONTROL_STENCILZPASS(stencil_op(front->zpass_op)) |
+			RB_DEPTHCONTROL_STENCILZFAIL(stencil_op(front->zfail_op));
+		so->rb_stencilrefmask |=
+			0xff000000 | /* ??? */
+			RB_STENCILREFMASK_STENCILWRITEMASK(front->writemask) |
+			RB_STENCILREFMASK_STENCILMASK(front->valuemask);
+
+		/* back-face state is only considered with front stencil on */
+		if (back->enabled) {
+			depthcontrol |=
+				RB_DEPTHCONTROL_BACKFACE_ENABLE |
+				RB_DEPTHCONTROL_STENCILFUNC_BF(back->func) | /* maps 1:1 */
+				RB_DEPTHCONTROL_STENCILFAIL_BF(stencil_op(back->fail_op)) |
+				RB_DEPTHCONTROL_STENCILZPASS_BF(stencil_op(back->zpass_op)) |
+				RB_DEPTHCONTROL_STENCILZFAIL_BF(stencil_op(back->zfail_op));
+			so->rb_stencilrefmask_bf |=
+				0xff000000 | /* ??? */
+				RB_STENCILREFMASK_STENCILWRITEMASK(back->writemask) |
+				RB_STENCILREFMASK_STENCILMASK(back->valuemask);
+		}
+	}
+
+	so->rb_depthcontrol |= depthcontrol;
+
+	if (cso->alpha.enabled) {
+		so->rb_colorcontrol =
+			RB_COLORCONTROL_ALPHA_FUNC(cso->alpha.func) |
+			RB_COLORCONTROL_ALPHA_TEST_ENABLE;
+		so->rb_alpha_ref = f2d(cso->alpha.ref_value);
+	}
+
+	return so;
+}
+
+/*
+ * bind_depth_stencil_alpha_state hook: makes 'hwcso' the context's current
+ * depth/stencil/alpha state and marks it dirty (FD_DIRTY_ZSA).
+ */
+static void
+fd_zsa_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+	struct fd_context *ctx = fd_context(pctx);
+
+	ctx->dirty |= FD_DIRTY_ZSA;
+	ctx->zsa = hwcso;
+}
+
+/*
+ * delete_depth_stencil_alpha_state hook: releases the state object with
+ * FREE().  'pctx' is unused.
+ */
+static void
+fd_zsa_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+	FREE(hwcso);
+}
+
+/*
+ * Install the depth/stencil/alpha state object hooks into the
+ * pipe_context function table.
+ */
+void
+fd_zsa_init(struct pipe_context *pctx)
+{
+	pctx->delete_depth_stencil_alpha_state = fd_zsa_state_delete;
+	pctx->bind_depth_stencil_alpha_state = fd_zsa_state_bind;
+	pctx->create_depth_stencil_alpha_state = fd_zsa_state_create;
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_zsa.h b/src/gallium/drivers/freedreno/freedreno_zsa.h
new file mode 100644
index 0000000..d1112f1
--- /dev/null
+++ b/src/gallium/drivers/freedreno/freedreno_zsa.h
@@ -0,0 +1,60 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark at freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark at freedesktop.org>
+ */
+
+#ifndef FREEDRENO_ZSA_H_
+#define FREEDRENO_ZSA_H_
+
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+#include "freedreno_util.h"
+
+/*
+ * Driver-side depth/stencil/alpha state object: a copy of the gallium
+ * state plus register values precomputed from it at create time.
+ */
+struct fd_zsa_stateobj {
+	struct pipe_depth_stencil_alpha_state base;
+	/* depth enable/func/write and stencil enable/func/op bits */
+	uint32_t rb_depthcontrol;
+	uint32_t rb_colorcontrol;   /* must be OR'd w/ blend->rb_colorcontrol */
+	/* alpha-test reference value; only set when alpha test is enabled */
+	uint32_t rb_alpha_ref;
+	/* front-face / back-face stencil write and value masks */
+	uint32_t rb_stencilrefmask;
+	uint32_t rb_stencilrefmask_bf;
+};
+
+void fd_zsa_init(struct pipe_context *pctx);
+
+/* True when RB_DEPTHCONTROL_Z_ENABLE is set in the state's rb_depthcontrol. */
+static inline bool fd_depth_enabled(struct fd_zsa_stateobj *zsa)
+{
+	return (zsa->rb_depthcontrol & RB_DEPTHCONTROL_Z_ENABLE) != 0;
+}
+
+/* True when RB_DEPTHCONTROL_STENCIL_ENABLE is set in the state's rb_depthcontrol. */
+static inline bool fd_stencil_enabled(struct fd_zsa_stateobj *zsa)
+{
+	return (zsa->rb_depthcontrol & RB_DEPTHCONTROL_STENCIL_ENABLE) != 0;
+}
+
+#endif /* FREEDRENO_ZSA_H_ */
diff --git a/src/gallium/drivers/freedreno/instr.h b/src/gallium/drivers/freedreno/instr.h
new file mode 100644
index 0000000..fd19234
--- /dev/null
+++ b/src/gallium/drivers/freedreno/instr.h
@@ -0,0 +1,386 @@
+/*
+ * Copyright (c) 2012 Rob Clark <robdclark at gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef INSTR_H_
+#define INSTR_H_
+
+#include "freedreno_a2xx_reg.h"
+
+#define PACKED __attribute__((__packed__))
+
+/*
+ * ALU instructions:
+ */
+
+/*
+ * Scalar ALU opcodes, stored in the 6-bit scalar_opc field of
+ * instr_alu_t.  Value 41 is not assigned.
+ */
+typedef enum {
+	ADDs = 0,
+	ADD_PREVs = 1,
+	MULs = 2,
+	MUL_PREVs = 3,
+	MUL_PREV2s = 4,
+	MAXs = 5,
+	MINs = 6,
+	SETEs = 7,
+	SETGTs = 8,
+	SETGTEs = 9,
+	SETNEs = 10,
+	FRACs = 11,
+	TRUNCs = 12,
+	FLOORs = 13,
+	EXP_IEEE = 14,
+	LOG_CLAMP = 15,
+	LOG_IEEE = 16,
+	RECIP_CLAMP = 17,
+	RECIP_FF = 18,
+	RECIP_IEEE = 19,
+	RECIPSQ_CLAMP = 20,
+	RECIPSQ_FF = 21,
+	RECIPSQ_IEEE = 22,
+	MOVAs = 23,
+	MOVA_FLOORs = 24,
+	SUBs = 25,
+	SUB_PREVs = 26,
+	PRED_SETEs = 27,
+	PRED_SETNEs = 28,
+	PRED_SETGTs = 29,
+	PRED_SETGTEs = 30,
+	PRED_SET_INVs = 31,
+	PRED_SET_POPs = 32,
+	PRED_SET_CLRs = 33,
+	PRED_SET_RESTOREs = 34,
+	KILLEs = 35,
+	KILLGTs = 36,
+	KILLGTEs = 37,
+	KILLNEs = 38,
+	KILLONEs = 39,
+	SQRT_IEEE = 40,
+	MUL_CONST_0 = 42,
+	MUL_CONST_1 = 43,
+	ADD_CONST_0 = 44,
+	ADD_CONST_1 = 45,
+	SUB_CONST_0 = 46,
+	SUB_CONST_1 = 47,
+	SIN = 48,
+	COS = 49,
+	RETAIN_PREV = 50,
+} instr_scalar_opc_t;
+
+/*
+ * Vector ALU opcodes, stored in the 5-bit vector_opc field of
+ * instr_alu_t.
+ */
+typedef enum {
+	ADDv = 0,
+	MULv = 1,
+	MAXv = 2,
+	MINv = 3,
+	SETEv = 4,
+	SETGTv = 5,
+	SETGTEv = 6,
+	SETNEv = 7,
+	FRACv = 8,
+	TRUNCv = 9,
+	FLOORv = 10,
+	MULADDv = 11,
+	CNDEv = 12,
+	CNDGTEv = 13,
+	CNDGTv = 14,
+	DOT4v = 15,
+	DOT3v = 16,
+	DOT2ADDv = 17,
+	CUBEv = 18,
+	MAX4v = 19,
+	PRED_SETE_PUSHv = 20,
+	PRED_SETNE_PUSHv = 21,
+	PRED_SETGT_PUSHv = 22,
+	PRED_SETGTE_PUSHv = 23,
+	KILLEv = 24,
+	KILLGTv = 25,
+	KILLGTEv = 26,
+	KILLNEv = 27,
+	DSTv = 28,
+	MOVAv = 29,
+} instr_vector_opc_t;
+
+/*
+ * Bit layout of one ALU instruction: three dwords, each group of fields
+ * below adding up to exactly 32 bits.  The struct is packed, so the field
+ * order and widths must not be changed.
+ */
+typedef struct PACKED {
+	/* dword0: */
+	uint8_t             vector_dest              : 6;
+	uint8_t             vector_dest_rel          : 1;
+	uint8_t             low_precision_16b_fp     : 1;
+	uint8_t             scalar_dest              : 6;
+	uint8_t             scalar_dest_rel          : 1;
+	uint8_t             export_data              : 1;
+	uint8_t             vector_write_mask        : 4;
+	uint8_t             scalar_write_mask        : 4;
+	uint8_t             vector_clamp             : 1;
+	uint8_t             scalar_clamp             : 1;
+	instr_scalar_opc_t  scalar_opc               : 6;
+	/* dword1: */
+	uint8_t             src3_swiz                : 8;
+	uint8_t             src2_swiz                : 8;
+	uint8_t             src1_swiz                : 8;
+	uint8_t             src3_reg_negate          : 1;
+	uint8_t             src2_reg_negate          : 1;
+	uint8_t             src1_reg_negate          : 1;
+	uint8_t             pred_select              : 2;
+	uint8_t             relative_addr            : 1;
+	uint8_t             const_1_rel_abs          : 1;
+	uint8_t             const_0_rel_abs          : 1;
+	/* dword2: */
+	uint8_t             src3_reg                 : 6;
+	uint8_t             src3_reg_select          : 1;
+	uint8_t             src3_reg_abs             : 1;
+	uint8_t             src2_reg                 : 6;
+	uint8_t             src2_reg_select          : 1;
+	uint8_t             src2_reg_abs             : 1;
+	uint8_t             src1_reg                 : 6;
+	uint8_t             src1_reg_select          : 1;
+	uint8_t             src1_reg_abs             : 1;
+	instr_vector_opc_t  vector_opc               : 5;
+	uint8_t             src3_sel                 : 1;
+	uint8_t             src2_sel                 : 1;
+	uint8_t             src1_sel                 : 1;
+} instr_alu_t;
+
+
+
+/*
+ * CF instructions:
+ */
+
+/*
+ * Control-flow opcodes, stored in the 4-bit opc field that ends every
+ * instr_cf_* layout; the sixteen values use the whole 4-bit range.
+ */
+typedef enum {
+	NOP = 0,
+	EXEC = 1,
+	EXEC_END = 2,
+	COND_EXEC = 3,
+	COND_EXEC_END = 4,
+	COND_PRED_EXEC = 5,
+	COND_PRED_EXEC_END = 6,
+	LOOP_START = 7,
+	LOOP_END = 8,
+	COND_CALL = 9,
+	RETURN = 10,
+	COND_JMP = 11,
+	ALLOC = 12,
+	COND_EXEC_PRED_CLEAN = 13,
+	COND_EXEC_PRED_CLEAN_END = 14,
+	MARK_VS_FETCH_DONE = 15,
+} instr_cf_opc_t;
+
+/* Value of the 1-bit address_mode field of the CF instruction layouts. */
+typedef enum {
+	RELATIVE_ADDR = 0,
+	ABSOLUTE_ADDR = 1,
+} instr_addr_mode_t;
+
+/* Value of the 2-bit buffer_select field of instr_cf_alloc_t. */
+typedef enum {
+	SQ_NO_ALLOC = 0,
+	SQ_POSITION = 1,
+	SQ_PARAMETER_PIXEL = 2,
+	SQ_MEMORY = 3,
+} instr_alloc_type_t;
+
+/*
+ * Bit layout of a 48-bit "exec" family CF instruction.  Packed; field
+ * order and widths must not be changed.
+ */
+typedef struct PACKED {
+	uint16_t            address                  : 9;
+	uint8_t             reserved0                : 3;
+	uint8_t             count                    : 3;
+	/* NOTE(review): presumably "yield"; spelling kept because other code
+	 * may refer to the field by this name.
+	 */
+	uint8_t             yeild                    : 1;
+	uint16_t            serialize                : 12;
+	uint8_t             vc                       : 6;   /* vertex cache? */
+	uint8_t             bool_addr                : 8;
+	uint8_t             condition                : 1;
+	instr_addr_mode_t   address_mode             : 1;
+	instr_cf_opc_t      opc                      : 4;
+} instr_cf_exec_t;
+
+/*
+ * Bit layout of a 48-bit loop CF instruction.  Packed; field order and
+ * widths must not be changed.
+ */
+typedef struct PACKED {
+	uint16_t            address                  : 10;
+	uint8_t             reserved0                : 6;
+	uint8_t             loop_id                  : 5;
+	uint32_t            reserved1                : 22;
+	instr_addr_mode_t   address_mode             : 1;
+	instr_cf_opc_t      opc                      : 4;
+} instr_cf_loop_t;
+
+/*
+ * Bit layout of a 48-bit jump/call CF instruction.  Packed; field order
+ * and widths must not be changed.
+ */
+typedef struct PACKED {
+	uint16_t            address                  : 10;
+	uint8_t             reserved0                : 3;
+	uint8_t             force_call               : 1;
+	uint8_t             predicated_jmp           : 1;
+	uint32_t            reserved1                : 18;
+	uint8_t             direction                : 1;
+	uint8_t             bool_addr                : 8;
+	uint8_t             condition                : 1;
+	instr_addr_mode_t   address_mode             : 1;
+	instr_cf_opc_t      opc                      : 4;
+} instr_cf_jmp_call_t;
+
+/*
+ * Bit layout of a 48-bit alloc CF instruction.  Packed; field order and
+ * widths must not be changed.
+ */
+typedef struct PACKED {
+	uint8_t             size                     : 4;
+	uint64_t            reserved0                : 36;
+	uint8_t             no_serial                : 1;
+	instr_alloc_type_t  buffer_select            : 2;
+	uint8_t             alloc_mode               : 1;
+	instr_cf_opc_t      opc                      : 4;
+} instr_cf_alloc_t;
+
+/*
+ * Any CF instruction.  The anonymous struct exposes the opcode, which sits
+ * in the top 4 of the 48 bits in every variant, so 'opc' can be read
+ * before choosing which member to interpret.
+ */
+typedef union PACKED {
+	instr_cf_exec_t     exec;
+	instr_cf_loop_t     loop;
+	instr_cf_jmp_call_t jmp_call;
+	instr_cf_alloc_t    alloc;
+	struct PACKED {
+		uint64_t        dummy                    : 44;
+		instr_cf_opc_t  opc                      : 4;
+	};
+} instr_cf_t;
+
+
+
+/*
+ * FETCH instructions:
+ */
+
+/*
+ * Opcode values for the 5-bit 'opc' field of a fetch instruction.  The
+ * assembler in ir.c only emits VTX_FETCH and TEX_FETCH; anything else is
+ * rejected by instr_emit_fetch().
+ */
+typedef enum {
+	VTX_FETCH = 0,
+	TEX_FETCH = 1,
+	TEX_GET_BORDER_COLOR_FRAC = 16,
+	TEX_GET_COMP_TEX_LOD = 17,
+	TEX_GET_GRADIENTS = 18,
+	TEX_GET_WEIGHTS = 19,
+	TEX_SET_TEX_LOD = 24,
+	TEX_SET_GRADIENTS_H = 25,
+	TEX_SET_GRADIENTS_V = 26,
+	TEX_RESERVED_4 = 27,
+} instr_fetch_opc_t;
+
+/*
+ * Values for the 2-bit mag/min/mip and vol_mag/vol_min filter fields of
+ * instr_fetch_tex_t.  instr_emit_fetch() in ir.c always selects
+ * TEX_FILTER_USE_FETCH_CONST.
+ */
+typedef enum {
+	TEX_FILTER_POINT = 0,
+	TEX_FILTER_LINEAR = 1,
+	TEX_FILTER_BASEMAP = 2,            /* only applicable for mip-filter */
+	TEX_FILTER_USE_FETCH_CONST = 3,
+} instr_tex_filter_t;
+
+/*
+ * Values for the 3-bit aniso_filter field of instr_fetch_tex_t.  The
+ * value 6 is not defined here.
+ */
+typedef enum {
+	ANISO_FILTER_DISABLED = 0,
+	ANISO_FILTER_MAX_1_1 = 1,
+	ANISO_FILTER_MAX_2_1 = 2,
+	ANISO_FILTER_MAX_4_1 = 3,
+	ANISO_FILTER_MAX_8_1 = 4,
+	ANISO_FILTER_MAX_16_1 = 5,
+	ANISO_FILTER_USE_FETCH_CONST = 7,
+} instr_aniso_filter_t;
+
+/*
+ * Values for the 3-bit arbitrary_filter field of instr_fetch_tex_t.  The
+ * value 6 is not defined here.
+ */
+typedef enum {
+	ARBITRARY_FILTER_2X4_SYM = 0,
+	ARBITRARY_FILTER_2X4_ASYM = 1,
+	ARBITRARY_FILTER_4X2_SYM = 2,
+	ARBITRARY_FILTER_4X2_ASYM = 3,
+	ARBITRARY_FILTER_4X4_SYM = 4,
+	ARBITRARY_FILTER_4X4_ASYM = 5,
+	ARBITRARY_FILTER_USE_FETCH_CONST = 7,
+} instr_arbitrary_filter_t;
+
+/*
+ * Values for the 1-bit sample_location field of instr_fetch_tex_t;
+ * instr_emit_fetch() in ir.c uses SAMPLE_CENTER.
+ */
+typedef enum {
+	SAMPLE_CENTROID = 0,
+	SAMPLE_CENTER = 1,
+} instr_sample_loc_t;
+
+/* alias used for the 6-bit 'format' field of instr_fetch_vtx_t */
+typedef enum sq_surfaceformat instr_surf_fmt_t;
+
+/*
+ * Bit layout of a texture fetch instruction: three dwords, each group of
+ * fields below adding up to 32 bits.  Filled in by instr_emit_fetch() in
+ * ir.c for TEX_FETCH.
+ */
+typedef struct PACKED {
+	/* dword0: */
+	instr_fetch_opc_t   opc                      : 5;
+	uint8_t             src_reg                  : 6;
+	uint8_t             src_reg_am               : 1;
+	uint8_t             dst_reg                  : 6;
+	uint8_t             dst_reg_am               : 1;
+	uint8_t             fetch_valid_only         : 1;
+	uint8_t             const_idx                : 5;
+	uint8_t             tx_coord_denorm          : 1;
+	uint8_t             src_swiz                 : 6;   /* 3 components x 2 bits, see reg_fetch_src_swiz() */
+	/* dword1: */
+	uint16_t            dst_swiz                 : 12;  /* 4 components x 3 bits, see reg_fetch_dst_swiz() */
+	instr_tex_filter_t  mag_filter               : 2;
+	instr_tex_filter_t  min_filter               : 2;
+	instr_tex_filter_t  mip_filter               : 2;
+	instr_aniso_filter_t aniso_filter            : 3;
+	instr_arbitrary_filter_t arbitrary_filter    : 3;
+	instr_tex_filter_t  vol_mag_filter           : 2;
+	instr_tex_filter_t  vol_min_filter           : 2;
+	uint8_t             use_comp_lod             : 1;
+	uint8_t             use_reg_lod              : 2;
+	uint8_t             pred_select              : 1;   /* set when the instruction is predicated */
+	/* dword2: */
+	uint8_t             use_reg_gradients        : 1;
+	instr_sample_loc_t  sample_location          : 1;
+	uint8_t             lod_bias                 : 7;
+	uint8_t             unused                   : 7;
+	uint8_t             offset_x                 : 5;
+	uint8_t             offset_y                 : 5;
+	uint8_t             offset_z                 : 5;
+	uint8_t             pred_condition           : 1;   /* 1 for IR_PRED_EQ, 0 otherwise */
+} instr_fetch_tex_t;
+
+/*
+ * Bit layout of a vertex fetch instruction: three dwords, each group of
+ * fields below adding up to 32 bits.  Filled in by instr_emit_fetch() in
+ * ir.c for VTX_FETCH.
+ */
+typedef struct PACKED {
+	/* dword0: */
+	instr_fetch_opc_t   opc                      : 5;
+	uint8_t             src_reg                  : 6;
+	uint8_t             src_reg_am               : 1;
+	uint8_t             dst_reg                  : 6;
+	uint8_t             dst_reg_am               : 1;
+	uint8_t             must_be_one              : 1;   /* the assembler always writes 1 here */
+	uint8_t             const_index              : 5;
+	uint8_t             const_index_sel          : 2;
+	uint8_t             reserved0                : 3;   /* the assembler writes 0x3 for the first fetch, 0x2 afterwards */
+	uint8_t             src_swiz                 : 2;   /* single component, see reg_fetch_src_swiz() */
+	/* dword1: */
+	uint16_t            dst_swiz                 : 12;  /* 4 components x 3 bits, see reg_fetch_dst_swiz() */
+	uint8_t             format_comp_all          : 1;   /* '1' for signed, '0' for unsigned? */
+	uint8_t             num_format_all           : 1;   /* '0' for normalized, '1' for unnormalized */
+	uint8_t             signed_rf_mode_all       : 1;
+	uint8_t             reserved1                : 1;
+	instr_surf_fmt_t    format                   : 6;
+	uint8_t             reserved2                : 1;
+	uint8_t             exp_adjust_all           : 7;
+	uint8_t             reserved3                : 1;   /* the assembler sets this on every fetch but the first */
+	uint8_t             pred_select              : 1;   /* set when the instruction is predicated */
+	/* dword2: */
+	uint8_t             stride                   : 8;
+	/* possibly offset and reserved4 are swapped on a200? */
+	uint8_t             offset                   : 8;
+	uint8_t             reserved4                : 8;
+	uint8_t             reserved5                : 7;
+	uint8_t             pred_condition           : 1;   /* 1 for IR_PRED_EQ, 0 otherwise */
+} instr_fetch_vtx_t;
+
+/*
+ * Any fetch instruction (three dwords).  Both variants start with the
+ * 5-bit opcode, so the anonymous struct lets callers access 'opc' before
+ * deciding whether to interpret the rest as 'tex' or 'vtx'.
+ */
+typedef union PACKED {
+	instr_fetch_tex_t   tex;
+	instr_fetch_vtx_t   vtx;
+	struct PACKED {
+		/* dword0: */
+		instr_fetch_opc_t opc                    : 5;
+		uint32_t        dummy0                   : 27;
+		/* dword1: */
+		uint32_t        dummy1                   : 32;
+		/* dword2: */
+		uint32_t        dummy2                   : 32;
+	};
+} instr_fetch_t;
+
+#endif /* INSTR_H_ */
diff --git a/src/gallium/drivers/freedreno/ir.c b/src/gallium/drivers/freedreno/ir.c
new file mode 100644
index 0000000..cbc1230
--- /dev/null
+++ b/src/gallium/drivers/freedreno/ir.c
@@ -0,0 +1,701 @@
+/*
+ * Copyright (c) 2012 Rob Clark <robdclark at gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ir.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "freedreno_util.h"
+#include "instr.h"
+
+/* Tracing hook: the DBG() call is still compiled, but it sits behind a
+ * constant-false condition so it never runs.
+ */
+#define DEBUG_MSG(f, ...)  do { if (0) DBG(f, ##__VA_ARGS__); } while (0)
+/* Warnings and errors go through DBG() with a fixed prefix.  The prefix is
+ * pasted onto f by string-literal concatenation, so f must be a literal.
+ */
+#define WARN_MSG(f, ...)   DBG("WARN:  "f, ##__VA_ARGS__)
+#define ERROR_MSG(f, ...)  DBG("ERROR: "f, ##__VA_ARGS__)
+
+/* upper bound on register numbers accepted by ir_reg_create() */
+#define REG_MASK 0x3f
+
+/* Forward declarations for the emit and swizzle-encoding helpers that are
+ * defined further down in this file.
+ */
+static int cf_emit(struct ir_cf *cf, instr_cf_t *instr);
+
+static int instr_emit(struct ir_instruction *instr, uint32_t *dwords,
+		uint32_t idx, struct ir_shader_info *info);
+
+static void reg_update_stats(struct ir_register *reg,
+		struct ir_shader_info *info, bool dest);
+static uint32_t reg_fetch_src_swiz(struct ir_register *reg, uint32_t n);
+static uint32_t reg_fetch_dst_swiz(struct ir_register *reg);
+static uint32_t reg_alu_dst_swiz(struct ir_register *reg);
+static uint32_t reg_alu_src_swiz(struct ir_register *reg);
+
+/* simple allocator to carve allocations out of an up-front allocated heap,
+ * so that we can free everything easily in one shot.
+ *
+ * Returns a pointer to the current top of shader->heap and advances
+ * heap_idx by sz rounded up to a multiple of 4.  Nothing is handed back
+ * individually; the heap is part of struct ir_shader and goes away with
+ * it.  The shader comes from calloc(), so the returned memory is zeroed.
+ *
+ * Exhausting the heap is a programming error: it trips the assert instead
+ * of silently handing out memory beyond the end of the array.
+ */
+static void * ir_alloc(struct ir_shader *shader, int sz)
+{
+	unsigned step;
+	void *ptr;
+
+	assert(sz >= 0);
+	step = ALIGN(sz, 4);
+	/* written as a subtraction so the check itself cannot wrap around */
+	assert(shader->heap_idx <= ARRAY_SIZE(shader->heap));
+	assert(step <= ARRAY_SIZE(shader->heap) - shader->heap_idx);
+
+	ptr = &shader->heap[shader->heap_idx];
+	shader->heap_idx += step;
+	return ptr;
+}
+
+/* Copy a NUL-terminated string into the shader's heap.  A NULL input
+ * yields NULL; the copy is released together with the shader.
+ */
+static char * ir_strdup(struct ir_shader *shader, const char *str)
+{
+	char *copy;
+	int n;
+
+	if (!str)
+		return NULL;
+
+	n = strlen(str);
+	copy = ir_alloc(shader, n+1);
+	memcpy(copy, str, n);
+	copy[n] = '\0';
+
+	return copy;
+}
+
+/* Allocate a new, zero-filled shader.  Returns whatever calloc() returns,
+ * i.e. NULL when the allocation fails.
+ */
+struct ir_shader * ir_shader_create(void)
+{
+	struct ir_shader *shader;
+
+	DEBUG_MSG("");
+	shader = calloc(1, sizeof(*shader));
+	return shader;
+}
+
+/* Release a shader obtained from ir_shader_create().  Everything carved
+ * out of the shader's heap by ir_alloc() lives inside the same allocation,
+ * so this single free() releases it all.
+ */
+void ir_shader_destroy(struct ir_shader *shader)
+{
+	DEBUG_MSG("");
+	free(shader);
+}
+
+/* Fill in addr, cnt and sequence for every EXEC/EXEC_END CF and compute
+ * the total program size.
+ *
+ * Addresses count 3-dword slots: the CF program takes cfs_count / 2 slots
+ * and each ALU/FETCH instruction takes one more.  A pre-set addr or cnt
+ * that disagrees with the computed value is reported and overwritten.
+ * 'sequence' holds two bits per instruction, lowest bits for the first
+ * one: 0x1 marks a fetch, 0x2 marks sync.
+ *
+ * Always returns 0.
+ */
+static int shader_resolve(struct ir_shader *shader, struct ir_shader_info *info)
+{
+	uint32_t next_addr = shader->cfs_count / 2;
+	unsigned i;
+
+	for (i = 0; i < shader->cfs_count; i++) {
+		struct ir_cf *cf = shader->cfs[i];
+		uint32_t seq = 0;
+		int n;
+
+		if ((cf->cf_type != EXEC) && (cf->cf_type != EXEC_END))
+			continue;
+
+		if (cf->exec.addr && (cf->exec.addr != next_addr))
+			WARN_MSG("invalid addr '%d' at CF %d", cf->exec.addr, i);
+		if (cf->exec.cnt && (cf->exec.cnt != cf->exec.instrs_count))
+			WARN_MSG("invalid cnt '%d' at CF %d", cf->exec.cnt, i);
+
+		/* walk backwards so the first instruction lands in the low bits */
+		n = cf->exec.instrs_count;
+		while (--n >= 0) {
+			struct ir_instruction *instr = cf->exec.instrs[n];
+			uint32_t bits = 0;
+
+			if (instr->instr_type == IR_FETCH)
+				bits |= 0x1;
+			if (instr->sync)
+				bits |= 0x2;
+			seq = (seq << 2) | bits;
+		}
+
+		cf->exec.sequence = seq;
+		cf->exec.cnt      = cf->exec.instrs_count;
+		cf->exec.addr     = next_addr;
+
+		next_addr += cf->exec.instrs_count;
+	}
+
+	info->sizedwords = 3 * next_addr;
+
+	return 0;
+}
+
+/*
+ * Assemble the shader into a freshly allocated array of dwords.
+ *
+ * Resets and fills in *info (sizedwords, max_reg, max_input_reg,
+ * regs_written).  The output holds the CF program first, two CFs packed
+ * into every three dwords, followed by three dwords per ALU/FETCH
+ * instruction in CF order.  A NOP CF is appended to the shader when its
+ * CF count is odd.
+ *
+ * Returns the buffer, which the caller releases with free(), or NULL when
+ * resolving, allocating or emitting fails.
+ */
+void * ir_shader_assemble(struct ir_shader *shader, struct ir_shader_info *info)
+{
+	uint32_t i, j;
+	uint32_t *ptr, *dwords = NULL;
+	uint32_t idx = 0;
+	int ret;
+
+	info->sizedwords    = 0;
+	info->max_reg       = -1;
+	info->max_input_reg = 0;
+	info->regs_written  = 0;
+
+	/* we need an even # of CF's.. insert a NOP if needed */
+	if (shader->cfs_count != ALIGN(shader->cfs_count, 2))
+		ir_cf_create(shader, NOP);
+
+	/* first pass, resolve sizes and addresses: */
+	ret = shader_resolve(shader, info);
+	if (ret) {
+		ERROR_MSG("resolve failed: %d", ret);
+		goto fail;
+	}
+
+	ptr = dwords = calloc(info->sizedwords, sizeof(*dwords));
+	if (!dwords) {
+		/* without this the emit passes below would write through NULL */
+		ERROR_MSG("out of memory");
+		goto fail;
+	}
+
+	/* second pass, emit CF program in pairs: */
+	for (i = 0; i < shader->cfs_count; i += 2) {
+		instr_cf_t *cfs = (instr_cf_t *)ptr;
+		ret = cf_emit(shader->cfs[i], &cfs[0]);
+		if (ret) {
+			ERROR_MSG("CF emit failed: %d\n", ret);
+			goto fail;
+		}
+		ret = cf_emit(shader->cfs[i+1], &cfs[1]);
+		if (ret) {
+			ERROR_MSG("CF emit failed: %d\n", ret);
+			goto fail;
+		}
+		ptr += 3;
+		assert((ptr - dwords) <= info->sizedwords);
+	}
+
+	/* third pass, emit ALU/FETCH: */
+	for (i = 0; i < shader->cfs_count; i++) {
+		struct ir_cf *cf = shader->cfs[i];
+		if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) {
+			for (j = 0; j < cf->exec.instrs_count; j++) {
+				/* idx counts instructions across all CFs */
+				ret = instr_emit(cf->exec.instrs[j], ptr, idx++, info);
+				if (ret) {
+					ERROR_MSG("instruction emit failed: %d", ret);
+					goto fail;
+				}
+				ptr += 3;
+				assert((ptr - dwords) <= info->sizedwords);
+			}
+		}
+	}
+
+	return dwords;
+
+fail:
+	/* free(NULL) is a no-op, so this also covers the early failures */
+	free(dwords);
+	return NULL;
+}
+
+
+/* Record an attribute header occupying registers rstart..rstart+num-1.
+ * The name is copied into the shader's heap.
+ */
+struct ir_attribute * ir_attribute_create(struct ir_shader *shader,
+		int rstart, int num, const char *name)
+{
+	struct ir_attribute *attr;
+
+	DEBUG_MSG("R%d-R%d: %s", rstart, rstart + num - 1, name);
+	assert(shader->attributes_count < ARRAY_SIZE(shader->attributes));
+
+	attr = ir_alloc(shader, sizeof(*attr));
+	attr->name   = ir_strdup(shader, name);
+	attr->num    = num;
+	attr->rstart = rstart;
+
+	shader->attributes[shader->attributes_count] = attr;
+	shader->attributes_count++;
+	return attr;
+}
+
+/* Record a constant header: const register cstart holding the four given
+ * float values.
+ */
+struct ir_const * ir_const_create(struct ir_shader *shader,
+		int cstart, float v0, float v1, float v2, float v3)
+{
+	const float vals[4] = { v0, v1, v2, v3 };
+	struct ir_const *cnst;
+
+	DEBUG_MSG("C%d: %f, %f, %f, %f", cstart, v0, v1, v2, v3);
+	assert(shader->consts_count < ARRAY_SIZE(shader->consts));
+
+	cnst = ir_alloc(shader, sizeof(*cnst));
+	memcpy(cnst->val, vals, sizeof(cnst->val));
+	cnst->cstart = cstart;
+
+	shader->consts[shader->consts_count] = cnst;
+	shader->consts_count++;
+	return cnst;
+}
+
+/* Record a sampler header with the given index.  The name is copied into
+ * the shader's heap.
+ */
+struct ir_sampler * ir_sampler_create(struct ir_shader *shader,
+		int idx, const char *name)
+{
+	struct ir_sampler *smp;
+
+	DEBUG_MSG("CONST(%d): %s", idx, name);
+	assert(shader->samplers_count < ARRAY_SIZE(shader->samplers));
+
+	smp = ir_alloc(shader, sizeof(*smp));
+	smp->name = ir_strdup(shader, name);
+	smp->idx  = idx;
+
+	shader->samplers[shader->samplers_count] = smp;
+	shader->samplers_count++;
+	return smp;
+}
+
+/* Record a uniform header occupying const registers
+ * cstart..cstart+num-1.  The name is copied into the shader's heap.
+ */
+struct ir_uniform * ir_uniform_create(struct ir_shader *shader,
+		int cstart, int num, const char *name)
+{
+	struct ir_uniform *uni;
+
+	DEBUG_MSG("C%d-C%d: %s", cstart, cstart + num - 1, name);
+	assert(shader->uniforms_count < ARRAY_SIZE(shader->uniforms));
+
+	uni = ir_alloc(shader, sizeof(*uni));
+	uni->name   = ir_strdup(shader, name);
+	uni->num    = num;
+	uni->cstart = cstart;
+
+	shader->uniforms[shader->uniforms_count] = uni;
+	shader->uniforms_count++;
+	return uni;
+}
+
+/* Record a varying header occupying registers rstart..rstart+num-1.
+ * The name is copied into the shader's heap.
+ */
+struct ir_varying * ir_varying_create(struct ir_shader *shader,
+		int rstart, int num, const char *name)
+{
+	struct ir_varying *var;
+
+	DEBUG_MSG("R%d-R%d: %s", rstart, rstart + num - 1, name);
+	assert(shader->varyings_count < ARRAY_SIZE(shader->varyings));
+
+	var = ir_alloc(shader, sizeof(*var));
+	var->name   = ir_strdup(shader, name);
+	var->num    = num;
+	var->rstart = rstart;
+
+	shader->varyings[shader->varyings_count] = var;
+	shader->varyings_count++;
+	return var;
+}
+
+
+/* Append a new control-flow node of the given type to the shader.  The
+ * node comes zeroed from the shader's heap; only shader and cf_type are
+ * set here.
+ */
+struct ir_cf * ir_cf_create(struct ir_shader *shader, instr_cf_opc_t cf_type)
+{
+	struct ir_cf *node;
+
+	DEBUG_MSG("%d", cf_type);
+	assert(shader->cfs_count < ARRAY_SIZE(shader->cfs));
+
+	node = ir_alloc(shader, sizeof(*node));
+	node->cf_type = cf_type;
+	node->shader  = shader;
+
+	shader->cfs[shader->cfs_count] = node;
+	shader->cfs_count++;
+	return node;
+}
+
+
+/*
+ * CF instructions:
+ */
+
+/* Encode one control-flow node into *instr.
+ *
+ * The instruction is cleared first and the opcode is always written.
+ * NOP needs nothing more, EXEC/EXEC_END add address, count and serialize,
+ * ALLOC adds size and buffer_select.  Returns 0 on success and -1 for an
+ * unsupported ALLOC type or a CF type that is not implemented yet.
+ */
+static int cf_emit(struct ir_cf *cf, instr_cf_t *instr)
+{
+	memset(instr, 0, sizeof(*instr));
+
+	instr->opc = cf->cf_type;
+
+	switch (cf->cf_type) {
+	case NOP:
+		return 0;
+
+	case EXEC:
+	case EXEC_END:
+		assert(cf->exec.addr <= 0x1ff);
+		assert(cf->exec.cnt <= 0x6);
+		assert(cf->exec.sequence <= 0xfff);
+		instr->exec.address = cf->exec.addr;
+		instr->exec.count = cf->exec.cnt;
+		instr->exec.serialize = cf->exec.sequence;
+		return 0;
+
+	case ALLOC:
+		assert(cf->alloc.size <= 0xf);
+		instr->alloc.size = cf->alloc.size;
+		if ((cf->alloc.type != SQ_POSITION) &&
+				(cf->alloc.type != SQ_PARAMETER_PIXEL)) {
+			ERROR_MSG("invalid alloc type: %d", cf->alloc.type);
+			return -1;
+		}
+		instr->alloc.buffer_select = cf->alloc.type;
+		return 0;
+
+	case COND_EXEC:
+	case COND_EXEC_END:
+	case COND_PRED_EXEC:
+	case COND_PRED_EXEC_END:
+	case LOOP_START:
+	case LOOP_END:
+	case COND_CALL:
+	case RETURN:
+	case COND_JMP:
+	case COND_EXEC_PRED_CLEAN:
+	case COND_EXEC_PRED_CLEAN_END:
+	case MARK_VS_FETCH_DONE:
+		ERROR_MSG("TODO");
+		return -1;
+	}
+
+	/* a cf_type outside the enum falls through with only opc set */
+	return 0;
+}
+
+
+/* Append a new instruction of the given type to an exec CF.  The
+ * instruction inherits the shader's current predicate setting.
+ */
+struct ir_instruction * ir_instr_create(struct ir_cf *cf, int instr_type)
+{
+	struct ir_shader *shader = cf->shader;
+	struct ir_instruction *ins;
+
+	DEBUG_MSG("%d", instr_type);
+	assert(cf->exec.instrs_count < ARRAY_SIZE(cf->exec.instrs));
+
+	ins = ir_alloc(shader, sizeof(*ins));
+	ins->instr_type = instr_type;
+	ins->pred       = shader->pred;
+	ins->shader     = shader;
+
+	cf->exec.instrs[cf->exec.instrs_count] = ins;
+	cf->exec.instrs_count++;
+	return ins;
+}
+
+
+/*
+ * FETCH instructions:
+ */
+
+/* Encode a FETCH instruction into the three dwords at 'dwords'.
+ *
+ * regs[0] is the destination and regs[1] the source.  'idx' is the
+ * instruction's position in the whole program and is only used for the
+ * reserved-bit pattern of vertex fetches.  Register usage is recorded in
+ * *info.  Returns 0 on success, -1 for an opcode other than VTX_FETCH or
+ * TEX_FETCH.
+ */
+static int instr_emit_fetch(struct ir_instruction *instr,
+		uint32_t *dwords, uint32_t idx,
+		struct ir_shader_info *info)
+{
+	instr_fetch_t *fetch = (instr_fetch_t *)dwords;
+	int reg = 0;
+	struct ir_register *dst_reg = instr->regs[reg++];
+	struct ir_register *src_reg = instr->regs[reg++];
+
+	memset(fetch, 0, sizeof(*fetch));
+
+	reg_update_stats(dst_reg, info, true);
+	reg_update_stats(src_reg, info, false);
+
+	fetch->opc = instr->fetch.opc;
+
+	if (instr->fetch.opc == VTX_FETCH) {
+		instr_fetch_vtx_t *vtx = &fetch->vtx;
+
+		/* each of these must fit its bitfield */
+		/* NOTE(review): fetch.offset is not range-checked although the
+		 * field is also 8 bits wide -- confirm whether that is intended
+		 */
+		assert(instr->fetch.stride <= 0xff);
+		assert(instr->fetch.fmt <= 0x3f);
+		assert(instr->fetch.const_idx <= 0x1f);
+		assert(instr->fetch.const_idx_sel <= 0x3);
+
+		vtx->src_reg = src_reg->num;
+		vtx->src_swiz = reg_fetch_src_swiz(src_reg, 1);
+		vtx->dst_reg = dst_reg->num;
+		vtx->dst_swiz = reg_fetch_dst_swiz(dst_reg);
+		vtx->must_be_one = 1;
+		vtx->const_index = instr->fetch.const_idx;
+		vtx->const_index_sel = instr->fetch.const_idx_sel;
+		vtx->format_comp_all = !!instr->fetch.is_signed;
+		vtx->num_format_all = !instr->fetch.is_normalized;
+		vtx->format = instr->fetch.fmt;
+		vtx->stride = instr->fetch.stride;
+		vtx->offset = instr->fetch.offset;
+
+		if (instr->pred != IR_PRED_NONE) {
+			vtx->pred_select = 1;
+			vtx->pred_condition = (instr->pred == IR_PRED_EQ) ? 1 : 0;
+		}
+
+		/* XXX seems like every FETCH but the first has
+		 * this bit set:
+		 */
+		vtx->reserved3 = (idx > 0) ? 0x1 : 0x0;
+		vtx->reserved0 = (idx > 0) ? 0x2 : 0x3;
+	} else if (instr->fetch.opc == TEX_FETCH) {
+		instr_fetch_tex_t *tex = &fetch->tex;
+
+		assert(instr->fetch.const_idx <= 0x1f);
+
+		tex->src_reg = src_reg->num;
+		tex->src_swiz = reg_fetch_src_swiz(src_reg, 3);
+		tex->dst_reg = dst_reg->num;
+		tex->dst_swiz = reg_fetch_dst_swiz(dst_reg);
+		tex->const_idx = instr->fetch.const_idx;
+		/* all filter fields are set to their USE_FETCH_CONST value */
+		tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
+		tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
+		tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
+		tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
+		tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
+		tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
+		tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
+		tex->use_comp_lod = 1;
+		tex->sample_location = SAMPLE_CENTER;
+
+		if (instr->pred != IR_PRED_NONE) {
+			tex->pred_select = 1;
+			tex->pred_condition = (instr->pred == IR_PRED_EQ) ? 1 : 0;
+		}
+
+	} else {
+		ERROR_MSG("invalid fetch opc: %d\n", instr->fetch.opc);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * ALU instructions:
+ */
+
+/* Encode an ALU instruction into the three dwords at 'dwords'.
+ *
+ * Operand order in instr->regs[]:
+ *   dst, [src3 -- only for 3-source vector ops], src1, src2,
+ *   then, if a scalar op is present, scalar dst and src3.
+ * A vector_opc of ~0 means "no vector op": MAXv is emitted with an empty
+ * write mask.  A scalar_opc of ~0 means "no scalar op": MAXs is emitted.
+ * Register usage is recorded in *info.  Always returns 0.
+ */
+static int instr_emit_alu(struct ir_instruction *instr, uint32_t *dwords,
+		struct ir_shader_info *info)
+{
+	int reg = 0;
+	instr_alu_t *alu = (instr_alu_t *)dwords;
+	struct ir_register *dst_reg  = instr->regs[reg++];
+	struct ir_register *src1_reg;
+	struct ir_register *src2_reg;
+	struct ir_register *src3_reg;
+
+	memset(alu, 0, sizeof(*alu));
+
+	/* handle instructions w/ 3 src operands: */
+	switch (instr->alu.vector_opc) {
+	case MULADDv:
+	case CNDEv:
+	case CNDGTEv:
+	case CNDGTv:
+	case DOT2ADDv:
+		/* note: disassembler lists 3rd src first, ie:
+		 *   MULADDv Rdst = Rsrc3 + (Rsrc1 * Rsrc2)
+		 * which is the reason for this strange ordering.
+		 */
+		src3_reg = instr->regs[reg++];
+		break;
+	default:
+		src3_reg = NULL;
+		break;
+	}
+
+	src1_reg = instr->regs[reg++];
+	src2_reg = instr->regs[reg++];
+
+	reg_update_stats(dst_reg, info, true);
+	reg_update_stats(src1_reg, info, false);
+	reg_update_stats(src2_reg, info, false);
+
+	assert((dst_reg->flags & ~IR_REG_EXPORT) == 0);
+	assert(!dst_reg->swizzle || (strlen(dst_reg->swizzle) == 4));
+	assert((src1_reg->flags & IR_REG_EXPORT) == 0);
+	assert(!src1_reg->swizzle || (strlen(src1_reg->swizzle) == 4));
+	assert((src2_reg->flags & IR_REG_EXPORT) == 0);
+	assert(!src2_reg->swizzle || (strlen(src2_reg->swizzle) == 4));
+
+	if (instr->alu.vector_opc == ~0) {
+		alu->vector_opc          = MAXv;
+		alu->vector_write_mask   = 0;
+	} else {
+		alu->vector_opc          = instr->alu.vector_opc;
+		alu->vector_write_mask   = reg_alu_dst_swiz(dst_reg);
+	}
+
+	alu->vector_dest         = dst_reg->num;
+	alu->export_data         = !!(dst_reg->flags & IR_REG_EXPORT);
+
+	// TODO predicate case/condition.. need to add to parser
+
+	alu->src2_reg            = src2_reg->num;
+	alu->src2_swiz           = reg_alu_src_swiz(src2_reg);
+	alu->src2_reg_negate     = !!(src2_reg->flags & IR_REG_NEGATE);
+	alu->src2_reg_abs        = !!(src2_reg->flags & IR_REG_ABS);
+	alu->src2_sel            = !(src2_reg->flags & IR_REG_CONST);
+
+	alu->src1_reg            = src1_reg->num;
+	alu->src1_swiz           = reg_alu_src_swiz(src1_reg);
+	alu->src1_reg_negate     = !!(src1_reg->flags & IR_REG_NEGATE);
+	alu->src1_reg_abs        = !!(src1_reg->flags & IR_REG_ABS);
+	alu->src1_sel            = !(src1_reg->flags & IR_REG_CONST);
+
+	alu->vector_clamp        = instr->alu.vector_clamp;
+	alu->scalar_clamp        = instr->alu.scalar_clamp;
+
+	if (instr->alu.scalar_opc != ~0) {
+		struct ir_register *sdst_reg = instr->regs[reg++];
+
+		reg_update_stats(sdst_reg, info, true);
+
+		assert(sdst_reg->flags == dst_reg->flags);
+
+		if (src3_reg) {
+			/* The vector op already claimed src3, so the scalar op has
+			 * to name the same register.  The index is advanced outside
+			 * of assert() so the code behaves the same with NDEBUG, and
+			 * it is bounds-checked first: with a 3-source vector op the
+			 * scalar operand would be a sixth entry, which regs[] does
+			 * not have.
+			 */
+			assert((unsigned)reg < ARRAY_SIZE(instr->regs));
+			assert(src3_reg == instr->regs[reg]);
+			reg++;
+		} else {
+			src3_reg = instr->regs[reg++];
+		}
+
+		alu->scalar_dest         = sdst_reg->num;
+		alu->scalar_write_mask   = reg_alu_dst_swiz(sdst_reg);
+		alu->scalar_opc          = instr->alu.scalar_opc;
+	} else {
+		/* not sure if this is required, but adreno compiler seems
+		 * to always set scalar opc to MAXs if it is not used:
+		 */
+		alu->scalar_opc = MAXs;
+	}
+
+	if (src3_reg) {
+		reg_update_stats(src3_reg, info, false);
+
+		alu->src3_reg            = src3_reg->num;
+		alu->src3_swiz           = reg_alu_src_swiz(src3_reg);
+		alu->src3_reg_negate     = !!(src3_reg->flags & IR_REG_NEGATE);
+		alu->src3_reg_abs        = !!(src3_reg->flags & IR_REG_ABS);
+		alu->src3_sel            = !(src3_reg->flags & IR_REG_CONST);
+	} else {
+		/* not sure if this is required, but adreno compiler seems
+		 * to always set register bank for 3rd src if unused:
+		 */
+		alu->src3_sel = 1;
+	}
+
+	if (instr->pred != IR_PRED_NONE) {
+		alu->pred_select = (instr->pred == IR_PRED_EQ) ? 3 : 2;
+	}
+
+	return 0;
+}
+
+/* Encode one instruction by dispatching on its type.  Returns the
+ * emitter's result, or -1 for a type that is neither IR_FETCH nor IR_ALU.
+ */
+static int instr_emit(struct ir_instruction *instr, uint32_t *dwords,
+		uint32_t idx, struct ir_shader_info *info)
+{
+	if (instr->instr_type == IR_FETCH)
+		return instr_emit_fetch(instr, dwords, idx, info);
+
+	if (instr->instr_type == IR_ALU)
+		return instr_emit_alu(instr, dwords, info);
+
+	return -1;
+}
+
+
+/* Append a register operand to an instruction.  'swizzle' may be NULL;
+ * otherwise it is copied into the shader's heap.
+ */
+struct ir_register * ir_reg_create(struct ir_instruction *instr,
+		int num, const char *swizzle, int flags)
+{
+	struct ir_shader *shader = instr->shader;
+	struct ir_register *operand;
+
+	DEBUG_MSG("%x, %d, %s", flags, num, swizzle);
+	assert(num <= REG_MASK);
+	assert(instr->regs_count < ARRAY_SIZE(instr->regs));
+
+	operand = ir_alloc(shader, sizeof(*operand));
+	operand->flags   = flags;
+	operand->num     = num;
+	operand->swizzle = ir_strdup(shader, swizzle);
+
+	instr->regs[instr->regs_count] = operand;
+	instr->regs_count++;
+	return operand;
+}
+
+/* Fold one register operand into the shader statistics in *info.
+ *
+ * Const and export operands are ignored.  For the rest, max_reg tracks
+ * the highest register number seen, a destination sets its bit in
+ * regs_written, and a source whose bit is not set yet raises
+ * max_input_reg.
+ */
+static void reg_update_stats(struct ir_register *reg,
+		struct ir_shader_info *info, bool dest)
+{
+	uint64_t bit;
+
+	if (reg->flags & (IR_REG_CONST|IR_REG_EXPORT))
+		return;
+
+	/* Register numbers go up to REG_MASK (63) and regs_written is 64 bits
+	 * wide, so the shift has to be done in 64 bits; a plain "1 << num" is
+	 * an int shift and is undefined from bit 31 upwards.
+	 */
+	bit = (uint64_t)1 << reg->num;
+
+	info->max_reg = max(info->max_reg, reg->num);
+
+	if (dest) {
+		info->regs_written |= bit;
+	} else if (!(info->regs_written & bit)) {
+		/* for registers that haven't been written, they must be an
+		 * input register that the thread scheduler (presumably?)
+		 * needs to know about:
+		 */
+		info->max_input_reg = max(info->max_input_reg, reg->num);
+	}
+}
+
+/* Pack the first n characters of a fetch source swizzle, two bits per
+ * component with component 0 in the lowest bits (x=0, y=1, z=2, w=3).
+ * Any other character is reported and encoded like 'x'.
+ */
+static uint32_t reg_fetch_src_swiz(struct ir_register *reg, uint32_t n)
+{
+	uint32_t packed = 0;
+	int c;
+
+	assert(reg->flags == 0);
+	assert(reg->swizzle);
+
+	DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle);
+
+	/* highest component first, so that it ends up in the top bits */
+	c = n-1;
+	while (c >= 0) {
+		uint32_t sel;
+
+		switch (reg->swizzle[c]) {
+		case 'y': sel = 0x1; break;
+		case 'z': sel = 0x2; break;
+		case 'w': sel = 0x3; break;
+		case 'x': sel = 0x0; break;
+		default:
+			ERROR_MSG("invalid fetch src swizzle: %s", reg->swizzle);
+			sel = 0x0;
+			break;
+		}
+
+		packed = (packed << 2) | sel;
+		c--;
+	}
+
+	return packed;
+}
+
+/* Pack a fetch destination swizzle, three bits per component with
+ * component 0 in the lowest bits: x=0, y=1, z=2, w=3, '0'=4, '1'=5,
+ * '_'=7.  Any other character is reported and encoded like 'x'.  Without
+ * a swizzle the result is 0x688, which is the encoding of "xyzw".
+ */
+static uint32_t reg_fetch_dst_swiz(struct ir_register *reg)
+{
+	uint32_t packed = 0;
+	int c;
+
+	assert(reg->flags == 0);
+	assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
+
+	DEBUG_MSG("fetch dst R%d.%s", reg->num, reg->swizzle);
+
+	if (!reg->swizzle)
+		return 0x688;
+
+	/* highest component first, so that it ends up in the top bits */
+	for (c = 3; c >= 0; c--) {
+		uint32_t sel;
+
+		switch (reg->swizzle[c]) {
+		case 'y': sel = 0x1; break;
+		case 'z': sel = 0x2; break;
+		case 'w': sel = 0x3; break;
+		case '0': sel = 0x4; break;
+		case '1': sel = 0x5; break;
+		case '_': sel = 0x7; break;
+		case 'x': sel = 0x0; break;
+		default:
+			ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
+			sel = 0x0;
+			break;
+		}
+
+		packed = (packed << 3) | sel;
+	}
+
+	return packed;
+}
+
+/* actually, a write-mask */
+/* Builds a 4-bit mask with bit i set when position i of the swizzle holds
+ * the matching letter of "xyzw"; '_' leaves the bit clear.  Without a
+ * swizzle all four bits are set (0xf).
+ */
+static uint32_t reg_alu_dst_swiz(struct ir_register *reg)
+{
+	uint32_t swiz = 0;
+	int i;
+
+	assert((reg->flags & ~IR_REG_EXPORT) == 0);
+	assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
+
+	DEBUG_MSG("alu dst R%d.%s", reg->num, reg->swizzle);
+
+	if (reg->swizzle) {
+		/* walk from w down to x so that x ends up in bit 0 */
+		for (i = 3; i >= 0; i--) {
+			swiz <<= 1;
+			if (reg->swizzle[i] == "xyzw"[i]) {
+				swiz |= 0x1;
+			} else if (reg->swizzle[i] != '_') {
+				ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
+				/* NOTE(review): leaving the loop here returns a mask
+				 * whose bits have not been shifted into their final
+				 * positions -- confirm callers do not rely on it
+				 */
+				break;
+			}
+		}
+	} else {
+		swiz = 0xf;
+	}
+
+	return swiz;
+}
+
+/* Pack an ALU source swizzle, two bits per component with component 0 in
+ * the lowest bits.  Each component is stored relative to its own
+ * position, (selected - position) & 3, so the identity swizzle "xyzw" and
+ * a missing swizzle both encode as 0.  Any character other than x/y/z/w
+ * is reported and treated like 'x'.
+ */
+static uint32_t reg_alu_src_swiz(struct ir_register *reg)
+{
+	uint32_t packed = 0;
+	int pos;
+
+	assert((reg->flags & IR_REG_EXPORT) == 0);
+	assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
+
+	DEBUG_MSG("vector src R%d.%s", reg->num, reg->swizzle);
+
+	if (!reg->swizzle)
+		return 0x0;
+
+	/* highest component first, so that it ends up in the top bits */
+	for (pos = 3; pos >= 0; pos--) {
+		int sel;
+
+		switch (reg->swizzle[pos]) {
+		case 'y': sel = 0x1; break;
+		case 'z': sel = 0x2; break;
+		case 'w': sel = 0x3; break;
+		case 'x': sel = 0x0; break;
+		default:
+			ERROR_MSG("invalid vector src swizzle: %s", reg->swizzle);
+			sel = 0x0;
+			break;
+		}
+
+		packed <<= 2;
+		packed |= (sel - pos) & 0x3;
+	}
+
+	return packed;
+}
diff --git a/src/gallium/drivers/freedreno/ir.h b/src/gallium/drivers/freedreno/ir.h
new file mode 100644
index 0000000..e802544
--- /dev/null
+++ b/src/gallium/drivers/freedreno/ir.h
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2012 Rob Clark <robdclark at gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IR_H_
+#define IR_H_
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "instr.h"
+
+/* low level intermediate representation of an adreno shader program */
+
+struct ir_shader;
+
+struct ir_shader * fd_asm_parse(const char *src);
+
+/* Statistics filled in by ir_shader_assemble(). */
+struct ir_shader_info {
+	uint16_t sizedwords;    /* size of the assembled program, in dwords */
+	int8_t   max_reg;   /* highest GPR # used by shader */
+	uint8_t  max_input_reg; /* highest GPR # read before being written */
+	uint64_t regs_written;  /* bit n set once GPR n has been a destination */
+};
+
+/* One register operand of an instruction, created by ir_reg_create(). */
+struct ir_register {
+	enum {
+		IR_REG_CONST  = 0x1,
+		IR_REG_EXPORT = 0x2,
+		IR_REG_NEGATE = 0x4,
+		IR_REG_ABS    = 0x8,
+	} flags;            /* bitwise OR of the IR_REG_* values */
+	int num;            /* register number */
+	char *swizzle;      /* component string, or NULL if none was given */
+};
+
+/* Predication mode of an instruction; IR_PRED_NONE means unpredicated. */
+enum ir_pred {
+	IR_PRED_NONE = 0,
+	IR_PRED_EQ = 1,
+	IR_PRED_NE = 2,
+};
+
+/* One ALU or FETCH instruction inside an exec CF, created by
+ * ir_instr_create().
+ */
+struct ir_instruction {
+	struct ir_shader *shader;
+	enum {
+		IR_FETCH,
+		IR_ALU,
+	} instr_type;           /* selects which union member below is valid */
+	enum ir_pred pred;      /* copied from shader->pred at creation time */
+	int sync;
+	unsigned regs_count;
+	/* operands in the order the emitters in ir.c consume them:
+	 *   FETCH: dst, src
+	 *   ALU:   dst, [src3 for 3-source vector ops], src1, src2,
+	 *          then scalar dst and src3 when a scalar op is present
+	 */
+	struct ir_register *regs[5];
+	union {
+		/* FETCH specific: */
+		struct {
+			instr_fetch_opc_t opc;
+			unsigned const_idx;
+			/* maybe vertex fetch specific: */
+			unsigned const_idx_sel;
+			enum sq_surfaceformat fmt;
+			bool is_signed : 1;
+			bool is_normalized : 1;
+			uint32_t stride;
+			uint32_t offset;
+		} fetch;
+		/* ALU specific: */
+		struct {
+			instr_vector_opc_t vector_opc;  /* ~0 when there is no vector op */
+			instr_scalar_opc_t scalar_opc;  /* ~0 when there is no scalar op */
+			bool vector_clamp : 1;
+			bool scalar_clamp : 1;
+		} alu;
+	};
+};
+
+/* One control-flow node of a shader, created by ir_cf_create(). */
+struct ir_cf {
+	struct ir_shader *shader;
+	instr_cf_opc_t cf_type;     /* selects which union member below is valid */
+
+	union {
+		/* EXEC/EXEC_END specific: */
+		struct {
+			unsigned instrs_count;
+			struct ir_instruction *instrs[6];
+			/* computed during assembly by shader_resolve() in ir.c */
+			uint32_t addr, cnt, sequence;
+		} exec;
+		/* ALLOC specific: */
+		struct {
+			instr_alloc_type_t type;   /* SQ_POSITION or SQ_PARAMETER_PIXEL */
+			int size;
+		} alloc;
+	};
+};
+
+/* somewhat arbitrary limits.. */
+/* They size the header arrays in struct ir_shader; the ir_*_create()
+ * functions assert when an array is already full.
+ */
+#define MAX_ATTRIBUTES 32
+#define MAX_CONSTS     32
+#define MAX_SAMPLERS   32
+#define MAX_UNIFORMS   32
+#define MAX_VARYINGS   32
+
+/* Attribute header entry, see ir_attribute_create(). */
+struct ir_attribute {
+	const char *name;   /* copy held in the shader's heap */
+	int rstart;         /* first register */
+	int num;            /* number of registers */
+};
+
+/* Constant header entry, see ir_const_create(). */
+struct ir_const {
+	float val[4];       /* the four component values */
+	int cstart;         /* first const register */
+};
+
+/* Sampler header entry, see ir_sampler_create(). */
+struct ir_sampler {
+	const char *name;   /* copy held in the shader's heap */
+	int idx;            /* index passed to ir_sampler_create() */
+};
+
+/* Uniform header entry, see ir_uniform_create(). */
+struct ir_uniform {
+	const char *name;   /* copy held in the shader's heap */
+	int cstart;         /* first const register */
+	int num;            /* number of const registers */
+};
+
+/* Varying header entry, see ir_varying_create(). */
+struct ir_varying {
+	const char *name;   /* copy held in the shader's heap */
+	int rstart;         /* first register */
+	int num;            /* number of registers */
+};
+
+/* A complete shader program: its control-flow nodes, the header tables,
+ * and the heap from which all of those objects are carved.
+ */
+struct ir_shader {
+	unsigned cfs_count;
+	struct ir_cf *cfs[0x56];
+	/* backing store for ir_alloc() in ir.c; heap_idx is the next free
+	 * element
+	 */
+	uint32_t heap[100 * 4096];
+	unsigned heap_idx;
+
+	enum ir_pred pred;  /* pred inherited by newly created instrs */
+
+	/* @ headers: */
+	uint32_t attributes_count;
+	struct ir_attribute *attributes[MAX_ATTRIBUTES];
+
+	uint32_t consts_count;
+	struct ir_const *consts[MAX_CONSTS];
+
+	uint32_t samplers_count;
+	struct ir_sampler *samplers[MAX_SAMPLERS];
+
+	uint32_t uniforms_count;
+	struct ir_uniform *uniforms[MAX_UNIFORMS];
+
+	uint32_t varyings_count;
+	struct ir_varying *varyings[MAX_VARYINGS];
+
+};
+
+/* shader lifetime; ir_shader_destroy() also releases every object that
+ * was created from the shader:
+ */
+struct ir_shader * ir_shader_create(void);
+void ir_shader_destroy(struct ir_shader *shader);
+/* returns a buffer of info->sizedwords dwords that the caller releases
+ * with free(), or NULL on failure:
+ */
+void * ir_shader_assemble(struct ir_shader *shader,
+		struct ir_shader_info *info);
+
+/* header entries; each call appends to the matching table in the shader: */
+struct ir_attribute * ir_attribute_create(struct ir_shader *shader,
+		int rstart, int num, const char *name);
+struct ir_const * ir_const_create(struct ir_shader *shader,
+		int cstart, float v0, float v1, float v2, float v3);
+struct ir_sampler * ir_sampler_create(struct ir_shader *shader,
+		int idx, const char *name);
+struct ir_uniform * ir_uniform_create(struct ir_shader *shader,
+		int cstart, int num, const char *name);
+struct ir_varying * ir_varying_create(struct ir_shader *shader,
+		int rstart, int num, const char *name);
+
+/* program structure: CF nodes hold instructions, instructions hold
+ * register operands; each call appends to its parent:
+ */
+struct ir_cf * ir_cf_create(struct ir_shader *shader, instr_cf_opc_t cf_type);
+
+struct ir_instruction * ir_instr_create(struct ir_cf *cf, int instr_type);
+
+struct ir_register * ir_reg_create(struct ir_instruction *instr,
+		int num, const char *swizzle, int flags);
+
+/* some helper fxns: */
+
+/* Create an ALLOC control-flow node with the given type and size. */
+static inline struct ir_cf *
+ir_cf_create_alloc(struct ir_shader *shader, instr_alloc_type_t type, int size)
+{
+	struct ir_cf *node = ir_cf_create(shader, ALLOC);
+
+	if (node) {
+		node->alloc.size = size;
+		node->alloc.type = type;
+	}
+	return node;
+}
+/* Create an ALU instruction with the given vector and scalar opcodes. */
+static inline struct ir_instruction *
+ir_instr_create_alu(struct ir_cf *cf, instr_vector_opc_t vop, instr_scalar_opc_t sop)
+{
+	struct ir_instruction *alu = ir_instr_create(cf, IR_ALU);
+
+	if (alu) {
+		alu->alu.scalar_opc = sop;
+		alu->alu.vector_opc = vop;
+	}
+	return alu;
+}
+/* Create a VTX_FETCH instruction.
+ *
+ * ci/cis fill const_idx/const_idx_sel, the remaining arguments fill the
+ * fields of the same name.  is_normalized and offset are left for the
+ * caller to set.  Returns NULL if the instruction could not be created,
+ * like the other create helpers in this header.
+ */
+static inline struct ir_instruction *
+ir_instr_create_vtx_fetch(struct ir_cf *cf, int ci, int cis,
+		enum sq_surfaceformat fmt, bool is_signed, int stride)
+{
+	struct ir_instruction *instr = ir_instr_create(cf, IR_FETCH);
+	if (!instr)
+		return instr;
+	instr->fetch.opc = VTX_FETCH;
+	instr->fetch.const_idx = ci;
+	instr->fetch.const_idx_sel = cis;
+	instr->fetch.fmt = fmt;
+	instr->fetch.is_signed = is_signed;
+	instr->fetch.stride = stride;
+	return instr;
+}
+/* Create a TEX_FETCH instruction using fetch constant 'ci'.  Returns
+ * NULL if the instruction could not be created, like the other create
+ * helpers in this header.
+ */
+static inline struct ir_instruction *
+ir_instr_create_tex_fetch(struct ir_cf *cf, int ci)
+{
+	struct ir_instruction *instr = ir_instr_create(cf, IR_FETCH);
+	if (!instr)
+		return instr;
+	instr->fetch.opc = TEX_FETCH;
+	instr->fetch.const_idx = ci;
+	return instr;
+}
+
+
+#endif /* IR_H_ */
diff --git a/src/gallium/targets/dri-freedreno/Makefile.am b/src/gallium/targets/dri-freedreno/Makefile.am
new file mode 100644
index 0000000..59293a6
--- /dev/null
+++ b/src/gallium/targets/dri-freedreno/Makefile.am
@@ -0,0 +1,71 @@
+# Copyright © 2012 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CFLAGS = \
+	$(GALLIUM_CFLAGS) \
+	$(PTHREAD_CFLAGS) \
+	$(LIBDRM_CFLAGS)
+AM_CPPFLAGS = \
+	-I$(top_srcdir)/src/gallium/drivers \
+	-I$(top_srcdir)/src/gallium/winsys \
+	-I$(top_srcdir)/src/mesa \
+	-I$(top_srcdir)/src/mapi \
+	-I$(top_builddir)/src/mesa/drivers/dri/common \
+	-DGALLIUM_RBUG \
+	-DGALLIUM_TRACE
+
+# kgsl_dri.la is built as a libtool module (see -module below) and
+# installed into $(DRI_DRIVER_INSTALL_DIR).
+dridir = $(DRI_DRIVER_INSTALL_DIR)
+dri_LTLIBRARIES = kgsl_dri.la
+
+# NOTE(review): dummy.cpp is not part of the real source list below; this
+# looks like the usual automake idiom for making the final link use the
+# C++ linker (presumably for the optional LLVM libraries) -- confirm.
+nodist_EXTRA_kgsl_dri_la_SOURCES = dummy.cpp
+kgsl_dri_la_SOURCES = \
+	target.c \
+	$(top_srcdir)/src/mesa/drivers/dri/common/utils.c \
+	$(top_srcdir)/src/mesa/drivers/dri/common/dri_util.c \
+	$(top_srcdir)/src/mesa/drivers/dri/common/xmlconfig.c
+
+kgsl_dri_la_LDFLAGS = -module -avoid-version -shared -no-undefined
+
+# Everything the driver module is linked from: core mesa/gallium, the DRI
+# state tracker, the freedreno winsys and pipe driver, and the trace/rbug
+# wrapper drivers that go with -DGALLIUM_TRACE / -DGALLIUM_RBUG above.
+kgsl_dri_la_LIBADD = \
+	$(top_builddir)/src/mesa/libmesagallium.la \
+	$(top_builddir)/src/gallium/auxiliary/libgallium.la \
+	$(top_builddir)/src/gallium/state_trackers/dri/drm/libdridrm.la \
+	$(top_builddir)/src/gallium/winsys/freedreno/drm/libfreedrenodrm.la \
+	$(top_builddir)/src/gallium/drivers/freedreno/libfreedreno.la \
+	$(top_builddir)/src/gallium/drivers/trace/libtrace.la \
+	$(top_builddir)/src/gallium/drivers/rbug/librbug.la \
+	$(GALLIUM_DRI_LIB_DEPS) \
+	$(LIBDRM_LIBS) \
+	$(FREEDRENO_LIBS)
+
+# LLVM link flags and libraries are only added when HAVE_MESA_LLVM is set.
+if HAVE_MESA_LLVM
+kgsl_dri_la_LDFLAGS += $(LLVM_LDFLAGS)
+kgsl_dri_la_LIBADD += $(LLVM_LIBS)
+endif
+
+# Provide compatibility with scripts for the old Mesa build system for
+# a while by putting a link to the driver into /lib of the build tree.
+# (The link is created as $(top_builddir)/$(LIB_DIR)/gallium/kgsl_dri.so.)
+all-local: kgsl_dri.la
+	$(MKDIR_P) $(top_builddir)/$(LIB_DIR)/gallium
+	ln -f .libs/kgsl_dri.so $(top_builddir)/$(LIB_DIR)/gallium/kgsl_dri.so
diff --git a/src/gallium/targets/dri-freedreno/target.c b/src/gallium/targets/dri-freedreno/target.c
new file mode 100644
index 0000000..dcaf299
--- /dev/null
+++ b/src/gallium/targets/dri-freedreno/target.c
@@ -0,0 +1,20 @@
+
+#include "target-helpers/inline_debug_helper.h"
+#include "state_tracker/drm_driver.h"
+#include "freedreno/drm/freedreno_drm_public.h"
+
+/*
+ * Screen factory for this target: create the freedreno screen for 'fd'
+ * and pass it through debug_screen_wrap().
+ *
+ * Returns NULL if fd_drm_screen_create() returns NULL; in that case
+ * debug_screen_wrap() is not called.
+ */
+static struct pipe_screen *
+create_screen(int fd)
+{
+   struct pipe_screen *screen = fd_drm_screen_create(fd);
+
+   return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+/* NOTE(review): presumably exports create_screen() as this target's screen
+ * factory under the names "freedreno" / "kgsl"; the meaning of each
+ * argument (including the trailing NULL) is defined by the macro in
+ * state_tracker/drm_driver.h -- confirm there. */
+DRM_DRIVER_DESCRIPTOR("freedreno", "kgsl", create_screen, NULL)
diff --git a/src/gallium/targets/egl-static/Makefile.am b/src/gallium/targets/egl-static/Makefile.am
index 5c40ae8..31dbc79 100644
--- a/src/gallium/targets/egl-static/Makefile.am
+++ b/src/gallium/targets/egl-static/Makefile.am
@@ -191,6 +191,15 @@ egl_gallium_la_LIBADD += \
 	$(top_builddir)/src/gallium/drivers/svga/libsvga.la
 endif
 
+# freedreno: define _EGL_PIPE_FREEDRENO (tested in egl_pipe.c) and link in
+# the freedreno DRM winsys and pipe driver.
+if HAVE_GALLIUM_FREEDRENO
+AM_CPPFLAGS += -D_EGL_PIPE_FREEDRENO=1
+egl_gallium_la_LIBADD += \
+	$(top_builddir)/src/gallium/winsys/freedreno/drm/libfreedrenodrm.la \
+	$(top_builddir)/src/gallium/drivers/freedreno/libfreedreno.la \
+	$(FREEDRENO_LIBS)
+
+endif
+
 if HAVE_GALLIUM_SOFTPIPE
 AM_CPPFLAGS += -DGALLIUM_SOFTPIPE -DGALLIUM_RBUG -DGALLIUM_TRACE
 egl_gallium_la_LIBADD += \
diff --git a/src/gallium/targets/egl-static/egl_pipe.c b/src/gallium/targets/egl-static/egl_pipe.c
index 407c6a8..e05490b 100644
--- a/src/gallium/targets/egl-static/egl_pipe.c
+++ b/src/gallium/targets/egl-static/egl_pipe.c
@@ -45,6 +45,8 @@
 /* for vmwgfx */
 #include "svga/drm/svga_drm_public.h"
 #include "svga/svga_public.h"
+/* for freedreno */
+#include "freedreno/drm/freedreno_drm_public.h"
 
 static struct pipe_screen *
 pipe_i915_create_screen(int fd)
@@ -179,6 +181,24 @@ pipe_vmwgfx_create_screen(int fd)
 #endif
 }
 
+/*
+ * Create the freedreno pipe screen for 'fd'.
+ *
+ * When _EGL_PIPE_FREEDRENO is enabled, the screen from
+ * fd_drm_screen_create() is passed through debug_screen_wrap(); NULL is
+ * returned if screen creation fails.  When it is not enabled this always
+ * returns NULL.
+ */
+static struct pipe_screen *
+pipe_freedreno_create_screen(int fd)
+{
+#if _EGL_PIPE_FREEDRENO
+   struct pipe_screen *screen = fd_drm_screen_create(fd);
+
+   return screen ? debug_screen_wrap(screen) : NULL;
+#else
+   return NULL;
+#endif
+}
+
 struct pipe_screen *
 egl_pipe_create_drm_screen(const char *name, int fd)
 {
@@ -194,6 +214,8 @@ egl_pipe_create_drm_screen(const char *name, int fd)
       return pipe_radeonsi_create_screen(fd);
    else if (strcmp(name, "vmwgfx") == 0)
       return pipe_vmwgfx_create_screen(fd);
+   else if (strcmp(name, "kgsl") == 0)
+      return pipe_freedreno_create_screen(fd);
    else
       return NULL;
 }
diff --git a/src/gallium/winsys/freedreno/drm/.gitignore b/src/gallium/winsys/freedreno/drm/.gitignore
new file mode 100644
index 0000000..f3c7a7c
--- /dev/null
+++ b/src/gallium/winsys/freedreno/drm/.gitignore
@@ -0,0 +1 @@
+Makefile
diff --git a/src/gallium/winsys/freedreno/drm/Makefile.am b/src/gallium/winsys/freedreno/drm/Makefile.am
new file mode 100644
index 0000000..58f69d1
--- /dev/null
+++ b/src/gallium/winsys/freedreno/drm/Makefile.am
@@ -0,0 +1,32 @@
+# Copyright © 2012 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+include $(top_srcdir)/src/gallium/Automake.inc
+
+# The -I for src/gallium/drivers lets freedreno_drm_winsys.c include
+# "freedreno/freedreno_screen.h".
+AM_CFLAGS = \
+	-I$(top_srcdir)/src/gallium/drivers \
+	$(GALLIUM_CFLAGS) \
+	$(FREEDRENO_CFLAGS)
+
+# Not installed (noinst): libfreedrenodrm.la is linked into the
+# dri-freedreno and egl-static targets.
+noinst_LTLIBRARIES = libfreedrenodrm.la
+
+libfreedrenodrm_la_SOURCES = freedreno_drm_winsys.c
diff --git a/src/gallium/winsys/freedreno/drm/freedreno_drm_public.h b/src/gallium/winsys/freedreno/drm/freedreno_drm_public.h
new file mode 100644
index 0000000..a7ba207
--- /dev/null
+++ b/src/gallium/winsys/freedreno/drm/freedreno_drm_public.h
@@ -0,0 +1,9 @@
+
+#ifndef __FREEDRENO_DRM_PUBLIC_H__
+#define __FREEDRENO_DRM_PUBLIC_H__
+
+/* Forward declaration; this header only uses the type through a pointer. */
+struct pipe_screen;
+
+/*
+ * Create a pipe_screen for the device behind the file descriptor 'drmFD'.
+ * Returns NULL if fd_device_new() fails for that descriptor; otherwise
+ * returns the result of fd_screen_create() (see freedreno_drm_winsys.c).
+ */
+struct pipe_screen *fd_drm_screen_create(int drmFD);
+
+#endif
diff --git a/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c b/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c
new file mode 100644
index 0000000..8afb9cd
--- /dev/null
+++ b/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c
@@ -0,0 +1,18 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+
+#include "freedreno_drm_public.h"
+
+#include "freedreno/freedreno_screen.h"
+
+/*
+ * Winsys entry point: wrap 'fd' in an fd_device and build the freedreno
+ * screen on top of it.
+ *
+ * Returns NULL if fd_device_new() fails; otherwise returns whatever
+ * fd_screen_create() returns for the new device.
+ */
+struct pipe_screen *
+fd_drm_screen_create(int fd)
+{
+	struct fd_device *dev = fd_device_new(fd);
+
+	return dev ? fd_screen_create(dev) : NULL;
+}
-- 
1.8.1.2



More information about the mesa-dev mailing list