[PATCH] Add EXA support

Connor Behan connor.behan at gmail.com
Thu Jul 5 17:51:13 PDT 2012


For real this time. This allows the r128 driver to continue having 2D
acceleration without XAA. Implemented hooks are Solid, Copy and
Composite. They appear to pass all rendercheck tests, except the
gradient test which XAA also fails. Tested on multiple color depths,
with and without DRI, with and without the composite extension. Hardware
cursor, Xvideo and page flipping are supported as well.
Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=47866

Signed-off-by: Connor Behan <connor.behan at gmail.com>
---
 configure.ac          |  98 +++++++
 src/Makefile.am       |   6 +-
 src/r128.h            |  84 ++++++
 src/r128_accel.c      |  29 ++-
 src/r128_cursor.c     |  56 +++-
 src/r128_dri.c        |  57 ++++-
 src/r128_driver.c     | 387 +++++++++++++++++++---------
 src/r128_exa.c        | 496 +++++++++++++++++++++++++++++++++++
 src/r128_exa_render.c | 697 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/r128_video.c      | 122 ++++++---
 10 files changed, 1851 insertions(+), 181 deletions(-)
 create mode 100644 src/r128_exa.c
 create mode 100644 src/r128_exa_render.c

diff --git a/configure.ac b/configure.ac
index 8049006..b861721 100644
--- a/configure.ac
+++ b/configure.ac
@@ -63,6 +63,11 @@ AC_ARG_ENABLE(dri, AS_HELP_STRING([--disable-dri],
               [DRI="$enableval"],
               [DRI=auto])
 
+AC_ARG_ENABLE(exa, AS_HELP_STRING([--disable-exa],
+				  [Disable EXA support [[default=enabled]]]),
+              [EXA="$enableval"],
+              [EXA=yes])
+
 # Store the list of server defined optional extensions in REQUIRED_MODULES
 XORG_DRIVER_CHECK_EXT(RANDR, randrproto)
 XORG_DRIVER_CHECK_EXT(RENDER, renderproto)
@@ -112,6 +117,39 @@ fi
 
 SAVE_CPPFLAGS="$CPPFLAGS"
 CPPFLAGS="$CPPFLAGS $XORG_CFLAGS"
+# Properly handle EXA.
+AC_MSG_CHECKING([whether to enable EXA support])
+if test "x$EXA" = xyes; then
+        AC_MSG_RESULT(yes)
+
+        SAVE_CPPFLAGS="$CPPFLAGS"
+        CPPFLAGS="$CPPFLAGS $XORG_CFLAGS"
+        AC_CHECK_HEADER(exa.h,
+                       [have_exa_h="yes"], [have_exa_h="no"])
+        CPPFLAGS="$SAVE_CPPFLAGS"
+else
+        AC_MSG_RESULT(no)
+fi 
+
+SAVE_CPPFLAGS="$CPPFLAGS"
+CPPFLAGS="$CPPFLAGS $XORG_CFLAGS"
+if test "x$have_exa_h" = xyes; then
+        AC_MSG_CHECKING([whether EXA version is at least 2.0.0])
+        AC_PREPROC_IFELSE([AC_LANG_PROGRAM([[
+#include "exa.h"
+#if EXA_VERSION_MAJOR < 2
+#error OLD EXA!
+#endif
+                          ]])],
+                          [USE_EXA=yes],
+                          [USE_EXA=no])
+        AC_MSG_RESULT($USE_EXA)
+
+        if test "x$USE_EXA" = xyes; then
+                AC_DEFINE(USE_EXA, 1, [Build support for Exa])
+        fi
+fi
+
 AC_CHECK_DECL(XSERVER_LIBPCIACCESS,
 	      [XSERVER_LIBPCIACCESS=yes],[XSERVER_LIBPCIACCESS=no],
 	      [#include "xorg-server.h"])
@@ -125,6 +163,66 @@ if test "x$XSERVER_LIBPCIACCESS" = xyes; then
 fi
 AM_CONDITIONAL(XSERVER_LIBPCIACCESS, test "x$XSERVER_LIBPCIACCESS" = xyes)
 
+# Checks for headers/macros for byte swapping
+# Known variants:
+#	<byteswap.h> bswap_16, bswap_32, bswap_64  (glibc)
+#	<sys/endian.h> __swap16, __swap32, __swap64 (OpenBSD)
+#	<sys/endian.h> bswap16, bswap32, bswap64 (other BSD's)
+#	and a fallback to local macros if none of the above are found
+
+# if <byteswap.h> is found, assume it's the correct version
+AC_CHECK_HEADERS([byteswap.h])
+
+# if <sys/endian.h> is found, have to check which version
+AC_CHECK_HEADER([sys/endian.h], [HAVE_SYS_ENDIAN_H="yes"], [HAVE_SYS_ENDIAN_H="no"])
+
+if test "x$HAVE_SYS_ENDIAN_H" = "xyes" ; then
+	AC_MSG_CHECKING([for __swap16 variant of <sys/endian.h> byteswapping macros])
+	AC_LINK_IFELSE([AC_LANG_PROGRAM([
+#include <sys/types.h>
+#include <sys/endian.h>
+ ], [
+int a = 1, b;
+b = __swap16(a);
+ ])
+], [SYS_ENDIAN__SWAP='yes'], [SYS_ENDIAN__SWAP='no'])
+	AC_MSG_RESULT([$SYS_ENDIAN__SWAP])
+
+	AC_MSG_CHECKING([for bswap16 variant of <sys/endian.h> byteswapping macros])
+	AC_LINK_IFELSE([AC_LANG_PROGRAM([
+#include <sys/types.h>
+#include <sys/endian.h>
+ ], [
+int a = 1, b;
+b = bswap16(a);
+ ])
+], [SYS_ENDIAN_BSWAP='yes'], [SYS_ENDIAN_BSWAP='no'])
+	AC_MSG_RESULT([$SYS_ENDIAN_BSWAP])
+
+    	if test "$SYS_ENDIAN_BSWAP" = "yes" ; then
+		USE_SYS_ENDIAN_H=yes
+		BSWAP=bswap
+	else	
+	    	if test "$SYS_ENDIAN__SWAP" = "yes" ; then
+			USE_SYS_ENDIAN_H=yes
+			BSWAP=__swap
+		else
+			USE_SYS_ENDIAN_H=no
+		fi
+	fi
+
+	if test "$USE_SYS_ENDIAN_H" = "yes" ; then
+	    AC_DEFINE([USE_SYS_ENDIAN_H], 1, 
+		[Define to use byteswap macros from <sys/endian.h>])
+	    AC_DEFINE_UNQUOTED([bswap_16], ${BSWAP}16, 
+			[Define to 16-bit byteswap macro])
+	    AC_DEFINE_UNQUOTED([bswap_32], ${BSWAP}32, 
+			[Define to 32-bit byteswap macro])
+	    AC_DEFINE_UNQUOTED([bswap_64], ${BSWAP}64, 
+			[Define to 64-bit byteswap macro])
+	fi
+fi
+
 AC_SUBST([moduledir])
 
 DRIVER_NAME=r128
diff --git a/src/Makefile.am b/src/Makefile.am
index f8bc8d1..4ee9725 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -30,6 +30,10 @@ if DRI
 R128_DRI_SRCS = r128_dri.c
 endif
 
+if USE_EXA
+R128_EXA_SRCS = r128_exa.c
+endif
+
 AM_CFLAGS = @XORG_CFLAGS@ @DRI_CFLAGS@
 
 r128_drv_la_LTLIBRARIES = r128_drv.la
@@ -37,7 +41,7 @@ r128_drv_la_LDFLAGS = -module -avoid-version
 r128_drv_ladir = @moduledir@/drivers
 r128_drv_la_SOURCES = \
 	r128_accel.c r128_cursor.c r128_dga.c r128_driver.c \
-	r128_video.c r128_misc.c r128_probe.c $(R128_DRI_SRCS)
+	r128_video.c r128_misc.c r128_probe.c $(R128_EXA_SRCS) $(R128_DRI_SRCS)
 
 EXTRA_DIST = \
         compat-api.h \
diff --git a/src/r128.h b/src/r128.h
index 3c51879..fc8b182 100644
--- a/src/r128.h
+++ b/src/r128.h
@@ -43,6 +43,11 @@
 				/* PCI support */
 #include "xf86Pci.h"
 
+				/* EXA support */
+#ifdef USE_EXA
+#include "exa.h"
+#endif
+
 				/* XAA and Cursor Support */
 #include "xaa.h"
 #include "xf86Cursor.h"
@@ -70,6 +75,36 @@
 
 #include "r128_probe.h"
 
+#if HAVE_BYTESWAP_H
+#include <byteswap.h>
+#elif defined(USE_SYS_ENDIAN_H)
+#include <sys/endian.h>
+#else
+#define bswap_16(value)  \
+        ((((value) & 0xff) << 8) | ((value) >> 8))
+
+#define bswap_32(value) \
+        (((uint32_t)bswap_16((uint16_t)((value) & 0xffff)) << 16) | \
+        (uint32_t)bswap_16((uint16_t)((value) >> 16)))
+ 
+#define bswap_64(value) \
+        (((uint64_t)bswap_32((uint32_t)((value) & 0xffffffff)) \
+            << 32) | \
+        (uint64_t)bswap_32((uint32_t)((value) >> 32)))
+#endif
+
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+#define le32_to_cpu(x) bswap_32(x)
+#define le16_to_cpu(x) bswap_16(x)
+#define cpu_to_le32(x) bswap_32(x)
+#define cpu_to_le16(x) bswap_16(x)
+#else
+#define le32_to_cpu(x) (x)
+#define le16_to_cpu(x) (x)
+#define cpu_to_le32(x) (x)
+#define cpu_to_le16(x) (x)
+#endif
+
 #define R128_DEBUG          0   /* Turn off debugging output               */
 #define R128_IDLE_RETRY    32   /* Fall out of idle loops after this count */
 #define R128_TIMEOUT  2000000   /* Fall out of wait loops after this count */
@@ -78,6 +113,8 @@
 #define R128_VBIOS_SIZE 0x00010000
 
 #if R128_DEBUG
+#include "r128_version.h"
+
 #define R128TRACE(x)                                          \
     do {                                                      \
 	ErrorF("(**) %s(%d): ", R128_NAME, pScrn->scrnIndex); \
@@ -228,6 +265,36 @@ typedef enum
     MT_STV
 } R128MonitorType;
 
+#ifdef USE_EXA
+struct r128_2d_state {
+    Bool in_use;
+    Bool composite_setup;   
+    uint32_t dst_pitch_offset;
+    uint32_t src_pitch_offset;
+    uint32_t dp_gui_master_cntl;
+    uint32_t dp_cntl;
+    uint32_t dp_write_mask;
+    uint32_t dp_brush_frgd_clr;
+    uint32_t dp_brush_bkgd_clr;
+    uint32_t dp_src_frgd_clr;
+    uint32_t dp_src_bkgd_clr;
+    uint32_t default_sc_bottom_right;
+#ifdef R128DRI
+#ifdef RENDER
+    Bool has_mask;
+    int x_offset;
+    int y_offset;
+    int widths[2];
+    int heights[2];
+    Bool is_transform[2];
+    PictTransform *transform[2];
+    PixmapPtr src_pix;
+    PixmapPtr msk_pix;
+#endif
+#endif
+};
+#endif
+
 typedef struct {
     EntityInfoPtr     pEnt;
     pciVideoPtr       PciInfo;
@@ -278,6 +345,15 @@ typedef struct {
 
     XAAInfoRecPtr     accel;
     Bool              accelOn;
+
+    Bool	      useEXA;
+    Bool	      RenderAccel;
+#ifdef USE_EXA
+    ExaDriverPtr      ExaDriver;
+    XF86ModReqInfo    exaReq;
+    struct r128_2d_state state_2d;
+#endif
+
     xf86CursorInfoPtr cursor;
     unsigned long     cursor_start;
     unsigned long     cursor_end;
@@ -522,6 +598,14 @@ extern void        R128CCEReleaseIndirect(ScrnInfoPtr pScrn);
 extern void        R128CCEWaitForIdle(ScrnInfoPtr pScrn);
 extern int         R128CCEStop(ScrnInfoPtr pScrn);
 
+#ifdef USE_EXA
+extern Bool	   R128ExaInit(ScreenPtr pScreen);
+extern Bool	   R128GetDatatypeBpp(int bpp, uint32_t *type);
+extern Bool	   R128GetPixmapOffsetPitch(PixmapPtr pPix, uint32_t *pitch_offset);
+extern void	   R128DoPrepareCopy(ScrnInfoPtr pScrn, uint32_t src_pitch_offset,
+				    uint32_t dst_pitch_offset, uint32_t datatype, int alu, Pixel planemask);
+#endif
+
 
 #define CCE_PACKET0( reg, n )						\
 	(R128_CCE_PACKET0 | ((n) << 16) | ((reg) >> 2))
diff --git a/src/r128_accel.c b/src/r128_accel.c
index 7708bda..26413d1 100644
--- a/src/r128_accel.c
+++ b/src/r128_accel.c
@@ -1866,8 +1866,33 @@ Bool R128AccelInit(ScreenPtr pScreen)
     R128InfoPtr   info  = R128PTR(pScrn);
     XAAInfoRecPtr a;
 
-    if (!xf86LoadSubModule(pScrn, "xaa"))
-	return FALSE;
+#ifdef USE_EXA
+    if (info->useEXA) {
+        int errmaj = 0, errmin = 0;
+
+        info->exaReq.majorversion = EXA_VERSION_MAJOR;
+        info->exaReq.minorversion = EXA_VERSION_MINOR;
+
+        xf86DrvMsg(pScrn->scrnIndex,X_INFO,"Loading EXA module...\n");
+        if (!LoadSubModule(pScrn->module, "exa", NULL, NULL, NULL, &info->exaReq, &errmaj, &errmin)) {
+            LoaderErrorMsg(NULL, "exa", errmaj, errmin);
+            return FALSE;
+        }
+	
+	/* Don't init EXA here because it'll be taken care of in mm init */
+	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Allocating EXA driver...\n");
+	info->ExaDriver = exaDriverAlloc();
+	if (!info->ExaDriver) {
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Could not allocate EXA driver...\n");
+	    info->accel = FALSE;
+	}
+	
+	return TRUE;
+    } else
+#endif
+    {
+        if (!xf86LoadSubModule(pScrn, "xaa")) return FALSE;
+    }
 
     if (!(a = info->accel = XAACreateInfoRec())) return FALSE;
 
diff --git a/src/r128_cursor.c b/src/r128_cursor.c
index 62d277d..548b9d2 100644
--- a/src/r128_cursor.c
+++ b/src/r128_cursor.c
@@ -54,6 +54,11 @@
 				/* X and server generic header files */
 #include "xf86.h"
 
+				/* Because for EXA we need to use a different allocator */
+#ifdef USE_EXA
+#include "exa.h"
+#endif
+
 #if X_BYTE_ORDER == X_BIG_ENDIAN
 #define P_SWAP32( a , b )                \
        ((char *)a)[0] = ((char *)b)[3];  \
@@ -253,10 +258,17 @@ Bool R128CursorInit(ScreenPtr pScreen)
     ScrnInfoPtr           pScrn   = xf86ScreenToScrn(pScreen);
     R128InfoPtr           info    = R128PTR(pScrn);
     xf86CursorInfoPtr     cursor;
-    FBAreaPtr             fbarea;
+    FBAreaPtr             fbarea  = NULL;
+#ifdef USE_EXA
+    ExaOffscreenArea*	  osArea  = NULL;
+#endif
     int                   width;
     int                   height;
     int                   size;
+    int                   cpp = info->CurrentLayout.pixel_bytes;
+    int                   width_bytes = (pScrn->displayWidth *
+					 info->CurrentLayout.pixel_bytes);
+    int                   x1, x2, y1, y2;
 
 
     if (!(cursor = info->cursor = xf86CreateCursorInfoRec())) return FALSE;
@@ -284,24 +296,40 @@ Bool R128CursorInit(ScreenPtr pScreen)
     size                      = (cursor->MaxWidth/4) * cursor->MaxHeight;
     width                     = pScrn->displayWidth;
     height                    = (size*2 + 1023) / pScrn->displayWidth;
-    fbarea                    = xf86AllocateOffscreenArea(pScreen,
-							  width,
-							  height,
-							  16,
-							  NULL,
-							  NULL,
-							  NULL);
-
-    if (!fbarea) {
+    
+    if(!info->useEXA) {
+	fbarea = xf86AllocateOffscreenArea(pScreen, width, height,
+					   16, NULL, NULL, NULL);
+
+	if (fbarea) {
+	    x1 = fbarea->box.x1;
+	    x2 = fbarea->box.x2;
+	    y1 = fbarea->box.y1;
+	    y2 = fbarea->box.y2;
+	}
+    } 
+#ifdef USE_EXA    
+    else {
+	osArea = exaOffscreenAlloc(pScreen, width * height, 16,
+				   TRUE, NULL, NULL);
+
+	if (osArea) {
+	    x1 = osArea->offset % width_bytes;
+	    x2 = (osArea->offset + osArea->size) % width_bytes;
+	    y1 = osArea->offset / width_bytes;
+	    y2 = (osArea->offset + osArea->size) / width_bytes;
+	}
+    }
+#endif
+
+    if ((!info->useEXA && !fbarea) || (info->useEXA && !osArea)) {
 	info->cursor_start    = 0;
 	xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
 		   "Hardware cursor disabled"
 		   " due to insufficient offscreen memory\n");
     } else {
-	info->cursor_start    = R128_ALIGN((fbarea->box.x1
-					    + width * fbarea->box.y1)
-					   * info->CurrentLayout.pixel_bytes, 16);
-	info->cursor_end      = info->cursor_start + size;
+	info->cursor_start = x1 * cpp + y1 * pScrn->virtualX * cpp;
+	info->cursor_end = x2 * cpp + y2 * pScrn->virtualX * cpp;
     }
 
     R128TRACE(("R128CursorInit (0x%08x-0x%08x)\n",
diff --git a/src/r128_dri.c b/src/r128_dri.c
index 4f318ef..f0d621b 100644
--- a/src/r128_dri.c
+++ b/src/r128_dri.c
@@ -305,6 +305,9 @@ static void R128EnterServer(ScreenPtr pScreen)
     R128InfoPtr info = R128PTR(pScrn);
 
     if (info->accel) info->accel->NeedToSync = TRUE;
+#ifdef USE_EXA
+    if (info->ExaDriver) exaMarkSync(pScreen);
+#endif
 }
 
 /* Called when the X server goes to sleep to allow the X server's
@@ -331,6 +334,9 @@ static void R128LeaveServer(ScreenPtr pScreen)
 
 	info->CCEInUse = FALSE;
     }
+#ifdef USE_EXA
+    if (info->ExaDriver) exaMarkSync(pScreen);
+#endif
 }
 
 /* Contexts can be swapped by the X server if necessary.  This callback
@@ -1389,6 +1395,7 @@ static void R128DRIRefreshArea(ScrnInfoPtr pScrn, int num, BoxPtr pbox)
     R128InfoPtr         info       = R128PTR(pScrn);
     int                 i;
     R128SAREAPrivPtr    pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen);
+    PixmapPtr		pPix	   = pScrn->pScreen->GetScreenPixmap(pScrn->pScreen);
 
     /* Don't want to do this when no 3d is active and pages are
      * right-way-round
@@ -1396,20 +1403,41 @@ static void R128DRIRefreshArea(ScrnInfoPtr pScrn, int num, BoxPtr pbox)
     if (!pSAREAPriv->pfAllowPageFlip && pSAREAPriv->pfCurrentPage == 0)
 	return;
 
-    (*info->accel->SetupForScreenToScreenCopy)(pScrn,
+    if (!info->useEXA) {
+        (*info->accel->SetupForScreenToScreenCopy)(pScrn,
 					       1, 1, GXcopy,
 					       (CARD32)(-1), -1);
+    }
+#ifdef USE_EXA 
+    else {
+        CARD32 src_pitch_offset, dst_pitch_offset, datatype;
+	
+	R128GetPixmapOffsetPitch(pPix, &src_pitch_offset);
+	dst_pitch_offset = src_pitch_offset + (info->backOffset >> 5);
+	R128GetDatatypeBpp(pScrn->bitsPerPixel, &datatype);
+	info->xdir = info->ydir = 1;
+	
+        R128DoPrepareCopy(pScrn, src_pitch_offset, dst_pitch_offset, datatype, GXcopy, ~0);
+    }
+#endif
 
     for (i = 0 ; i < num ; i++, pbox++) {
 	int xa = max(pbox->x1, 0), xb = min(pbox->x2, pScrn->virtualX-1);
 	int ya = max(pbox->y1, 0), yb = min(pbox->y2, pScrn->virtualY-1);
 
 	if (xa <= xb && ya <= yb) {
-	    (*info->accel->SubsequentScreenToScreenCopy)(pScrn, xa, ya,
+	    if (!info->useEXA) {
+	        (*info->accel->SubsequentScreenToScreenCopy)(pScrn, xa, ya,
 							 xa + info->backX,
 							 ya + info->backY,
 							 xb - xa + 1,
 							 yb - ya + 1);
+	    }
+#ifdef USE_EXA
+	    else {
+	        (*info->ExaDriver->Copy)(pPix, xa, ya, xa, ya, xb - xa + 1, yb - ya + 1);
+	    }
+#endif
 	}
     }
 }
@@ -1419,20 +1447,37 @@ static void R128EnablePageFlip(ScreenPtr pScreen)
     ScrnInfoPtr         pScrn      = xf86ScreenToScrn(pScreen);
     R128InfoPtr         info       = R128PTR(pScrn);
     R128SAREAPrivPtr    pSAREAPriv = DRIGetSAREAPrivate(pScreen);
+    PixmapPtr		pPix	   = pScreen->GetScreenPixmap(pScreen);
 
     if (info->allowPageFlip) {
 	/* Duplicate the frontbuffer to the backbuffer */
-	(*info->accel->SetupForScreenToScreenCopy)(pScrn,
+	if (!info->useEXA) {
+	    (*info->accel->SetupForScreenToScreenCopy)(pScrn,
 						   1, 1, GXcopy,
 						   (CARD32)(-1), -1);
 
-	(*info->accel->SubsequentScreenToScreenCopy)(pScrn,
+	    (*info->accel->SubsequentScreenToScreenCopy)(pScrn,
 						     0,
 						     0,
 						     info->backX,
 						     info->backY,
 						     pScrn->virtualX,
 						     pScrn->virtualY);
+	}
+#ifdef USE_EXA 
+	else {
+	    CARD32 src_pitch_offset, dst_pitch_offset, datatype;
+	
+	    R128GetPixmapOffsetPitch(pPix, &src_pitch_offset);
+	    dst_pitch_offset = src_pitch_offset + (info->backOffset >> 5);
+	    R128GetDatatypeBpp(pScrn->bitsPerPixel, &datatype);
+	    info->xdir = info->ydir = 1;
+	
+            R128DoPrepareCopy(pScrn, src_pitch_offset, dst_pitch_offset, datatype, GXcopy, ~0);
+	    
+	    (*info->ExaDriver->Copy)(pPix, 0, 0, 0, 0, pScrn->virtualX, pScrn->virtualY);
+	}
+#endif
 
 	pSAREAPriv->pfAllowPageFlip = 1;
     }
@@ -1471,6 +1516,10 @@ static void R128DRITransitionTo3d(ScreenPtr pScreen)
 
     if (info->cursor_start)
         xf86ForceHWCursor(pScreen, TRUE);
+
+#ifdef USE_EXA
+    info->state_2d.composite_setup = FALSE;
+#endif
 }
 
 static void R128DRITransitionTo2d(ScreenPtr pScreen)
diff --git a/src/r128_driver.c b/src/r128_driver.c
index bfa3dfd..877170c 100644
--- a/src/r128_driver.c
+++ b/src/r128_driver.c
@@ -156,7 +156,9 @@ typedef enum {
   OPTION_FBDEV,
   OPTION_VIDEO_KEY,
   OPTION_SHOW_CACHE,
-  OPTION_VGA_ACCESS
+  OPTION_VGA_ACCESS,
+  OPTION_ACCELMETHOD,
+  OPTION_RENDERACCEL
 } R128Opts;
 
 static const OptionInfoRec R128Options[] = {
@@ -184,6 +186,8 @@ static const OptionInfoRec R128Options[] = {
   { OPTION_VIDEO_KEY,    "VideoKey",         OPTV_INTEGER, {0}, FALSE },
   { OPTION_SHOW_CACHE,   "ShowCache",        OPTV_BOOLEAN, {0}, FALSE },
   { OPTION_VGA_ACCESS,   "VGAAccess",        OPTV_BOOLEAN, {0}, TRUE  },
+  { OPTION_ACCELMETHOD,  "AccelMethod",      OPTV_STRING,  {0}, FALSE },
+  { OPTION_RENDERACCEL,  "RenderAccel",      OPTV_BOOLEAN, {0}, FALSE },
   { -1,                  NULL,               OPTV_NONE,    {0}, FALSE }
 };
 
@@ -2185,16 +2189,83 @@ R128BlockHandler(BLOCKHANDLER_ARGS_DECL)
     }
 }
 
+#ifdef USE_EXA
+Bool R128VerboseInitEXA(ScreenPtr pScreen)
+{
+    ScrnInfoPtr pScrn  = xf86ScreenToScrn(pScreen);
+    R128InfoPtr info   = R128PTR(pScrn);
+
+    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Going to init EXA...\n");
+
+    if (R128EXAInit(pScreen)) {
+	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA Acceleration enabled\n");
+	info->accelOn = TRUE;
+    } else {
+	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+		   "EXA Acceleration initialization failed\n");
+	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA Acceleration disabled\n");
+	info->accelOn = FALSE;
+	
+	return FALSE;
+    }
+}
+#endif
+
+void R128VerboseInitAccel(Bool noAccel, ScreenPtr pScreen)
+{
+    ScrnInfoPtr pScrn  = xf86ScreenToScrn(pScreen);
+    R128InfoPtr info   = R128PTR(pScrn);
+    
+    if (!noAccel) {
+	if (R128AccelInit(pScreen)) {
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Acceleration enabled\n");
+	    info->accelOn = TRUE;
+	} else {
+	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+		       "Acceleration initialization failed\n");
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Acceleration disabled\n");
+	    info->accelOn = FALSE;
+	}
+    } else {
+	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Acceleration disabled\n");
+	info->accelOn = FALSE;
+    }
+}
+
 /* Called at the start of each server generation. */
 Bool R128ScreenInit(SCREEN_INIT_ARGS_DECL)
 {
-    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
+    ScrnInfoPtr pScrn  = xf86ScreenToScrn(pScreen);
     R128InfoPtr info   = R128PTR(pScrn);
     BoxRec      MemBox;
-    int		y2;
+    int width_bytes = (pScrn->displayWidth *
+			   info->CurrentLayout.pixel_bytes);
+    int         x1, x2, y1, y2;
     Bool	noAccel;
-
+#ifdef USE_EXA
+    ExaOffscreenArea*     osArea;
+#endif
+    char *optstr;
+    
     R128TRACE(("R128ScreenInit %x %d\n", pScrn->memPhysBase, pScrn->fbOffset));
+    info->useEXA = FALSE;
+
+#ifdef USE_EXA	
+    optstr = (char *)xf86GetOptValString(info->Options, OPTION_ACCELMETHOD);
+    if (optstr != NULL) {
+	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "AccelMethod option found\n");
+	if (xf86NameCmp(optstr, "EXA") == 0) {
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "AccelMethod is set to EXA, turning EXA on\n");
+	    info->useEXA = TRUE;
+	}
+    }
+#ifdef RENDER
+    info->RenderAccel = xf86ReturnOptValBool(info->Options, OPTION_RENDERACCEL, TRUE);
+    if (info->RenderAccel)
+        xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Acceleration of RENDER operations will be enabled"
+					     "upon successful loading of DRI and EXA\n");
+#endif
+#endif
 
 #ifdef R128DRI
 				/* Turn off the CCE for now. */
@@ -2220,7 +2291,7 @@ Bool R128ScreenInit(SCREEN_INIT_ARGS_DECL)
     } else {
 	if (!R128ModeInit(pScrn, pScrn->currentMode)) return FALSE;
     }
-
+    
     R128SaveScreen(pScreen, SCREEN_SAVER_ON);
     pScrn->AdjustFrame(ADJUST_FRAME_ARGS(pScrn, pScrn->frameX0, pScrn->frameY0));
 
@@ -2244,8 +2315,6 @@ Bool R128ScreenInit(SCREEN_INIT_ARGS_DECL)
 	/* FIXME: When we move to dynamic allocation of back and depth
 	   buffers, we will want to revisit the following check for 3
 	   times the virtual size of the screen below. */
-	int width_bytes = (pScrn->displayWidth *
-			   info->CurrentLayout.pixel_bytes);
 	int maxy        = info->FbMapSize / width_bytes;
 
 	if (noAccel) {
@@ -2322,8 +2391,6 @@ Bool R128ScreenInit(SCREEN_INIT_ARGS_DECL)
 #ifdef R128DRI
     if (info->directRenderingEnabled) {
 	FBAreaPtr fbarea;
-	int width_bytes = (pScrn->displayWidth *
-			   info->CurrentLayout.pixel_bytes);
 	int cpp = info->CurrentLayout.pixel_bytes;
 	int bufferSize = pScrn->virtualY * width_bytes;
 	int l, total;
@@ -2386,7 +2453,7 @@ Bool R128ScreenInit(SCREEN_INIT_ARGS_DECL)
 	    info->textureOffset = 0;
 	    info->textureSize = 0;
 	}
-
+	
 	total = info->FbMapSize - info->textureSize;
 	scanlines = total / width_bytes;
 	if (scanlines > 8191) scanlines = 8191;
@@ -2401,50 +2468,101 @@ Bool R128ScreenInit(SCREEN_INIT_ARGS_DECL)
 	MemBox.x2 = pScrn->displayWidth;
 	MemBox.y2 = scanlines;
 
-	if (!xf86InitFBManager(pScreen, &MemBox)) {
-	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
-		       "Memory manager initialization to (%d,%d) (%d,%d) failed\n",
-		       MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
-	    return FALSE;
-	} else {
-	    int width, height;
-
-	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-		       "Memory manager initialized to (%d,%d) (%d,%d)\n",
-		       MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
-	    if ((fbarea = xf86AllocateOffscreenArea(pScreen,
-						    pScrn->displayWidth,
-						    2, 0, NULL, NULL, NULL))) {
-		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-			   "Reserved area from (%d,%d) to (%d,%d)\n",
-			   fbarea->box.x1, fbarea->box.y1,
-			   fbarea->box.x2, fbarea->box.y2);
+	if (!info->useEXA) {
+	    if (!xf86InitFBManager(pScreen, &MemBox)) {
+	        xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+		           "Memory manager initialization to (%d,%d) (%d,%d) failed\n",
+		           MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
+	        return FALSE;
 	    } else {
-		xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve area\n");
-	    }
-	    if (xf86QueryLargestOffscreenArea(pScreen, &width,
-					      &height, 0, 0, 0)) {
-		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-			   "Largest offscreen area available: %d x %d\n",
-			   width, height);
+	        int width, height;
+
+	        xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		           "Memory manager initialized to (%d,%d) (%d,%d)\n",
+		           MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
+	        if ((fbarea = xf86AllocateOffscreenArea(pScreen,
+						        pScrn->displayWidth,
+						        2, 0, NULL, NULL, NULL))) {
+		    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+			       "Reserved area from (%d,%d) to (%d,%d)\n",
+			       fbarea->box.x1, fbarea->box.y1,
+			       fbarea->box.x2, fbarea->box.y2);
+	        } else {
+		    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve area\n");
+	        }
+	        if (xf86QueryLargestOffscreenArea(pScreen, &width,
+			  		          &height, 0, 0, 0)) {
+		    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+			       "Largest offscreen area available: %d x %d\n",
+		  	       width, height);
+	        }
+		
+		R128VerboseInitAccel(noAccel, pScreen);
 	    }
+	} 
+#ifdef USE_EXA
+	else {
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, 
+		       "Filling in EXA memory info\n");
+		       
+	    R128VerboseInitAccel(noAccel, pScreen);
+	    info->ExaDriver->offScreenBase = pScrn->virtualY * width_bytes;
+
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, 
+		       "Filled in offs\n");
+
+	    /* Don't give EXA the true full memory size, because the
+	       textureSize sized chunk on the end is handled by DRI */
+	    info->ExaDriver->memorySize = total;
+
+	    R128VerboseInitEXA(pScreen);
 	}
+#endif
 
-				/* Allocate the shared back buffer */
-	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
-						pScrn->virtualX,
-						pScrn->virtualY,
-						32, NULL, NULL, NULL))) {
+				/* Allocate the shared back buffer */	
+	if(!info->useEXA) {
+	    fbarea = xf86AllocateOffscreenArea(pScreen,
+					       pScrn->virtualX,
+					       pScrn->virtualY,
+					       32, NULL, NULL, NULL);
+
+	    if (fbarea) {
+		x1 = fbarea->box.x1;
+		x2 = fbarea->box.x2;
+		y1 = fbarea->box.y1;
+		y2 = fbarea->box.y2;
+	    }
+	} 
+#ifdef USE_EXA
+	else {
 	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-		       "Reserved back buffer from (%d,%d) to (%d,%d)\n",
-		       fbarea->box.x1, fbarea->box.y1,
-		       fbarea->box.x2, fbarea->box.y2);
-
-	    info->backX = fbarea->box.x1;
-	    info->backY = fbarea->box.y1;
-	    info->backOffset = (fbarea->box.y1 * width_bytes +
-				fbarea->box.x1 * cpp);
+		       "Actually trying an EXA allocation...\n");
+	    osArea = exaOffscreenAlloc(pScreen,
+				       pScrn->virtualY * width_bytes,
+				       32, TRUE, NULL, NULL);
+
+	    if (osArea) {
+		x1 = osArea->offset % width_bytes;
+		x2 = (osArea->offset + osArea->size) % width_bytes;
+		y1 = osArea->offset / width_bytes;
+		y2 = (osArea->offset + osArea->size) / width_bytes;
+
+		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Went swimmingly...\n");
+	    }
+	}
+#endif
+	
+	if ((!info->useEXA && fbarea) || (info->useEXA && osArea)) {
+	    /* info->backOffset = y1 * width_bytes + x1 * cpp; */
+	    info->backOffset = R128_ALIGN(y1 * width_bytes + x1 * cpp, 16);
+	    info->backX = info->backOffset % width_bytes;
+	    info->backY = info->backOffset / width_bytes;
 	    info->backPitch = pScrn->displayWidth;
+
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		       "Reserved back buffer from (%d,%d) to (%d,%d) offset: %x\n",
+		       x1, y1,
+		       x2, y2, info->backOffset);
 	} else {
 	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve back buffer\n");
 	    info->backX = -1;
@@ -2454,25 +2572,49 @@ Bool R128ScreenInit(SCREEN_INIT_ARGS_DECL)
 	}
 
 				/* Allocate the shared depth buffer */
-	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
-						pScrn->virtualX,
-						pScrn->virtualY + 1,
-						32, NULL, NULL, NULL))) {
-	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-		       "Reserved depth buffer from (%d,%d) to (%d,%d)\n",
-		       fbarea->box.x1, fbarea->box.y1,
-		       fbarea->box.x2, fbarea->box.y2);
-
-	    info->depthX = fbarea->box.x1;
-	    info->depthY = fbarea->box.y1;
-	    info->depthOffset = (fbarea->box.y1 * width_bytes +
-				 fbarea->box.x1 * cpp);
+	if(!info->useEXA) {
+	    fbarea = xf86AllocateOffscreenArea(pScreen,
+					       pScrn->virtualX,
+					       pScrn->virtualY + 1,
+					       32, NULL, NULL, NULL);
+	    if (fbarea) {
+		x1 = fbarea->box.x1;
+		x2 = fbarea->box.x2;
+		y1 = fbarea->box.y1;
+		y2 = fbarea->box.y2;
+	    }
+	} 
+#ifdef USE_EXA
+	else {
+	    osArea = exaOffscreenAlloc(pScreen,
+				       (pScrn->virtualY + 1) * width_bytes,
+				       32, TRUE, NULL, NULL);
+
+	    if (osArea) {
+		x1 = osArea->offset % width_bytes;
+		x2 = (osArea->offset + osArea->size) % width_bytes;
+		y1 = osArea->offset / width_bytes;
+		y2 = (osArea->offset + osArea->size) / width_bytes;
+	    }
+	}
+#endif
+
+	if ((!info->useEXA && fbarea) || (info->useEXA && osArea)) {
+	    /* info->depthOffset = y1 * width_bytes + x1 * cpp; */
+	    info->depthOffset = R128_ALIGN(y1 * width_bytes + x1 * cpp, 16);
+	    info->depthX = info->depthOffset % width_bytes;
+	    info->depthY = info->depthOffset / width_bytes;
 	    info->depthPitch = pScrn->displayWidth;
-	    info->spanOffset = ((fbarea->box.y2 - 1) * width_bytes +
-				fbarea->box.x1 * cpp);
+	    info->spanOffset = (y2 - 1) * width_bytes + x1 * cpp;
+
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		       "Reserved depth buffer from (%d,%d) to (%d,%d) offset: %x\n",
+		       x1, y1,
+		       x2, y2, info->depthOffset);
+
 	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
 		       "Reserved depth span from (%d,%d) offset 0x%x\n",
-		       fbarea->box.x1, fbarea->box.y2 - 1, info->spanOffset);
+		       x1, y2 - 1, info->spanOffset);
 	} else {
 	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve depth buffer\n");
 	    info->depthX = -1;
@@ -2487,7 +2629,7 @@ Bool R128ScreenInit(SCREEN_INIT_ARGS_DECL)
 		   info->textureSize/1024, info->textureOffset);
     }
     else
-#endif
+#endif /* R128DRI */
     {
 	MemBox.x1 = 0;
 	MemBox.y1 = 0;
@@ -2501,50 +2643,51 @@ Bool R128ScreenInit(SCREEN_INIT_ARGS_DECL)
 	if (y2 > 8191) y2 = 8191;
 	MemBox.y2 = y2;
 
-	if (!xf86InitFBManager(pScreen, &MemBox)) {
-	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
-		       "Memory manager initialization to (%d,%d) (%d,%d) failed\n",
-		       MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
-	    return FALSE;
-	} else {
-	    int       width, height;
-	    FBAreaPtr fbarea;
-
-	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-		       "Memory manager initialized to (%d,%d) (%d,%d)\n",
-		       MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
-	    if ((fbarea = xf86AllocateOffscreenArea(pScreen, pScrn->displayWidth,
-						    2, 0, NULL, NULL, NULL))) {
-		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-			   "Reserved area from (%d,%d) to (%d,%d)\n",
-			   fbarea->box.x1, fbarea->box.y1,
-			   fbarea->box.x2, fbarea->box.y2);
+	if (!info->useEXA) {
+	    if (!xf86InitFBManager(pScreen, &MemBox)) {
+	        xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+		           "Memory manager initialization to (%d,%d) (%d,%d) failed\n",
+		           MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
+	        return FALSE;
 	    } else {
-		xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve area\n");
-	    }
-	    if (xf86QueryLargestOffscreenArea(pScreen, &width, &height,
-					      0, 0, 0)) {
-		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-			   "Largest offscreen area available: %d x %d\n",
-			   width, height);
+	        int       width, height;
+	        FBAreaPtr fbarea;
+
+	        xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		           "Memory manager initialized to (%d,%d) (%d,%d)\n",
+		           MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
+	        if ((fbarea = xf86AllocateOffscreenArea(pScreen, pScrn->displayWidth, 2, 0, NULL, NULL, NULL))) {
+		    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+			       "Reserved area from (%d,%d) to (%d,%d)\n",
+			       fbarea->box.x1, fbarea->box.y1,
+			       fbarea->box.x2, fbarea->box.y2);
+	        } else {
+		    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve area\n");
+	        }
+	        if (xf86QueryLargestOffscreenArea(pScreen, &width, &height, 0, 0, 0)) {
+		    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+			       "Largest offscreen area available: %d x %d\n",
+		    	       width, height);
+	        }
+		
+		R128VerboseInitAccel(noAccel, pScreen);
 	    }
 	}
-    }
-
-				/* Acceleration setup */
-    if (!noAccel) {
-	if (R128AccelInit(pScreen)) {
-	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Acceleration enabled\n");
-	    info->accelOn = TRUE;
-	} else {
-	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
-		       "Acceleration initialization failed\n");
-	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Acceleration disabled\n");
-	    info->accelOn = FALSE;
+#ifdef USE_EXA
+	else {
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, 
+		       "Filling in EXA memory info\n");
+	    
+	    R128VerboseInitAccel(noAccel, pScreen);
+	    info->ExaDriver->offScreenBase = pScrn->virtualY * width_bytes;
+	    
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, 
+		       "Filled in offs\n");
+	    
+	    info->ExaDriver->memorySize = info->FbMapSize;
+	    R128VerboseInitEXA(pScreen);
 	}
-    } else {
-	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Acceleration disabled\n");
-	info->accelOn = FALSE;
+#endif
     }
 
 				/* DGA setup */
@@ -2896,16 +3039,6 @@ static void R128RestorePLL2Registers(ScrnInfoPtr pScrn, R128SavePtr restore)
 					| R128_P2PLL_ATOMIC_UPDATE_EN
 					| R128_P2PLL_VGA_ATOMIC_UPDATE_EN));
 
-    R128TRACE(("Wrote: 0x%08x 0x%08x 0x%08x (0x%08x)\n",
-	       restore->p2pll_ref_div,
-	       restore->p2pll_div_0,
-	       restore->htotal_cntl2,
-	       INPLL(pScrn, RADEON_P2PLL_CNTL)));
-    R128TRACE(("Wrote: rd=%d, fd=%d, pd=%d\n",
-	       restore->p2pll_ref_div & RADEON_P2PLL_REF_DIV_MASK,
-	       restore->p2pll_div_0 & RADEON_P2PLL_FB3_DIV_MASK,
-	       (restore->p2pll_div_0 & RADEON_P2PLL_POST3_DIV_MASK) >>16));
-
     usleep(5000); /* Let the clock to lock */
 
     OUTPLLP(pScrn, R128_V2CLK_VCLKTV_CNTL,
@@ -3792,12 +3925,6 @@ static void R128InitPLL2Registers(R128SavePtr save, R128PLLPtr pll,
 				     pll->reference_freq);
     save->post_div_2       = post_div->divider;
 
-    R128TRACE(("dc=%d, of=%d, fd=%d, pd=%d\n",
-	       save->dot_clock_freq_2,
-	       save->pll_output_freq_2,
-	       save->feedback_div_2,
-	       save->post_div_2));
-
     save->p2pll_ref_div   = pll->reference_div;
     save->p2pll_div_0    = (save->feedback_div_2 | (post_div->bitvalue<<16));
     save->htotal_cntl2    = 0;
@@ -4265,6 +4392,10 @@ void R128LeaveVT(VT_FUNC_ARGS_DECL)
 	DRILock(pScrn->pScreen, 0);
 	R128CCE_STOP(pScrn, info);
     }
+#ifdef USE_EXA
+    if (info->useEXA)
+        info->state_2d.composite_setup = FALSE;
+#endif
 #endif
     R128SavePalette(pScrn, save);
     info->PaletteSavedOnVT = TRUE;
@@ -4298,8 +4429,18 @@ static Bool R128CloseScreen(CLOSE_SCREEN_ARGS_DECL)
 	R128UnmapMem(pScrn);
     }
 
-    if (info->accel)             XAADestroyInfoRec(info->accel);
-    info->accel                  = NULL;
+    if (info->accel) {
+#ifdef USE_EXA
+        if (info->useEXA) {
+	    exaDriverFini(pScreen);
+	    free(info->ExaDriver);
+	} else
+#endif
+	{
+            XAADestroyInfoRec(info->accel);
+	}
+        info->accel = NULL;
+    }
 
     if (info->scratch_save)      free(info->scratch_save);
     info->scratch_save           = NULL;
diff --git a/src/r128_exa.c b/src/r128_exa.c
new file mode 100644
index 0000000..c1919b1
--- /dev/null
+++ b/src/r128_exa.c
@@ -0,0 +1,496 @@
+/*
+ * Copyright 2006 Joseph Garvin
+ * Copyright 2012 Connor Behan
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Joseph Garvin <joseph.h.garvin at gmail.com>
+ *    Connor Behan <connor.behan at gmail.com>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "r128.h"
+#include "exa.h"
+
+#include "r128_reg.h"
+
+#include "xf86.h"
+
+extern Bool R128DMA(R128InfoPtr info, unsigned char *src, unsigned char *dst, 
+		int srcPitch, int dstPitch, int h, int w);
+
+static struct {
+    int rop;
+    int pattern;
+} R128_ROP[] = {
+    { R128_ROP3_ZERO, R128_ROP3_ZERO }, /* GXclear        */
+    { R128_ROP3_DSa,  R128_ROP3_DPa  }, /* Gxand          */
+    { R128_ROP3_SDna, R128_ROP3_PDna }, /* GXandReverse   */
+    { R128_ROP3_S,    R128_ROP3_P    }, /* GXcopy         */
+    { R128_ROP3_DSna, R128_ROP3_DPna }, /* GXandInverted  */
+    { R128_ROP3_D,    R128_ROP3_D    }, /* GXnoop         */
+    { R128_ROP3_DSx,  R128_ROP3_DPx  }, /* GXxor          */
+    { R128_ROP3_DSo,  R128_ROP3_DPo  }, /* GXor           */
+    { R128_ROP3_DSon, R128_ROP3_DPon }, /* GXnor          */
+    { R128_ROP3_DSxn, R128_ROP3_PDxn }, /* GXequiv        */
+    { R128_ROP3_Dn,   R128_ROP3_Dn   }, /* GXinvert       */
+    { R128_ROP3_SDno, R128_ROP3_PDno }, /* GXorReverse    */
+    { R128_ROP3_Sn,   R128_ROP3_Pn   }, /* GXcopyInverted */
+    { R128_ROP3_DSno, R128_ROP3_DPno }, /* GXorInverted   */
+    { R128_ROP3_DSan, R128_ROP3_DPan }, /* GXnand         */
+    { R128_ROP3_ONE,  R128_ROP3_ONE  }  /* GXset          */
+};
+
+/* Assumes that depth 15 and 16 can be used as depth 16, which is okay since we
+ * require src and dest datatypes to be equal.
+ */
+Bool R128GetDatatypeBpp(int bpp, uint32_t *type)
+{
+    switch (bpp) {
+    case 8:
+        *type = R128_DATATYPE_CI8;
+        return TRUE;
+    case 16:
+        *type = R128_DATATYPE_RGB565;
+        return TRUE;
+    case 24:
+        *type = R128_DATATYPE_RGB888;
+        return TRUE;
+    case 32:
+        *type = R128_DATATYPE_ARGB8888;
+        return TRUE;
+    default:
+        return FALSE;
+    }
+}
+
+static Bool R128GetOffsetPitch(PixmapPtr pPix, int bpp, uint32_t *pitch_offset,
+				 unsigned int offset, unsigned int pitch)
+{
+    ScreenPtr     pScreen   = pPix->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+
+    if (pitch > 16320 || pitch % info->ExaDriver->pixmapPitchAlign != 0) {
+        R128TRACE(("Bad pitch 0x%08x\n", pitch));
+	return FALSE;
+    }
+
+    if (offset % info->ExaDriver->pixmapOffsetAlign != 0) {
+        R128TRACE(("Bad offset 0x%08x\n", offset));
+	return FALSE;
+    }
+
+    *pitch_offset = ((pitch / bpp) << 21) | (offset >> 5);
+
+    return TRUE;
+}
+
+Bool R128GetPixmapOffsetPitch(PixmapPtr pPix, uint32_t *pitch_offset)
+{
+    uint32_t pitch, offset;
+    int bpp;
+
+    bpp = pPix->drawable.bitsPerPixel;
+    if (bpp == 24)
+        bpp = 8;
+
+    offset = exaGetPixmapOffset(pPix);
+    pitch = exaGetPixmapPitch(pPix);
+
+    return R128GetOffsetPitch(pPix, bpp, pitch_offset, offset, pitch);
+}
+
+static void Emit2DState(ScrnInfoPtr pScrn)
+{
+    R128InfoPtr   info      = R128PTR(pScrn);
+    int has_src		    = info->state_2d.src_pitch_offset;
+    unsigned char *R128MMIO = info->MMIO;
+    
+    R128WaitForFifo(pScrn, (has_src ? 10 : 9));
+    
+    OUTREG(R128_DEFAULT_SC_BOTTOM_RIGHT, info->state_2d.default_sc_bottom_right);
+    OUTREG(R128_DP_GUI_MASTER_CNTL, info->state_2d.dp_gui_master_cntl);
+    OUTREG(R128_DP_BRUSH_FRGD_CLR, info->state_2d.dp_brush_frgd_clr);
+    OUTREG(R128_DP_BRUSH_BKGD_CLR, info->state_2d.dp_brush_bkgd_clr);
+    OUTREG(R128_DP_SRC_FRGD_CLR,   info->state_2d.dp_src_frgd_clr);
+    OUTREG(R128_DP_SRC_BKGD_CLR,   info->state_2d.dp_src_bkgd_clr);
+    OUTREG(R128_DP_WRITE_MASK, info->state_2d.dp_write_mask);
+    OUTREG(R128_DP_CNTL, info->state_2d.dp_cntl);
+
+    OUTREG(R128_DST_PITCH_OFFSET, info->state_2d.dst_pitch_offset);
+    if (has_src) OUTREG(R128_SRC_PITCH_OFFSET, info->state_2d.src_pitch_offset);
+}
+
+static void EmitCCE2DState(ScrnInfoPtr pScrn)
+{
+    R128InfoPtr   info      = R128PTR(pScrn);
+    int has_src		    = info->state_2d.src_pitch_offset;
+    RING_LOCALS;
+    
+    R128CCE_REFRESH( pScrn, info );
+    
+    BEGIN_RING( (has_src ? 20 : 18) );
+    
+    OUT_RING_REG( R128_DEFAULT_SC_BOTTOM_RIGHT, info->state_2d.default_sc_bottom_right );
+    OUT_RING_REG( R128_DP_GUI_MASTER_CNTL, info->state_2d.dp_gui_master_cntl );
+    OUT_RING_REG( R128_DP_BRUSH_FRGD_CLR, info->state_2d.dp_brush_frgd_clr );
+    OUT_RING_REG( R128_DP_BRUSH_BKGD_CLR, info->state_2d.dp_brush_bkgd_clr );
+    OUT_RING_REG( R128_DP_SRC_FRGD_CLR,   info->state_2d.dp_src_frgd_clr );
+    OUT_RING_REG( R128_DP_SRC_BKGD_CLR,   info->state_2d.dp_src_bkgd_clr );
+    OUT_RING_REG( R128_DP_WRITE_MASK, info->state_2d.dp_write_mask );
+    OUT_RING_REG( R128_DP_CNTL, info->state_2d.dp_cntl );
+
+    OUT_RING_REG( R128_DST_PITCH_OFFSET, info->state_2d.dst_pitch_offset );
+    if (has_src) OUT_RING_REG( R128_SRC_PITCH_OFFSET, info->state_2d.src_pitch_offset );
+    
+    ADVANCE_RING();
+}
+
+/* EXA Callbacks */
+
+static Bool
+R128PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
+{
+    ScreenPtr     pScreen   = pPixmap->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    
+    int bpp = pPixmap->drawable.bitsPerPixel;
+    uint32_t datatype, dst_pitch_offset;
+    
+    if (!R128GetDatatypeBpp(bpp, &datatype)) {
+        R128TRACE(("R128GetDatatypeBpp failed\n"));
+	return FALSE;
+    }
+    if (!R128GetPixmapOffsetPitch(pPixmap, &dst_pitch_offset)) {
+        R128TRACE(("R128GetPixmapOffsetPitch failed\n"));
+	return FALSE;
+    }  
+    if (info->state_2d.in_use) return FALSE;
+    
+    info->state_2d.in_use = TRUE;
+    info->state_2d.default_sc_bottom_right = (R128_DEFAULT_SC_RIGHT_MAX | R128_DEFAULT_SC_BOTTOM_MAX);
+    info->state_2d.dp_brush_bkgd_clr = 0x00000000;
+    info->state_2d.dp_src_frgd_clr = 0xffffffff;
+    info->state_2d.dp_src_bkgd_clr = 0x00000000;
+    info->state_2d.dp_gui_master_cntl = (R128_GMC_DST_PITCH_OFFSET_CNTL |
+					  R128_GMC_BRUSH_SOLID_COLOR |
+					  (datatype >> 8) |
+					  R128_GMC_SRC_DATATYPE_COLOR |
+					  R128_ROP[alu].pattern |
+					  R128_GMC_CLR_CMP_CNTL_DIS);
+    info->state_2d.dp_brush_frgd_clr = fg;
+    info->state_2d.dp_cntl = (R128_DST_X_LEFT_TO_RIGHT | R128_DST_Y_TOP_TO_BOTTOM);
+    info->state_2d.dp_write_mask = planemask;
+    info->state_2d.dst_pitch_offset = dst_pitch_offset;
+    info->state_2d.src_pitch_offset = 0;
+    
+#ifdef R128DRI
+    if (info->directRenderingEnabled) {
+        EmitCCE2DState(pScrn);
+    } else
+#endif
+    {
+        Emit2DState(pScrn);
+    }
+    return TRUE;
+}
+
+static void
+R128Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2)
+{
+    ScreenPtr     pScreen   = pPixmap->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    unsigned char *R128MMIO = info->MMIO;
+
+    R128WaitForFifo(pScrn, 2);
+    OUTREG(R128_DST_Y_X,          (y1 << 16) | x1);
+    OUTREG(R128_DST_WIDTH_HEIGHT, ((x2-x1) << 16) | (y2-y1));
+}
+
+#define R128DoneSolid R128Done
+
+void
+R128DoPrepareCopy(ScrnInfoPtr pScrn, uint32_t src_pitch_offset,
+			uint32_t dst_pitch_offset, uint32_t datatype, int alu, Pixel planemask)
+{
+    R128InfoPtr   info      = R128PTR(pScrn);
+    
+    info->state_2d.in_use = TRUE;
+    info->state_2d.dp_gui_master_cntl = (R128_GMC_DST_PITCH_OFFSET_CNTL |
+					  R128_GMC_SRC_PITCH_OFFSET_CNTL |
+					  R128_GMC_BRUSH_NONE |
+					  (datatype >> 8) |
+					  R128_GMC_SRC_DATATYPE_COLOR |
+					  R128_ROP[alu].rop |
+					  R128_DP_SRC_SOURCE_MEMORY |
+					  R128_GMC_CLR_CMP_CNTL_DIS);
+    info->state_2d.dp_cntl = ((info->xdir >= 0 ? R128_DST_X_LEFT_TO_RIGHT : 0) |
+			       (info->ydir >= 0 ? R128_DST_Y_TOP_TO_BOTTOM : 0));
+    info->state_2d.dp_brush_frgd_clr = 0xffffffff;
+    info->state_2d.dp_brush_bkgd_clr = 0x00000000;
+    info->state_2d.dp_src_frgd_clr = 0xffffffff;
+    info->state_2d.dp_src_bkgd_clr = 0x00000000;
+    info->state_2d.dp_write_mask = planemask;
+    info->state_2d.dst_pitch_offset = dst_pitch_offset;
+    info->state_2d.src_pitch_offset = src_pitch_offset;
+    info->state_2d.default_sc_bottom_right = (R128_DEFAULT_SC_RIGHT_MAX | R128_DEFAULT_SC_BOTTOM_MAX);
+    
+#ifdef R128DRI
+    if (info->directRenderingEnabled) {
+        EmitCCE2DState(pScrn);
+    } else
+#endif
+    {
+        Emit2DState(pScrn);
+    }
+}
+
+static Bool
+R128PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir, int ydir, int alu, Pixel planemask)
+{
+    ScreenPtr     pScreen   = pSrcPixmap->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    
+    int bpp = pDstPixmap->drawable.bitsPerPixel;
+    uint32_t datatype, src_pitch_offset, dst_pitch_offset;
+    
+    if (!R128GetDatatypeBpp(bpp, &datatype)) {
+        R128TRACE(("R128GetDatatypeBpp failed\n"));
+	return FALSE;
+    }
+    if (!R128GetPixmapOffsetPitch(pSrcPixmap, &src_pitch_offset)) {
+        R128TRACE(("R128GetPixmapOffsetPitch source failed\n"));
+	return FALSE;
+    }  
+    if (!R128GetPixmapOffsetPitch(pDstPixmap, &dst_pitch_offset)) {
+        R128TRACE(("R128GetPixmapOffsetPitch dest failed\n"));
+	return FALSE;
+    }  
+    if (info->state_2d.in_use) return FALSE;
+
+    info->xdir = xdir;
+    info->ydir = ydir;
+    
+    R128DoPrepareCopy(pScrn, src_pitch_offset, dst_pitch_offset, datatype, alu, planemask);
+    
+    return TRUE;
+}
+
+static void
+R128Copy(PixmapPtr pDstPixmap, int srcX, int srcY, int dstX, int dstY, int width, int height)
+{
+    ScreenPtr     pScreen   = pDstPixmap->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    unsigned char *R128MMIO = info->MMIO;
+
+    if (info->xdir < 0) srcX += width - 1, dstX += width - 1;
+    if (info->ydir < 0) srcY += height - 1, dstY += height - 1;
+
+    R128WaitForFifo(pScrn, 3);
+    OUTREG(R128_SRC_Y_X,          (srcY << 16) | srcX);
+    OUTREG(R128_DST_Y_X,          (dstY << 16) | dstX);
+    OUTREG(R128_DST_HEIGHT_WIDTH, (height << 16) | width);
+}
+
+#define R128DoneCopy R128Done
+
+static void
+R128Sync(ScreenPtr pScreen, int marker)
+{
+    R128WaitForIdle(xf86ScreenToScrn(pScreen));
+}
+
+static void
+R128Done(PixmapPtr pPixmap)
+{
+    ScreenPtr     pScreen   = pPixmap->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    
+    info->state_2d.in_use = FALSE;
+#ifdef R128DRI
+#ifdef RENDER
+    if (info->state_2d.src_pix) {
+        pScreen->DestroyPixmap(info->state_2d.src_pix);
+	info->state_2d.src_pix = NULL;
+    }
+    if (info->state_2d.msk_pix) {
+        pScreen->DestroyPixmap(info->state_2d.msk_pix);
+	info->state_2d.msk_pix = NULL;
+    }
+#endif
+#endif
+}
+
+#ifdef R128DRI
+
+#define R128CCEPrepareSolid R128PrepareSolid
+
+static void
+R128CCESolid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2)
+{
+    ScreenPtr     pScreen   = pPixmap->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    RING_LOCALS;
+
+    R128CCE_REFRESH( pScrn, info );
+
+    BEGIN_RING( 4 );
+
+    OUT_RING_REG( R128_DST_Y_X,          (y1 << 16) | x1 );
+    OUT_RING_REG( R128_DST_WIDTH_HEIGHT, ((x2-x1) << 16) | (y2-y1) );
+
+    ADVANCE_RING();
+}
+
+#define R128CCEDoneSolid R128Done
+
+#define R128CCEPrepareCopy R128PrepareCopy
+
+static void
+R128CCECopy(PixmapPtr pDstPixmap, int srcX, int srcY, int dstX, int dstY, 
+	 int width, int height)
+{
+    ScreenPtr     pScreen   = pDstPixmap->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    RING_LOCALS;
+   
+    R128CCE_REFRESH( pScrn, info );
+
+    if (info->xdir < 0) srcX += width - 1, dstX += width - 1;
+    if (info->ydir < 0) srcY += height - 1, dstY += height - 1;
+
+    BEGIN_RING( 6 );
+
+    OUT_RING_REG( R128_SRC_Y_X,          (srcY << 16) | srcX );
+    OUT_RING_REG( R128_DST_Y_X,          (dstY << 16) | dstX );
+    OUT_RING_REG( R128_DST_HEIGHT_WIDTH, (height << 16) | width );
+
+    ADVANCE_RING();
+}
+
+#define R128CCEDoneCopy R128Done
+
+static void
+R128CCESync(ScreenPtr pScreen, int marker)
+{
+    R128CCEWaitForIdle(xf86ScreenToScrn(pScreen));
+}
+
+#ifdef RENDER
+#include "r128_exa_render.c"
+#endif
+
+#endif
+
+Bool
+R128EXAInit(ScreenPtr pScreen)
+{
+    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
+    R128InfoPtr info  = R128PTR(pScrn);
+
+    info->ExaDriver->exa_major = EXA_VERSION_MAJOR;
+    info->ExaDriver->exa_minor = EXA_VERSION_MINOR;
+    
+    info->ExaDriver->memoryBase = info->FB + pScrn->fbOffset;
+    info->ExaDriver->flags = EXA_OFFSCREEN_PIXMAPS | EXA_OFFSCREEN_ALIGN_POT;
+
+#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
+    info->ExaDriver->maxPitchBytes = 16320;
+#endif
+    /* Pitch alignment is in sets of 8 pixels, and we need to cover 32bpp, so it's 32 bytes */
+    info->ExaDriver->pixmapPitchAlign = 32;
+    info->ExaDriver->pixmapOffsetAlign = 32;
+    info->ExaDriver->maxX = 2048;
+    info->ExaDriver->maxY = 2048;
+
+    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+	       "Setting up EXA callbacks\n");
+
+#ifdef R128DRI
+    if (info->directRenderingEnabled) {
+	info->ExaDriver->PrepareSolid = R128CCEPrepareSolid;
+	info->ExaDriver->Solid = R128CCESolid;
+	info->ExaDriver->DoneSolid = R128CCEDoneSolid;
+
+	info->ExaDriver->PrepareCopy = R128CCEPrepareCopy;
+	info->ExaDriver->Copy = R128CCECopy;
+	info->ExaDriver->DoneCopy = R128CCEDoneCopy;
+
+#ifdef RENDER
+	if (info->RenderAccel) {
+	    info->ExaDriver->CheckComposite = R128CCECheckComposite;
+	    info->ExaDriver->PrepareComposite = R128CCEPrepareComposite;
+	    info->ExaDriver->Composite = R128CCEComposite;
+	    info->ExaDriver->DoneComposite = R128CCEDoneComposite;
+	}
+#endif
+
+	info->ExaDriver->WaitMarker = R128CCESync;
+    } else
+#endif
+    {
+	info->ExaDriver->PrepareSolid = R128PrepareSolid;
+	info->ExaDriver->Solid = R128Solid;
+	info->ExaDriver->DoneSolid = R128DoneSolid;
+    
+	info->ExaDriver->PrepareCopy = R128PrepareCopy;
+	info->ExaDriver->Copy = R128Copy;
+	info->ExaDriver->DoneCopy = R128DoneCopy;
+	
+	/* The registers used for r128 compositing are CCE specific, just like the
+	 * registers used for radeon compositing are CP specific. The radeon driver
+	 * falls back to different registers when there is no DRI. The equivalent
+	 * registers on the r128 (if they even exist) are not listed in the register
+	 * file so I can't implement compositing without DRI.
+	 */
+	
+	info->ExaDriver->WaitMarker = R128Sync;
+    }
+    
+    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+	       "Initalizing 2D acceleration engine...\n");
+
+    R128EngineInit(pScrn);
+
+    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+	       "Initializing EXA driver...\n");
+
+    if (!exaDriverInit(pScreen, info->ExaDriver)) {
+        free(info->ExaDriver);
+	return FALSE;
+    }
+    
+    info->state_2d.composite_setup = FALSE;
+    return TRUE;
+}
diff --git a/src/r128_exa_render.c b/src/r128_exa_render.c
new file mode 100644
index 0000000..5cf5c7a
--- /dev/null
+++ b/src/r128_exa_render.c
@@ -0,0 +1,697 @@
+/*
+ * Copyright 2003 Eric Anholt
+ * Copyright 2003 Anders Carlsson
+ * Copyright 2012 Connor Behan
+ * Copyright 2012 Michel Dänzer
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Anders Carlsson <andersca at gnome.org>
+ *    Eric Anholt <anholt at FreeBSD.org>
+ *    Connor Behan <connor.behan at gmail.com>
+ *    Michel Dänzer <michel.daenzer at amd.com>
+ *
+ */
+
+/* The following is based on the kdrive ATI driver. */
+
+#include <stdio.h>
+#include <string.h>
+
+static struct {
+    Bool dst_alpha;
+    Bool src_alpha;
+    CARD32 sblend;
+    CARD32 dblend;
+} R128BlendOp[] = {
+    /* Clear */
+    {0, 0, R128_ALPHA_BLEND_ZERO        , R128_ALPHA_BLEND_ZERO},
+    /* Src */
+    {0, 0, R128_ALPHA_BLEND_ONE         , R128_ALPHA_BLEND_ZERO},
+    /* Dst */
+    {0, 0, R128_ALPHA_BLEND_ZERO        , R128_ALPHA_BLEND_ONE},
+    /* Over */
+    {0, 1, R128_ALPHA_BLEND_ONE         , R128_ALPHA_BLEND_INVSRCALPHA},
+    /* OverReverse */
+    {1, 0, R128_ALPHA_BLEND_INVDSTALPHA , R128_ALPHA_BLEND_ONE},
+    /* In */
+    {1, 0, R128_ALPHA_BLEND_DSTALPHA    , R128_ALPHA_BLEND_ZERO},
+    /* InReverse */
+    {0, 1, R128_ALPHA_BLEND_ZERO        , R128_ALPHA_BLEND_SRCALPHA},
+    /* Out */
+    {1, 0, R128_ALPHA_BLEND_INVDSTALPHA , R128_ALPHA_BLEND_ZERO},
+    /* OutReverse */
+    {0, 1, R128_ALPHA_BLEND_ZERO        , R128_ALPHA_BLEND_INVSRCALPHA},
+    /* Atop */
+    {1, 1, R128_ALPHA_BLEND_DSTALPHA    , R128_ALPHA_BLEND_INVSRCALPHA},
+    /* AtopReverse */
+    {1, 1, R128_ALPHA_BLEND_INVDSTALPHA , R128_ALPHA_BLEND_SRCALPHA},
+    /* Xor */
+    {1, 1, R128_ALPHA_BLEND_INVDSTALPHA , R128_ALPHA_BLEND_INVSRCALPHA},
+    /* Add */
+    {0, 0, R128_ALPHA_BLEND_ONE         , R128_ALPHA_BLEND_ONE},
+};
+
+static Bool
+R128TransformAffineOrScaled(PictTransformPtr t)
+{
+    if (t == NULL) return TRUE;
+    
+    /* the shaders don't handle scaling either */
+    return t->matrix[2][0] == 0 && t->matrix[2][1] == 0 && t->matrix[2][2] == IntToxFixed(1);
+}
+
+static PixmapPtr
+R128GetDrawablePixmap(DrawablePtr pDrawable)
+{
+    if (pDrawable->type == DRAWABLE_WINDOW)
+	return pDrawable->pScreen->GetWindowPixmap((WindowPtr)pDrawable);
+    else
+	return (PixmapPtr)pDrawable;
+}
+
+static PixmapPtr
+R128SolidPixmap(ScreenPtr pScreen, uint32_t solid)
+{
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    PixmapPtr	  pPix	    = pScreen->CreatePixmap(pScreen, 1, 1, 32, 0);
+   
+    exaMoveInPixmap(pPix);
+    if (!exaDrawableIsOffscreen(&pPix->drawable)) {
+        pScreen->DestroyPixmap(pPix);
+	return NULL;
+    }
+    info->ExaDriver->WaitMarker(pScreen, 0);
+    memcpy(info->ExaDriver->memoryBase + exaGetPixmapOffset(pPix), &solid, 4);
+
+    return pPix;
+}
+
+static Bool
+R128GetDatatypePict1(uint32_t format, uint32_t *type)
+{
+    switch(format) {
+    case PICT_r5g6b5:
+	*type = R128_DATATYPE_RGB565;
+	return TRUE;
+    case PICT_x1r5g5b5:
+	*type = R128_DATATYPE_ARGB1555;
+	return TRUE;
+    case PICT_x8r8g8b8:
+	*type = R128_DATATYPE_ARGB8888;
+	return TRUE;
+    default:
+        return FALSE;
+    }
+}
+
+static Bool
+R128GetDatatypePict2(uint32_t format, uint32_t *type)
+{
+    switch(format) {
+    case PICT_a8:
+        *type = R128_DATATYPE_RGB8;
+	return TRUE;
+    case PICT_r5g6b5:
+	*type = R128_DATATYPE_RGB565;
+	return TRUE;
+    case PICT_a8r8g8b8:
+	*type = R128_DATATYPE_ARGB8888;
+	return TRUE;
+    default:
+        return FALSE;
+    }
+}
+
+static Bool
+R128CheckCompositeTexture(PicturePtr pPict, PicturePtr pDstPict, int op)
+{
+    ScreenPtr     pScreen   = pDstPict->pDrawable->pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    
+    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
+    uint32_t tmp1;
+   
+    if (!R128GetDatatypePict2(pPict->format, &tmp1)) return FALSE;
+    
+    if (pPict->pDrawable) {
+        int w = pPict->pDrawable->width;
+        int h = pPict->pDrawable->height;
+	
+        if (pPict->repeat && ((w & (w - 1)) != 0 || (h & (h - 1)) != 0)) {
+            R128TRACE(("NPOT repeat unsupported (%dx%d)\n", w, h));
+	    return FALSE;
+        }
+    }
+    
+    if (pPict->filter != PictFilterNearest && pPict->filter != PictFilterBilinear) {
+	R128TRACE(("Unsupported filter 0x%x\n", pPict->filter));
+	return FALSE;
+    }
+    
+    /* The radeon driver has a long explanation about this part that I don't really understand */
+    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
+	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0))) {
+	    R128TRACE(("REPEAT_NONE unsupported for transformed xRGB source\n"));
+	    return FALSE;
+	}
+    }
+    if (!R128TransformAffineOrScaled(pPict->transform)) {
+	R128TRACE(("Non-affine transforms not supported\n"));
+	return FALSE;
+    }
+    
+    return TRUE;
+}
+
+static Bool
+R128CCECheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, PicturePtr pDstPicture)
+{
+    ScreenPtr     pScreen   = pDstPicture->pDrawable->pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    
+    PixmapPtr pSrcPixmap, pDstPixmap;
+    uint32_t tmp1;
+   
+    /* Check for unsupported compositing operations. */
+    if (op >= sizeof(R128BlendOp) / sizeof(R128BlendOp[0])) {
+	R128TRACE(("Unsupported Composite op 0x%x\n", op));
+	return FALSE;
+    }
+    
+    pDstPixmap = R128GetDrawablePixmap(pDstPicture->pDrawable);
+    if (pDstPixmap->drawable.width > 1024 || pDstPixmap->drawable.height > 1024) {
+	R128TRACE(("Dest w/h too large (%d,%d).\n", pDstPixmap->drawable.width, pDstPixmap->drawable.height));
+	return FALSE;
+    }
+    
+    if (pSrcPicture->pDrawable) {
+        pSrcPixmap = R128GetDrawablePixmap(pSrcPicture->pDrawable);
+        if (pSrcPixmap->drawable.width > 1024 || pSrcPixmap->drawable.height > 1024) {
+	    R128TRACE(("Source w/h too large (%d,%d).\n", pSrcPixmap->drawable.width, pSrcPixmap->drawable.height));
+	    return FALSE;
+        }
+    } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill) {
+        R128TRACE(("Gradient pictures not supported yet\n"));
+	return FALSE;
+    }
+    
+    if (pDstPicture->format == PICT_a8) {
+        if (R128BlendOp[op].src_alpha || R128BlendOp[op].dst_alpha || pMaskPicture != NULL) {
+	    R128TRACE(("Alpha blending unsupported with A8 dst?\n"));
+	    return FALSE;
+	}
+    } else {
+        if (!R128GetDatatypePict1(pDstPicture->format, &tmp1)) return FALSE;
+    }
+    
+    if (pMaskPicture) {
+        PixmapPtr pMaskPixmap;
+	
+        if (pMaskPicture->pDrawable) {
+	    pMaskPixmap = R128GetDrawablePixmap(pMaskPicture->pDrawable);
+            if (pMaskPixmap->drawable.width > 1024 || pMaskPixmap->drawable.height > 1024) {
+	        R128TRACE(("Mask w/h too large (%d,%d).\n", pMaskPixmap->drawable.width, pMaskPixmap->drawable.height));
+	        return FALSE;
+            }
+	} else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill) {
+	    R128TRACE(("Gradient pictures not supported yet\n"));
+	    return FALSE;
+	}
+	
+	if (pMaskPicture->componentAlpha && R128BlendOp[op].src_alpha) {
+	    R128TRACE(("Component alpha not supported with source alpha blending\n"));
+	    return FALSE;
+	}
+	
+	if (!R128CheckCompositeTexture(pMaskPicture, pDstPicture, op)) return FALSE;
+    }
+    
+    if (!R128CheckCompositeTexture(pSrcPicture, pDstPicture, op)) return FALSE; 
+    return TRUE;
+}
+
+static Bool
+R128TextureSetup(PicturePtr pPict, PixmapPtr pPix, int unit, uint32_t *txsize, uint32_t *tex_cntl_c, Bool trying_solid)
+{
+    ScreenPtr     pScreen   = pPix->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    
+    int w, h, bytepp, shift, l2w, l2h, l2p, pitch;
+    
+    if (pPict->pDrawable) {
+	w = pPict->pDrawable->width;
+	h = pPict->pDrawable->height;
+    } else {
+	w = h = 1;
+    }
+    
+    pitch = exaGetPixmapPitch(pPix);
+    if ((pitch & (pitch - 1)) != 0) {
+        R128TRACE(("NPOT pitch 0x%x unsupported\n", pitch));
+	return FALSE;
+    }
+    
+    if (!R128GetDatatypePict2(pPict->format, tex_cntl_c)) return FALSE;
+    
+    bytepp = PICT_FORMAT_BPP(pPict->format) / 8;
+    *tex_cntl_c |= R128_MIP_MAP_DISABLE;
+
+    if (pPict->filter == PictFilterBilinear) {
+        *tex_cntl_c |= R128_MIN_BLEND_LINEAR | R128_MAG_BLEND_LINEAR;
+    } else if (pPict->filter == PictFilterNearest) {
+	*tex_cntl_c |= R128_MIN_BLEND_NEAREST | R128_MAG_BLEND_NEAREST;
+    } else {
+	R128TRACE(("Bad filter 0x%x\n", pPict->filter));
+	return FALSE;
+    }
+
+    if (unit == 0) {
+        shift = 0;
+    } else {
+        shift = 16;
+        *tex_cntl_c |= R128_SEC_SELECT_SEC_ST;
+    }
+
+    l2w = R128MinBits(w) - 1;
+    l2h = R128MinBits(h) - 1;
+    l2p = R128MinBits(pitch / bytepp) - 1;
+
+    if (pPict->repeat && w == 1 && h == 1) {
+        l2p = 0;
+    } else if (pPict->repeat && l2p != l2w) {
+        R128TRACE(("Repeat not supported for pitch != width\n"));
+	return FALSE;
+    }
+    
+    l2w = l2p;
+    /* This is required to handle NPOT height */
+    if ((unit == 1) || (unit == 0 && !pPict->repeat && !trying_solid)) l2h++;
+    
+    info->state_2d.widths[unit] = 1 << l2w;
+    info->state_2d.heights[unit] = 1 << l2h;
+    *txsize |= l2p << (R128_TEX_PITCH_SHIFT + shift);
+    *txsize |= ((w > h) ? l2w : l2h) << (R128_TEX_SIZE_SHIFT + shift);
+    *txsize |= l2h << (R128_TEX_HEIGHT_SHIFT + shift);
+
+    if (pPict->transform != 0) {
+        info->state_2d.is_transform[unit] = TRUE;
+        info->state_2d.transform[unit] = pPict->transform;
+    } else {
+        info->state_2d.is_transform[unit] = FALSE;
+    }
+
+    return TRUE;
+}
+
+/* The composite preparation commands that are the same every time can
+ * just be written once.
+ */
+#define COMPOSITE_SETUP()				\
+do {							\
+    BEGIN_RING( 10 );					\
+							\
+    OUT_RING_REG(R128_SCALE_3D_CNTL,			\
+		    R128_SCALE_3D_TEXMAP_SHADE |	\
+		    R128_SCALE_PIX_REPLICATE |		\
+		    R128_TEX_CACHE_SPLIT |		\
+		    R128_TEX_MAP_ALPHA_IN_TEXTURE |	\
+		    R128_TEX_CACHE_LINE_SIZE_4QW);	\
+    OUT_RING_REG(R128_SETUP_CNTL,			\
+		    R128_COLOR_SOLID_COLOR |		\
+		    R128_PRIM_TYPE_TRI |		\
+		    R128_TEXTURE_ST_MULT_W |		\
+		    R128_STARTING_VERTEX_1 |		\
+		    R128_ENDING_VERTEX_3 |		\
+		    R128_SUB_PIX_4BITS);		\
+    OUT_RING_REG(R128_PM4_VC_FPU_SETUP,			\
+		    R128_FRONT_DIR_CCW |		\
+		    R128_BACKFACE_CULL |		\
+		    R128_FRONTFACE_SOLID |		\
+		    R128_FPU_COLOR_SOLID |		\
+		    R128_FPU_SUB_PIX_4BITS |		\
+		    R128_FPU_MODE_3D |			\
+		    R128_TRAP_BITS_DISABLE |		\
+		    R128_XFACTOR_2 |			\
+		    R128_YFACTOR_2 |			\
+		    R128_FLAT_SHADE_VERTEX_OGL |	\
+		    R128_FPU_ROUND_TRUNCATE |		\
+		    R128_WM_SEL_8DW);			\
+    OUT_RING_REG(R128_PLANE_3D_MASK_C, 0xffffffff);	\
+    OUT_RING_REG(R128_CONSTANT_COLOR_C, 0xff000000);	\
+							\
+    ADVANCE_RING();					\
+} while(0)
+
+static Bool
+R128CCEPrepareComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
+    PicturePtr pDstPicture, PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
+{
+    ScreenPtr     pScreen   = pDst->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    unsigned char *R128MMIO = info->MMIO;
+    RING_LOCALS;
+    
+    Bool add_src = FALSE;
+    Bool add_msk = FALSE;
+    uint32_t txsize = 0, prim_tex_cntl_c, sec_tex_cntl_c = 0, dstDatatype;
+    uint32_t src_pitch_offset, dst_pitch_offset, color_factor, in_color_factor, alpha_comb;
+    uint32_t sblend, dblend, blend_cntl, window_offset;
+    int i;
+    
+    if (pDstPicture->format == PICT_a8) {
+        if (R128BlendOp[op].dst_alpha) {
+	    R128TRACE(("Can't dst alpha blend A8\n"));
+	    return FALSE;
+        }
+        dstDatatype = R128_DATATYPE_Y8;
+    } else {
+        if (!R128GetDatatypePict1(pDstPicture->format, &dstDatatype)) return FALSE;
+    }
+    
+    if (!pSrc) {
+	pSrc = R128SolidPixmap(pScreen, cpu_to_le32(pSrcPicture->pSourcePict->solidFill.color));
+	if (!pSrc) {
+	    R128TRACE(("Failed to create solid scratch pixmap\n"));
+	    return FALSE;
+	}
+	add_src = TRUE;
+    }
+    if (pMaskPicture) {
+        info->state_2d.has_mask = TRUE;
+        if (!pMask) {
+	    pMask = R128SolidPixmap(pScreen, cpu_to_le32(pMaskPicture->pSourcePict->solidFill.color));
+	    if (!pMask) {
+	        if (!pSrcPicture->pDrawable)
+		    pScreen->DestroyPixmap(pSrc);
+		info->state_2d.has_mask = FALSE;
+	        R128TRACE(("Failed to create solid scratch pixmap\n"));
+	        return FALSE;
+	    }
+	    add_msk = TRUE;
+	}
+    } else {
+        info->state_2d.has_mask = FALSE;
+    }
+    
+    if (!R128TextureSetup(pSrcPicture, pSrc, 0, &txsize, &prim_tex_cntl_c, (add_src || add_msk))) return FALSE;
+    
+    if (pMask != NULL) {
+	info->state_2d.has_mask = TRUE;
+        if (!R128TextureSetup(pMaskPicture, pMask, 1, &txsize, &sec_tex_cntl_c, (add_src || add_msk))) return FALSE;
+    } else {
+	info->state_2d.has_mask = FALSE;
+	info->state_2d.is_transform[1] = FALSE;
+    }
+    
+    if (!R128GetPixmapOffsetPitch(pDst, &dst_pitch_offset)) return FALSE;
+    if (!R128GetPixmapOffsetPitch(pSrc, &src_pitch_offset)) return FALSE;
+   
+    info->state_2d.in_use = TRUE;
+    if (add_src) info->state_2d.src_pix = pSrc;
+    if (add_msk) info->state_2d.msk_pix = pMask;
+    sblend = R128BlendOp[op].sblend;
+    dblend = R128BlendOp[op].dblend;
+    if (PICT_FORMAT_A(pDstPicture->format) == 0 && R128BlendOp[op].dst_alpha) {
+        if (sblend == R128_ALPHA_BLEND_DSTALPHA)
+	    sblend = R128_ALPHA_BLEND_ONE;
+	else if (sblend == R128_ALPHA_BLEND_INVDSTALPHA)
+	    sblend = R128_ALPHA_BLEND_ZERO;
+    }
+    blend_cntl = (sblend << R128_ALPHA_BLEND_SRC_SHIFT) |
+		 (dblend << R128_ALPHA_BLEND_DST_SHIFT);
+
+    R128CCE_REFRESH( pScrn, info );
+    
+    if (!info->state_2d.composite_setup) {
+        COMPOSITE_SETUP();
+	/* DRI and EXA are fighting over control of the texture hardware.
+	 * That means we need to set up the compositing hardware every time
+	 * while a 3D app is running and once after it closes.
+	 */
+	if (!info->have3DWindows)
+	    info->state_2d.composite_setup = TRUE;
+    }
+    
+    /* We cannot guarantee that this register will stay zero - DRI needs it too. */
+    if (info->have3DWindows)
+        info->ExaDriver->WaitMarker(pScreen, 0);
+    window_offset = INREG(R128_WINDOW_XY_OFFSET);
+    info->state_2d.x_offset = (window_offset & 0xfff00000) >> R128_WINDOW_X_SHIFT;
+    info->state_2d.y_offset = (window_offset & 0x000fffff) >> R128_WINDOW_Y_SHIFT;
+
+    info->state_2d.dp_gui_master_cntl = (R128_GMC_DST_PITCH_OFFSET_CNTL |
+        R128_GMC_BRUSH_SOLID_COLOR |
+        (dstDatatype >> 8) |
+        R128_GMC_SRC_DATATYPE_COLOR |
+        R128_ROP[3].rop |
+        R128_DP_SRC_SOURCE_MEMORY |
+        R128_GMC_3D_FCN_EN |
+        R128_GMC_CLR_CMP_CNTL_DIS |
+        R128_GMC_AUX_CLIP_DIS |
+        R128_GMC_WR_MSK_DIS);
+    info->state_2d.dp_cntl = (R128_DST_X_LEFT_TO_RIGHT | R128_DST_Y_TOP_TO_BOTTOM);
+    info->state_2d.dp_brush_frgd_clr = 0xffffffff;
+    info->state_2d.dp_brush_bkgd_clr = 0x00000000;
+    info->state_2d.dp_src_frgd_clr = 0xffffffff;
+    info->state_2d.dp_src_bkgd_clr = 0x00000000;
+    info->state_2d.dp_write_mask = 0xffffffff;
+    info->state_2d.dst_pitch_offset = dst_pitch_offset;
+    info->state_2d.src_pitch_offset = src_pitch_offset;
+    info->state_2d.default_sc_bottom_right = (R128_DEFAULT_SC_RIGHT_MAX | R128_DEFAULT_SC_BOTTOM_MAX);
+    EmitCCE2DState(pScrn);
+ 
+    BEGIN_RING( 6 );
+    OUT_RING_REG(R128_MISC_3D_STATE_CNTL_REG,
+        R128_MISC_SCALE_3D_TEXMAP_SHADE |
+        R128_MISC_SCALE_PIX_REPLICATE |
+        R128_ALPHA_COMB_ADD_CLAMP |
+        blend_cntl);
+    OUT_RING_REG(R128_TEX_CNTL_C,
+        R128_TEXMAP_ENABLE |
+        ((pMask != NULL) ? R128_SEC_TEXMAP_ENABLE : 0) |
+        R128_ALPHA_ENABLE |
+        R128_TEX_CACHE_FLUSH);
+    OUT_RING_REG(R128_PC_GUI_CTLSTAT, R128_PC_FLUSH_GUI);
+    ADVANCE_RING();
+
+    /* IN operator: Without a mask, only the first texture unit is enabled.
+     * With a mask, we put the source in the first unit and have it pass
+     * through as input to the 2nd.  The 2nd unit takes the incoming source
+     * pixel and modulates it with either the alpha or each of the channels
+     * in the mask, depending on componentAlpha.
+     */
+    BEGIN_RING( 15 );
+    /* R128_PRIM_TEX_CNTL_C,
+     * R128_PRIM_TEXTURE_COMBINE_CNTL_C,
+     * R128_TEX_SIZE_PITCH_C,
+     * R128_PRIM_TEX_0_OFFSET_C - R128_PRIM_TEX_10_OFFSET_C
+     */
+    OUT_RING(CCE_PACKET0(R128_PRIM_TEX_CNTL_C, 13));
+    OUT_RING(prim_tex_cntl_c);
+
+    /* If this is the only stage and the dest is a8, route the alpha result 
+     * to the color (red channel, in particular), too.  Otherwise, be sure
+     * to zero out color channels of an a8 source.
+     */
+    if (pMaskPicture == NULL && pDstPicture->format == PICT_a8)
+        color_factor = R128_COLOR_FACTOR_ALPHA;
+    else if (pSrcPicture->format == PICT_a8)
+        color_factor = R128_COLOR_FACTOR_CONST_COLOR;
+    else
+        color_factor = R128_COLOR_FACTOR_TEX;
+
+    if (PICT_FORMAT_A(pSrcPicture->format) == 0)
+        alpha_comb = R128_COMB_ALPHA_COPY_INP;
+    else
+        alpha_comb = R128_COMB_ALPHA_DIS;
+
+    OUT_RING(R128_COMB_COPY |
+        color_factor |
+        R128_INPUT_FACTOR_INT_COLOR |
+        alpha_comb |
+        R128_ALPHA_FACTOR_TEX_ALPHA |
+        R128_INP_FACTOR_A_CONST_ALPHA);
+    OUT_RING(txsize);
+    /* We could save some output by only writing the offset register that
+     * will actually be used.  On the other hand, this is easy.
+     */
+    for (i = 0; i <= 10; i++)
+        OUT_RING(exaGetPixmapOffset(pSrc));
+    ADVANCE_RING();
+
+    if (pMask != NULL) {
+        BEGIN_RING( 14 );
+	/* R128_SEC_TEX_CNTL_C,
+	 * R128_SEC_TEXTURE_COMBINE_CNTL_C,
+	 * R128_SEC_TEX_0_OFFSET_C - R128_SEC_TEX_10_OFFSET_C
+	 */
+        OUT_RING(CCE_PACKET0(R128_SEC_TEX_CNTL_C, 12));
+        OUT_RING(sec_tex_cntl_c);
+
+        if (pDstPicture->format == PICT_a8) {
+            color_factor = R128_COLOR_FACTOR_ALPHA;
+            in_color_factor = R128_INPUT_FACTOR_PREV_ALPHA;
+        } else if (pMaskPicture->componentAlpha) {
+            color_factor = R128_COLOR_FACTOR_TEX;
+            in_color_factor = R128_INPUT_FACTOR_PREV_COLOR;
+        } else {
+            color_factor = R128_COLOR_FACTOR_ALPHA;
+            in_color_factor = R128_INPUT_FACTOR_PREV_COLOR;
+        }
+
+        OUT_RING(R128_COMB_MODULATE |
+            color_factor |
+            in_color_factor |
+            R128_COMB_ALPHA_MODULATE |
+            R128_ALPHA_FACTOR_TEX_ALPHA |
+            R128_INP_FACTOR_A_PREV_ALPHA);
+        for (i = 0; i <= 10; i++)
+            OUT_RING(exaGetPixmapOffset(pMask));	
+	ADVANCE_RING();
+    }
+
+    return TRUE;
+}
+
+typedef union { float f; CARD32 i; } fi_type;
+
+static inline CARD32
+R128FloatAsInt(float val)
+{
+	fi_type fi;
+
+	fi.f = val;
+	return fi.i;
+}
+
+#define VTX_OUT_MASK(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY)			\
+do {											\
+    OUT_RING(R128FloatAsInt((_dstX)));							\
+    OUT_RING(R128FloatAsInt(((float)(_dstY)) + 0.125));					\
+    OUT_RING(R128FloatAsInt(0.0));							\
+    OUT_RING(R128FloatAsInt(1.0));							\
+    OUT_RING(R128FloatAsInt((((float)(_srcX)) + 0.5) / (info->state_2d.widths[0])));	\
+    OUT_RING(R128FloatAsInt((((float)(_srcY)) + 0.5) / (info->state_2d.heights[0])));	\
+    OUT_RING(R128FloatAsInt((((float)(_maskX)) + 0.5) / (info->state_2d.widths[1])));	\
+    OUT_RING(R128FloatAsInt((((float)(_maskY)) + 0.5) / (info->state_2d.heights[1])));	\
+} while (0)
+
+#define VTX_OUT(_dstX, _dstY, _srcX, _srcY)						\
+do {								       			\
+    OUT_RING(R128FloatAsInt((_dstX)));							\
+    OUT_RING(R128FloatAsInt(((float)(_dstY)) + 0.125));					\
+    OUT_RING(R128FloatAsInt(0.0));							\
+    OUT_RING(R128FloatAsInt(1.0));							\
+    OUT_RING(R128FloatAsInt((((float)(_srcX)) + 0.5) / (info->state_2d.widths[0])));	\
+    OUT_RING(R128FloatAsInt((((float)(_srcY)) + 0.5) / (info->state_2d.heights[0])));	\
+} while (0)
+
+static void
+R128CCEComposite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, int dstX, int dstY, int w, int h)
+{
+    ScreenPtr     pScreen   = pDst->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    RING_LOCALS;
+    
+    int srcXend, srcYend, maskXend, maskYend;
+    PictVector v;
+    
+    srcXend = srcX + w;
+    srcYend = srcY + h;
+    maskXend = maskX + w;
+    maskYend = maskY + h;
+    if (info->state_2d.is_transform[0]) {
+        v.vector[0] = IntToxFixed(srcX);
+        v.vector[1] = IntToxFixed(srcY);
+        v.vector[2] = xFixed1;
+        PictureTransformPoint(info->state_2d.transform[0], &v);
+        srcX = xFixedToInt(v.vector[0]);
+        srcY = xFixedToInt(v.vector[1]);
+        v.vector[0] = IntToxFixed(srcXend);
+        v.vector[1] = IntToxFixed(srcYend);
+        v.vector[2] = xFixed1;
+        PictureTransformPoint(info->state_2d.transform[0], &v);
+        srcXend = xFixedToInt(v.vector[0]);
+        srcYend = xFixedToInt(v.vector[1]);
+    }
+    if (info->state_2d.is_transform[1]) {
+        v.vector[0] = IntToxFixed(maskX);
+        v.vector[1] = IntToxFixed(maskY);
+        v.vector[2] = xFixed1;
+        PictureTransformPoint(info->state_2d.transform[1], &v);
+        maskX = xFixedToInt(v.vector[0]);
+        maskY = xFixedToInt(v.vector[1]);
+        v.vector[0] = IntToxFixed(maskXend);
+        v.vector[1] = IntToxFixed(maskYend);
+        v.vector[2] = xFixed1;
+        PictureTransformPoint(info->state_2d.transform[1], &v);
+        maskXend = xFixedToInt(v.vector[0]);
+        maskYend = xFixedToInt(v.vector[1]);
+    }
+    
+    dstX -= info->state_2d.x_offset;
+    dstY -= info->state_2d.y_offset;
+    
+    R128CCE_REFRESH( pScrn, info );
+    
+    if (info->state_2d.has_mask) {
+        BEGIN_RING( 3 + 4 * 8 );
+        OUT_RING(CCE_PACKET3(R128_CCE_PACKET3_3D_RNDR_GEN_PRIM, 1 + 4 * 8));
+    
+	OUT_RING(R128_CCE_VC_FRMT_RHW |
+            R128_CCE_VC_FRMT_S_T |
+            R128_CCE_VC_FRMT_S2_T2);
+    } else {
+        BEGIN_RING( 3 + 4 * 6 );
+        OUT_RING(CCE_PACKET3(R128_CCE_PACKET3_3D_RNDR_GEN_PRIM, 1 + 4 * 6));
+    
+	OUT_RING(R128_CCE_VC_FRMT_RHW |
+            R128_CCE_VC_FRMT_S_T);
+    }
+    
+    OUT_RING(R128_CCE_VC_CNTL_PRIM_TYPE_TRI_FAN |
+        R128_CCE_VC_CNTL_PRIM_WALK_RING |
+        (4 << R128_CCE_VC_CNTL_NUM_SHIFT));
+    
+    if (info->state_2d.has_mask) {
+	VTX_OUT_MASK(dstX,     dstY,     srcX,    srcY,    maskX,    maskY);
+	VTX_OUT_MASK(dstX,     dstY + h, srcX,    srcYend, maskX,    maskYend);
+	VTX_OUT_MASK(dstX + w, dstY + h, srcXend, srcYend, maskXend, maskYend);
+	VTX_OUT_MASK(dstX + w, dstY,     srcXend, srcY,    maskXend, maskY);
+    } else {
+	VTX_OUT(dstX,     dstY,     srcX,    srcY);
+	VTX_OUT(dstX,     dstY + h, srcX,    srcYend);
+	VTX_OUT(dstX + w, dstY + h, srcXend, srcYend);
+	VTX_OUT(dstX + w, dstY,     srcXend, srcY);
+    }
+    
+    ADVANCE_RING();
+}
+
+#define R128CCEDoneComposite R128Done
diff --git a/src/r128_video.c b/src/r128_video.c
index 81b2ab6..d9bfafa 100644
--- a/src/r128_video.c
+++ b/src/r128_video.c
@@ -56,7 +56,8 @@ typedef struct {
    int           saturation;
    Bool          doubleBuffer;
    unsigned char currentBuffer;
-   FBLinearPtr   linear;
+   void*         BufferHandle;
+   int		 videoOffset;
    RegionRec     clip;
    CARD32        colorKey;
    CARD32        videoStatus;
@@ -270,9 +271,16 @@ R128StopVideo(ScrnInfoPtr pScrn, pointer data, Bool cleanup)
      if(pPriv->videoStatus & CLIENT_VIDEO_ON) {
 	OUTREG(R128_OV0_SCALE_CNTL, 0);
      }
-     if(pPriv->linear) {
-	xf86FreeOffscreenLinear(pPriv->linear);
-	pPriv->linear = NULL;
+     if(pPriv->BufferHandle) {
+        if (!info->useEXA) {
+	   xf86FreeOffscreenLinear((FBLinearPtr) pPriv->BufferHandle);
+	} 
+#ifdef USE_EXA
+	else {
+	   exaOffscreenFree(pScrn->pScreen, (ExaOffscreenArea *) pPriv->BufferHandle);
+	}
+#endif
+	pPriv->BufferHandle = NULL;
      }
      pPriv->videoStatus = 0;
   } else {
@@ -381,7 +389,7 @@ R128QueryBestSize(
  *
  */
 
-static Bool
+Bool
 R128DMA(
   R128InfoPtr info,
   unsigned char *src,
@@ -403,13 +411,13 @@ R128DMA(
     int sizes[MAXPASSES], list[MAXPASSES];
     drmDMAReq req;
     drmR128Blit blit;
-
+    
     /* Verify conditions and bail out as early as possible */
-    if (!info->directRenderingEnabled || !info->DMAForXv)
+    if (!info->DMAForXv || !info->directRenderingEnabled)
         return FALSE;
 
     if ((hpass = min(h,(BUFSIZE/w))) == 0)
-	return FALSE;
+        return FALSE;
 
     if ((passes = (h+hpass-1)/hpass) > MAXPASSES)
         return FALSE;
@@ -564,45 +572,78 @@ R128CopyData420(
 }
 
 
-static FBLinearPtr
+static CARD32
 R128AllocateMemory(
    ScrnInfoPtr pScrn,
-   FBLinearPtr linear,
+   void **mem_struct,
    int size
 ){
-   ScreenPtr pScreen;
-   FBLinearPtr new_linear;
+   R128InfoPtr info = R128PTR(pScrn);
+   ScreenPtr pScreen = xf86ScrnToScreen(pScrn);
+   int offset = 0;
 
-   if(linear) {
-	if(linear->size >= size)
-	   return linear;
+   if(!info->useEXA) {
+        FBLinearPtr linear = *mem_struct;
+        int cpp = info->CurrentLayout.pixel_bytes;
+	
+	/* XAA allocates in units of pixels at the screen bpp, so adjust size appropriately. */
+	size = (size + cpp - 1) / cpp;
+	
+        if(linear) {
+	     if(linear->size >= size)
+	        return linear->offset * cpp;
 
-	if(xf86ResizeOffscreenLinear(linear, size))
-	   return linear;
+	     if(xf86ResizeOffscreenLinear(linear, size))
+	        return linear->offset * cpp;
 
-	xf86FreeOffscreenLinear(linear);
-   }
+	     xf86FreeOffscreenLinear(linear);
+        }
 
-   pScreen = xf86ScrnToScreen(pScrn);
 
-   new_linear = xf86AllocateOffscreenLinear(pScreen, size, 8,
+        linear = xf86AllocateOffscreenLinear(pScreen, size, 8,
 						NULL, NULL, NULL);
+	*mem_struct = linear;
 
-   if(!new_linear) {
-	int max_size;
+        if(!linear) {
+	     int max_size;
 
-	xf86QueryLargestOffscreenLinear(pScreen, &max_size, 8,
+	     xf86QueryLargestOffscreenLinear(pScreen, &max_size, 8,
 						PRIORITY_EXTREME);
 
-	if(max_size < size)
-	   return NULL;
+	     if(max_size < size)
+	        return NULL;
 
-	xf86PurgeUnlockedOffscreenAreas(pScreen);
-	new_linear = xf86AllocateOffscreenLinear(pScreen, size, 8,
+	     xf86PurgeUnlockedOffscreenAreas(pScreen);
+	     linear = xf86AllocateOffscreenLinear(pScreen, size, 8,
 						NULL, NULL, NULL);
+	     
+	     if(!linear) return 0;
+        }
+	
+	offset = linear->offset * cpp;
+   } 
+#ifdef USE_EXA   
+   else {
+        /* EXA support based on mga driver */
+	ExaOffscreenArea *area = *mem_struct;
+	
+	if(area) {
+	     if(area->size >= size)
+	        return area->offset;
+	     
+	     exaOffscreenFree(pScrn->pScreen, area);
+	}
+	
+	area = exaOffscreenAlloc(pScrn->pScreen, size, 64, TRUE, NULL, NULL);
+	*mem_struct = area;
+	
+	if(!area) return 0;
+	
+	offset = area->offset;
    }
+#endif
 
-   return new_linear;
+   return offset;
 }
 
 static void
@@ -841,7 +882,7 @@ R128PutImage(
 	srcPitch = (width + 3) & ~3;
 	srcPitch2 = ((width >> 1) + 3) & ~3;
 	dstPitch = (width + 31) & ~31;  /* of luma */
-	new_size = ((dstPitch * (height + (height >> 1))) + bpp - 1) / bpp;
+	new_size = dstPitch * (height + (height >> 1));
 	s1offset = 0;
 	s2offset = srcPitch * height;
 	s3offset = (srcPitch2 * (height >> 1)) + s2offset;
@@ -852,14 +893,14 @@ R128PutImage(
 	srcPitch = width << 1;
 	srcPitch2 = 0;
 	dstPitch = ((width << 1) + 15) & ~15;
-	new_size = ((dstPitch * height) + bpp - 1) / bpp;
+	new_size = dstPitch * height;
 	s1offset = 0;
 	s2offset = 0;
 	s3offset = 0;
 	break;
    }
 
-   if(!(pPriv->linear = R128AllocateMemory(pScrn, pPriv->linear,
+   if(!(pPriv->videoOffset = R128AllocateMemory(pScrn, &(pPriv->BufferHandle),
 		pPriv->doubleBuffer ? (new_size << 1) : new_size)))
    {
 	return BadAlloc;
@@ -872,9 +913,9 @@ R128PutImage(
    left = (xa >> 16) & ~1;
    npixels = ((((xb + 0xffff) >> 16) + 1) & ~1) - left;
 
-   offset = pPriv->linear->offset * bpp;
+   offset = pPriv->videoOffset;
    if(pPriv->doubleBuffer)
-	offset += pPriv->currentBuffer * new_size * bpp;
+	offset += pPriv->currentBuffer * new_size;
 
    switch(id) {
     case FOURCC_YV12:
@@ -1015,9 +1056,16 @@ R128VideoTimerCallback(ScrnInfoPtr pScrn, Time now)
 	    }
 	} else {  /* FREE_TIMER */
 	    if(pPriv->freeTime < now) {
-		if(pPriv->linear) {
-		   xf86FreeOffscreenLinear(pPriv->linear);
-		   pPriv->linear = NULL;
+		if(pPriv->BufferHandle) {
+		   if (!info->useEXA) {
+		      xf86FreeOffscreenLinear((FBLinearPtr) pPriv->BufferHandle);
+		   }
+#ifdef USE_EXA 
+		   else {
+		      exaOffscreenAreaFree(pScrn->pScreen, (ExaOffscreenArea *) pPriv->BufferHandle);
+		   }
+#endif
+		   pPriv->BufferHandle = NULL;
 		}
 		pPriv->videoStatus = 0;
 		info->VideoTimerCallback = NULL;
-- 
1.7.11.1



More information about the xorg-devel mailing list