XV: Making use of Radeon's YV12 scaler

Tilman Sauerbeck tilman at code-monkey.de
Sun Jan 8 03:25:22 PST 2006


Hi,
I noticed that the Radeon driver's support for YUV420 just sucks
(it goes through a software conversion from YUV420 to YUV422) ;)

Radeon has a YV12 BES though, which I think can be used for this.
I tried to use that YV12 scaler, but I cannot get it to work properly.
All I get is b/w video output (which tells me it cannot be *that* much
off).

Can anyone please look at these patches and tell me where the problem
might be?

Thanks,
Tilman

-- 
GnuPG key available at
http://code-monkey.de/files/tsauerbeck-public-key.asc
-------------- next part --------------
Index: xf86-video-ati/src/radeon_reg.h
===================================================================
--- xf86-video-ati.orig/src/radeon_reg.h
+++ xf86-video-ati/src/radeon_reg.h
@@ -1016,6 +1016,7 @@
 #       define  RADEON_SCALER_BURST_PER_PLANE      0x007F0000L
 #       define  RADEON_SCALER_DOUBLE_BUFFER        0x01000000L
 #       define  RADEON_SCALER_DIS_LIMIT            0x08000000L
+#       define  RADEON_SCALER_PRG_LOAD_START       0x10000000L
 #       define  RADEON_SCALER_INT_EMU              0x20000000L
 #       define  RADEON_SCALER_ENABLE               0x40000000L
 #       define  RADEON_SCALER_SOFT_RESET           0x80000000L
Index: xf86-video-ati/src/radeon_video.c
===================================================================
--- xf86-video-ati.orig/src/radeon_video.c
+++ xf86-video-ati/src/radeon_video.c
@@ -2665,35 +2665,36 @@ RADEONDisplayVideo(
     OUTREG(RADEON_OV0_P1_H_ACCUM_INIT, p1_h_accum_init);
     OUTREG(RADEON_OV0_P23_H_ACCUM_INIT, p23_h_accum_init);
 
-   scale_cntl = RADEON_SCALER_ADAPTIVE_DEINT | RADEON_SCALER_DOUBLE_BUFFER 
-        | RADEON_SCALER_ENABLE | RADEON_SCALER_SMART_SWITCH | (0x7f<<16) | scaler_src;
-   switch(id){
-        case FOURCC_UYVY:
-                OUTREG(RADEON_OV0_SCALE_CNTL, RADEON_SCALER_SOURCE_YVYU422 | scale_cntl);
-                break;
-        case FOURCC_RGB24:
-        case FOURCC_RGBA32:
-                OUTREG(RADEON_OV0_SCALE_CNTL, RADEON_SCALER_SOURCE_32BPP | scale_cntl | 0x10000000);
-                break;
-        case FOURCC_RGBT16:
-                OUTREG(RADEON_OV0_SCALE_CNTL, RADEON_SCALER_SOURCE_16BPP 
-                        | 0x10000000 
-                        | scale_cntl);
-                break;
-        case FOURCC_RGB16:
-                OUTREG(RADEON_OV0_SCALE_CNTL, RADEON_SCALER_SOURCE_16BPP 
-                        | 0x10000000 
-                        | scale_cntl);
-                break;
-        case FOURCC_YUY2:
-        case FOURCC_YV12:
-        case FOURCC_I420:
-        default:
-                OUTREG(RADEON_OV0_SCALE_CNTL,  RADEON_SCALER_SOURCE_VYUY422 
-                        | ((info->ChipFamily>=CHIP_FAMILY_R200) ? RADEON_SCALER_TEMPORAL_DEINT :0) 
-                        | scale_cntl);
-        }
+    scale_cntl = RADEON_SCALER_ADAPTIVE_DEINT |
+                 RADEON_SCALER_DOUBLE_BUFFER |
+                 RADEON_SCALER_ENABLE | RADEON_SCALER_SMART_SWITCH |
+                 (0x7f << 16) | scaler_src;
+
+    switch(id) {
+    case FOURCC_UYVY:
+        scale_cntl |= RADEON_SCALER_SOURCE_YVYU422;
+        break;
+    case FOURCC_RGB24:
+    case FOURCC_RGBA32:
+        scale_cntl |= RADEON_SCALER_SOURCE_32BPP |
+                      RADEON_SCALER_PRG_LOAD_START;
+        break;
+    case FOURCC_RGBT16:
+    case FOURCC_RGB16:
+        scale_cntl |= RADEON_SCALER_SOURCE_16BPP |
+                      RADEON_SCALER_PRG_LOAD_START;
+        break;
+    case FOURCC_YUY2:
+    case FOURCC_YV12:
+    case FOURCC_I420:
+    default:
+        scale_cntl |= RADEON_SCALER_SOURCE_VYUY422;
+
+        if (info->ChipFamily >= CHIP_FAMILY_R200)
+            scale_cntl |= RADEON_SCALER_TEMPORAL_DEINT;
+    }
 
+    OUTREG(RADEON_OV0_SCALE_CNTL, scale_cntl);
     OUTREG(RADEON_OV0_REG_LOAD_CNTL, 0);
 }
 
-------------- next part --------------
Index: xf86-video-ati/src/radeon_video.c
===================================================================
--- xf86-video-ati.orig/src/radeon_video.c
+++ xf86-video-ati/src/radeon_video.c
@@ -2100,6 +2100,55 @@ RADEONCopyData(
     }
 }
 
+static void
+RADEONCopyDataPlanar(ScrnInfoPtr pScrn,
+                     unsigned char *src1, /* first source plane */
+                     unsigned char *src2, /* second source plane */
+                     unsigned char *src3, /* third source plane */
+                     unsigned char *dst1, /* destination for src1 */
+                     unsigned char *dst2, /* destination for src2 */
+                     unsigned char *dst3, /* destination for src3 */
+                     int srcPitch,  /* byte step between rows of src1 */
+                     int srcPitch2, /* byte step between rows of src2 and src3 */
+                     int dstPitch,  /* byte step between rows of dst1; halved for dst2/dst3 */
+                     int h,         /* rows copied for the first plane; halved for the others */
+                     int w)         /* bytes copied per row of the first plane; halved for the others */
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn); /* NOTE(review): not referenced by name below; presumably needed by FLUSH_RING() - confirm */
+    int count;
+    /* Copy three separate planes row by row with memcpy(), then call FLUSH_RING(). */
+    count = h;
+    /* First plane: h rows of w bytes, src1 -> dst1. */
+    while (count--) {
+        memcpy(dst1, src1, w);
+        src1 += srcPitch;
+        dst1 += dstPitch;
+    }
+    /* The two remaining planes use half the width, half the height and half the destination pitch. */
+    w >>= 1;
+    h >>= 1;
+    dstPitch >>= 1;
+
+    count = h;
+    /* Second plane: src2 -> dst2, source rows advance by srcPitch2. */
+    while (count--) {
+        memcpy(dst2, src2, w);
+        src2 += srcPitch2;
+        dst2 += dstPitch;
+    }
+
+    count = h;
+    /* Third plane: src3 -> dst3, same geometry as the second plane. */
+    while (count--) {
+        memcpy(dst3, src3, w);
+        src3 += srcPitch2;
+        dst3 += dstPitch;
+    }
+    /* NOTE(review): FLUSH_RING() is defined outside this patch; its effect is not visible here. */
+	FLUSH_RING();
+}
+
+#if 0
 static void RADEON_420_422(
     unsigned int *d,
     unsigned char *s1,
@@ -2114,6 +2163,7 @@ static void RADEON_420_422(
 	n--;
     }
 }
+#endif
 
 
 static void
@@ -2190,6 +2240,7 @@ RADEONCopyRGB24Data(
     }
 }
 
+#if 0
 static void
 RADEONCopyMungedData(
    ScrnInfoPtr pScrn,
@@ -2285,6 +2336,7 @@ RADEONCopyMungedData(
 #endif
     }
 }
+#endif
 
 /* Allocates memory, either by resizing the allocation pointed to by mem_struct,
  * or by freeing mem_struct (if non-NULL) and allocating a new space.  The size
@@ -2684,9 +2736,11 @@ RADEONDisplayVideo(
         scale_cntl |= RADEON_SCALER_SOURCE_16BPP |
                       RADEON_SCALER_PRG_LOAD_START;
         break;
-    case FOURCC_YUY2:
     case FOURCC_YV12:
     case FOURCC_I420:
+        scale_cntl |= RADEON_SCALER_SOURCE_YUV12;
+        break;
+    case FOURCC_YUY2:
     default:
         scale_cntl |= RADEON_SCALER_SOURCE_VYUY422;
 
@@ -2715,8 +2769,9 @@ RADEONPutImage(
    RADEONPortPrivPtr pPriv = (RADEONPortPrivPtr)data;
    INT32 xa, xb, ya, yb;
    unsigned char *dst_start;
-   int new_size, offset, s2offset, s3offset;
+   int new_size, offset, s1offset, s2offset, s3offset;
    int srcPitch, srcPitch2, dstPitch;
+   int d1line, d2line, d3line, d1offset, d2offset, d3offset;
    int top, left, npixels, nlines, bpp;
    BoxRec dstBox;
    CARD32 tmp;
@@ -2736,7 +2791,7 @@ RADEONPutImage(
     */
 
    /* make the compiler happy */
-   s2offset = s3offset = srcPitch2 = 0;
+   s1offset = s2offset = s3offset = srcPitch2 = 0;
 
    if(src_w > (drw_w << 4))
 	drw_w = src_w >> 4;
@@ -2794,6 +2849,7 @@ RADEONPutImage(
 	break;
    case FOURCC_YV12:
    case FOURCC_I420:
+    /* working, but bw only */
 	dstPitch = ((width << 1) + 63) & ~63;
 	srcPitch = (width + 3) & ~3;
 	s2offset = srcPitch * height;
@@ -2832,21 +2888,32 @@ RADEONPutImage(
    switch(id) {
     case FOURCC_YV12:
     case FOURCC_I420:
-	top &= ~1;
-	dst_start += left << 1;
-	tmp = ((top >> 1) * srcPitch2) + (left >> 1);
-	s2offset += tmp;
-	s3offset += tmp;
-	if(id == FOURCC_I420) {
-	   tmp = s2offset;
-	   s2offset = s3offset;
-	   s3offset = tmp;
-	}
-	nlines = ((((yb + 0xffff) >> 16) + 1) & ~1) - top;
-	RADEONCopyMungedData(pScrn, buf + (top * srcPitch) + left,
-			     buf + s2offset, buf + s3offset, dst_start,
-			     srcPitch, srcPitch2, dstPitch, nlines, npixels);
-	break;
+        d1line = top * dstPitch;
+        d2line = (height * dstPitch) + ((top >> 1) * (dstPitch >> 1));
+        d3line = d2line + ((height >> 1) * (dstPitch >> 1));
+
+        top &= ~1;
+
+        d1offset = (top * dstPitch) + left + offset;
+        d2offset = d2line + (left >> 1) + offset;
+        d3offset = d3line + (left >> 1) + offset;
+
+        s1offset = (top * srcPitch) + left;
+        tmp = ((top >> 1) * srcPitch2) + (left >> 1);
+        s2offset += tmp;
+        s3offset += tmp;
+        if(id == FOURCC_YV12) {
+            tmp = s2offset;
+            s2offset = s3offset;
+            s3offset = tmp;
+        }
+
+        nlines = ((((yb + 0xffff) >> 16) + 1) & ~1) - top;
+        RADEONCopyDataPlanar(pScrn,
+                             buf + s1offset, buf + s2offset, buf + s3offset,
+                             info->FB + d1offset, info->FB + d2offset,
+                             info->FB+d3offset,
+                             srcPitch, srcPitch2, dstPitch, nlines, npixels);
     case FOURCC_RGBT16:
     case FOURCC_RGB16:
     case FOURCC_UYVY:
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: not available
URL: <http://lists.x.org/archives/xorg/attachments/20060108/d14692cf/attachment.pgp>


More information about the xorg mailing list