[Intel-gfx] [PATCH xf86-video-intel v6] sna: Added AYUV format support for textured and sprite video adapters.
Ville Syrjälä
ville.syrjala at linux.intel.com
Thu Nov 8 17:47:26 UTC 2018
On Fri, Nov 02, 2018 at 12:06:03PM +0200, Stanislav Lisovskiy wrote:
> v2: Renamed DRM_FORMAT_XYUV to DRM_FORMAT_XYUV8888.
> Added comment about AYUV byte ordering in Gstreamer.
>
> v3: Removed sna_composite_op flags related change to the separate patch.
>
> v4: Fixed review comments, done code refactoring
>
> v5: Fixed following review comments:
> - Fixed comment in shader code for ayuv kernel.
> - Fixed naming to VIDEO_AYUV_BT601/BT709 for ayuv kernels.
> - Removed duplicate gen9_kernel parameter, left from previous patches
> - Added colorspace handling for new AYUV kernel
> - Fixed naming of sna_copy_packed_data_ayuv to sna_copy_ayuv_data
> - Started using standard bswap_32 function for byte swapping in sna_copy_ayuv_data
> - Removed redundant code in sna_copy_ayuv_data so that it looks more neat
> - Fixed XVIMAGE_AYUV structure initialization to contain proper byte sequence for GST
> - Fixed bogus comment about subsampling for DRM_FORMAT_XYUV8888
> - Fixed AYUV advertisement for all platforms
> - Removed unnecessary RGB888 declaration.
>
> v6:
> - Fixed surface format not to use alpha as supposed
> - Now doing byte swapping always during copy
> - Changed hack, required for GST to work to be at one place
> - Fixed invalid sampling values for XVIMAGE_AYUV
> - Fixed sprite format checking order and images_ayuv definition.
>
> Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy at intel.com>
> ---
> src/render_program/Makefile.am | 2 +
> .../exa_wm_src_sample_argb_ayuv.g8a | 76 ++++++++++++++++
> .../exa_wm_src_sample_argb_ayuv.g8b | 8 ++
> src/sna/gen9_render.c | 24 ++++-
> src/sna/sna_render.h | 3 +
> src/sna/sna_video.c | 89 ++++++++++++++++++-
> src/sna/sna_video.h | 20 +++++
> src/sna/sna_video_sprite.c | 20 ++++-
> src/sna/sna_video_textured.c | 7 ++
> 9 files changed, 244 insertions(+), 5 deletions(-)
> create mode 100644 src/render_program/exa_wm_src_sample_argb_ayuv.g8a
> create mode 100644 src/render_program/exa_wm_src_sample_argb_ayuv.g8b
>
> diff --git a/src/render_program/Makefile.am b/src/render_program/Makefile.am
> index dc58138f..e35ffa52 100644
> --- a/src/render_program/Makefile.am
> +++ b/src/render_program/Makefile.am
> @@ -196,6 +196,7 @@ INTEL_G7B = \
> INTEL_G8A = \
> exa_wm_src_affine.g8a \
> exa_wm_src_sample_argb.g8a \
> + exa_wm_src_sample_argb_ayuv.g8a \
> exa_wm_src_sample_nv12.g8a \
> exa_wm_src_sample_planar.g8a \
> exa_wm_write.g8a \
> @@ -205,6 +206,7 @@ INTEL_G8A = \
>
> INTEL_G8B = \
> exa_wm_src_affine.g8b \
> + exa_wm_src_sample_argb_ayuv.g8b \
> exa_wm_src_sample_argb.g8b \
> exa_wm_src_sample_nv12.g8b \
> exa_wm_src_sample_planar.g8b \
> diff --git a/src/render_program/exa_wm_src_sample_argb_ayuv.g8a b/src/render_program/exa_wm_src_sample_argb_ayuv.g8a
> new file mode 100644
> index 00000000..c0b84c2e
> --- /dev/null
> +++ b/src/render_program/exa_wm_src_sample_argb_ayuv.g8a
> @@ -0,0 +1,76 @@
> +/*
> + * Copyright © 2006 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + * Wang Zhenyu <zhenyu.z.wang at intel.com>
> + * Keith Packard <keithp at keithp.com>
> + */
> +
> +/* Sample the src surface */
> +
> +include(`exa_wm.g4i')
> +
> +undefine(`src_msg')
> +undefine(`src_msg_ind')
> +
> +define(`src_msg', `g65')
> +define(`src_msg_ind', `65')
> +
> +/* prepare sampler read back gX register, which would be written back to output */
> +
> +/* use simd16 sampler, param 0 is u, param 1 is v. */
> +/* 'payload' loading, assuming tex coord start from g4 */
> +
> +/* load argb */
> +mov (1) g0.8<1>UD 0x00000000UD { align1 mask_disable };
> +mov (8) src_msg<1>UD g0<8,8,1>UD { align1 }; /* copy to msg start reg*/
> +
> +/* src_msg will be copied with g0, as it contains send desc */
> +/* emit sampler 'send' cmd */
> +send (16) src_msg_ind /* msg reg index */
> + src_sample_base<1>UW /* readback */
> + null
> + sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype)
> + /* here(src->dst) we should use src_sampler and src_surface */
> + mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */
> +
> +/*
> + * Have to change bytes order, because the only
> + * player which supports AYUV format currently is
> + * Gstreamer and it supports in bad way, even though
> + * spec says MSB:AYUV, we get the bytes opposite way.
> + * We swap bytes both for sprite and texture modes during copy.
> + * So here we get argb which then becomes 1bgr.
> + */
> +mov (16) src_sample_a<1>UD src_sample_b<1>UD { align1 };
> +mov (16) src_sample_b<1>UD src_sample_g<1>UD { align1 };
> +mov (16) src_sample_g<1>UD src_sample_r<1>UD { align1 };
> +mov (16) src_sample_r<1>UD src_sample_a<1>UD { align1 };
> +mov (16) src_sample_a<1>F 1.0F;
> +
> +
> +
> +
> +
> +
> +
> +
> diff --git a/src/render_program/exa_wm_src_sample_argb_ayuv.g8b b/src/render_program/exa_wm_src_sample_argb_ayuv.g8b
> new file mode 100644
> index 00000000..f3ac4959
> --- /dev/null
> +++ b/src/render_program/exa_wm_src_sample_argb_ayuv.g8b
> @@ -0,0 +1,8 @@
> + { 0x00000001, 0x2008060c, 0x00000000, 0x00000000 },
> + { 0x00600001, 0x28200208, 0x008d0000, 0x00000000 },
> + { 0x02800031, 0x21c00a48, 0x06000820, 0x0a8c0001 },
> + { 0x00800001, 0x22800208, 0x00200240, 0x00000000 },
> + { 0x00800001, 0x22400208, 0x00200200, 0x00000000 },
> + { 0x00800001, 0x22000208, 0x002001c0, 0x00000000 },
> + { 0x00800001, 0x21c00208, 0x00200280, 0x00000000 },
> + { 0x00800001, 0x22803ee8, 0x38000000, 0x3f800000 },
> diff --git a/src/sna/gen9_render.c b/src/sna/gen9_render.c
> index eb22b642..90707b1f 100644
> --- a/src/sna/gen9_render.c
> +++ b/src/sna/gen9_render.c
> @@ -129,6 +129,20 @@ static const uint32_t ps_kernel_planar_bt709[][4] = {
> #include "exa_wm_write.g8b"
> };
>
> +static const uint32_t ps_kernel_ayuv_bt601[][4] = {
> +#include "exa_wm_src_affine.g8b"
> +#include "exa_wm_src_sample_argb_ayuv.g8b"
> +#include "exa_wm_yuv_rgb_bt601.g8b"
> +#include "exa_wm_write.g8b"
> +};
> +
> +static const uint32_t ps_kernel_ayuv_bt709[][4] = {
> +#include "exa_wm_src_affine.g8b"
> +#include "exa_wm_src_sample_argb_ayuv.g8b"
> +#include "exa_wm_yuv_rgb_bt709.g8b"
> +#include "exa_wm_write.g6b"
> +};
> +
> static const uint32_t ps_kernel_nv12_bt709[][4] = {
> #include "exa_wm_src_affine.g8b"
> #include "exa_wm_src_sample_nv12.g8b"
> @@ -177,6 +191,8 @@ static const struct wm_kernel_info {
> KERNEL(VIDEO_PLANAR_BT709, ps_kernel_planar_bt709, 7),
> KERNEL(VIDEO_NV12_BT709, ps_kernel_nv12_bt709, 7),
> KERNEL(VIDEO_PACKED_BT709, ps_kernel_packed_bt709, 2),
> + KERNEL(VIDEO_AYUV_BT601, ps_kernel_ayuv_bt601, 2),
> + KERNEL(VIDEO_AYUV_BT709, ps_kernel_ayuv_bt709, 2),
> KERNEL(VIDEO_RGB, ps_kernel_rgb, 2),
> #endif
> };
> @@ -2552,7 +2568,6 @@ gen9_render_composite(struct sna *sna,
> tmp->mask.bo != NULL,
> tmp->has_component_alpha,
> tmp->is_affine);
> -
> tmp->blt = gen9_render_composite_blt;
> tmp->box = gen9_render_composite_box;
> tmp->boxes = gen9_render_composite_boxes__blt;
> @@ -3853,6 +3868,8 @@ static void gen9_emit_video_state(struct sna *sna,
> src_surf_format[0] = SURFACEFORMAT_B8G8R8X8_UNORM;
> else if (frame->id == FOURCC_UYVY)
> src_surf_format[0] = SURFACEFORMAT_YCRCB_SWAPY;
> + else if (is_ayuv_fourcc(frame->id))
> + src_surf_format[0] = SURFACEFORMAT_B8G8R8X8_UNORM;
> else
> src_surf_format[0] = SURFACEFORMAT_YCRCB_NORMAL;
>
> @@ -3903,6 +3920,11 @@ static unsigned select_video_kernel(const struct sna_video *video,
> case FOURCC_RGB565:
> return GEN9_WM_KERNEL_VIDEO_RGB;
>
> + case FOURCC_AYUV:
> + return video->colorspace ?
> + GEN9_WM_KERNEL_VIDEO_AYUV_BT709 :
> + GEN9_WM_KERNEL_VIDEO_AYUV_BT601;
> +
> default:
> return video->colorspace ?
> GEN9_WM_KERNEL_VIDEO_PACKED_BT709 :
> diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
> index a4e5b56a..891fc905 100644
> --- a/src/sna/sna_render.h
> +++ b/src/sna/sna_render.h
> @@ -617,6 +617,9 @@ enum {
> GEN9_WM_KERNEL_VIDEO_NV12_BT709,
> GEN9_WM_KERNEL_VIDEO_PACKED_BT709,
>
> + GEN9_WM_KERNEL_VIDEO_AYUV_BT601,
> + GEN9_WM_KERNEL_VIDEO_AYUV_BT709,
> +
> GEN9_WM_KERNEL_VIDEO_RGB,
> GEN9_WM_KERNEL_COUNT
> };
> diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c
> index 55405f81..d4ed8464 100644
> --- a/src/sna/sna_video.c
> +++ b/src/sna/sna_video.c
> @@ -59,6 +59,7 @@
> #include "intel_options.h"
>
> #include <xf86xv.h>
> +#include <byteswap.h>
>
> #ifdef SNA_XVMC
> #define _SNA_XVMC_SERVER_
> @@ -281,6 +282,7 @@ sna_video_frame_set_rotation(struct sna_video *video,
> } else {
> switch (frame->id) {
> case FOURCC_RGB888:
> + case FOURCC_AYUV:
> if (rotation & (RR_Rotate_90 | RR_Rotate_270)) {
> frame->pitch[0] = ALIGN((height << 2), align);
> frame->size = (int)frame->pitch[0] * width;
> @@ -584,6 +586,89 @@ sna_copy_packed_data(struct sna_video *video,
> }
> }
>
> +static void
> +sna_copy_ayuv_data(struct sna_video *video,
> + const struct sna_video_frame *frame,
> + const uint8_t *buf,
> + uint8_t *dst,
> + bool reverse_bytes)
Why this parameter? It's always true, no?
> +{
> + int pitch = frame->width << 2;
> + const uint8_t *src, *s;
> + const uint32_t *src_dw;
> + uint32_t *dst_dw = (uint32_t *)dst;
> + int x, y, w, h;
> + int i, j;
> + uint32_t dw;
> +
> + if (video->textured) {
> + /* XXX support copying cropped extents */
> + x = y = 0;
> + w = frame->width;
> + h = frame->height;
> + } else {
> + x = frame->image.x1;
> + y = frame->image.y1;
> + w = frame->image.x2 - frame->image.x1;
> + h = frame->image.y2 - frame->image.y1;
> + }
> +
> + src = buf + (y * pitch) + (x << 2);
> + src_dw = (uint32_t *)src;
> +
> + if (reverse_bytes) {
> + /*
> + * Have to reverse bytes order, because the only
> + * player which supports AYUV format currently is
> + * Gstreamer and it supports in bad way, even though
> + * spec says MSB:AYUV, we get the bytes opposite way.
> + */
> + for (i = 0; i < h; i++) {
> + for (j = 0; j < w; j++) {
> + uint32_t reverse_dw;
> + dw = src_dw[i * w + j];
> + reverse_dw = bswap_32(dw);
> + dst_dw[i * w + j] = reverse_dw;
> + }
> + }
> + }
Two loops through the data isn't quite what I was thinking.
> +
> + switch (frame->rotation) {
> + case RR_Rotate_0:
> + for (i = 0; i < h; i++) {
> + for (j = 0; j < w; j++) {
> + dw = dst_dw[i * w + j];
> + dst_dw[i * w + j] = dw;
What I had in mind was just
dst_dw[...] = bswap_32(src_dw[...]);
here.
> + }
> + }
> + break;
> + case RR_Rotate_90:
> + for (i = 0; i < h; i++) {
> + for (j = 0; j < w; j++) {
> + dw = dst_dw[i * w + j];
> + dst_dw[(w - j - 1) * h + i] = dw;
> + }
> + }
> + break;
> + case RR_Rotate_180:
> + for (i = 0; i < h; i++) {
> + for (j = 0; j < w; j++) {
> + dw = dst_dw[i * w + j];
> + dst_dw[(h - i - 1) * w + w - j - 1] = dw;
> + }
> + }
> + break;
> + case RR_Rotate_270:
> + for (i = 0; i < h; i++) {
> + for (j = 0; j < w; j++) {
> + dw = dst_dw[i * w + j];;
> + dst_dw[(w - j - 1) * h + i] = dw;
> + }
> + }
> + break;
> + }
> +}
> +
> bool
> sna_video_copy_data(struct sna_video *video,
> struct sna_video_frame *frame,
> @@ -604,7 +689,7 @@ sna_video_copy_data(struct sna_video *video,
> assert(frame->size);
>
> /* In the common case, we can simply the upload in a single pwrite */
> - if (frame->rotation == RR_Rotate_0 && !video->tiled) {
> + if (frame->rotation == RR_Rotate_0 && !video->tiled && !is_ayuv_fourcc(frame->id)) {
> DBG(("%s: unrotated, untiled fast paths: is-planar?=%d\n",
> __FUNCTION__, is_planar_fourcc(frame->id)));
> if (is_nv12_fourcc(frame->id)) {
> @@ -709,6 +794,8 @@ use_gtt: /* copy data, must use GTT so that we keep the overlay uncached */
> sna_copy_nv12_data(video, frame, buf, dst);
> else if (is_planar_fourcc(frame->id))
> sna_copy_planar_data(video, frame, buf, dst);
> + else if (is_ayuv_fourcc(frame->id))
> + sna_copy_ayuv_data(video, frame, buf, dst, true);
> else
> sna_copy_packed_data(video, frame, buf, dst);
>
> diff --git a/src/sna/sna_video.h b/src/sna/sna_video.h
> index bbd3f0fd..a3ffdc0b 100644
> --- a/src/sna/sna_video.h
> +++ b/src/sna/sna_video.h
> @@ -39,6 +39,7 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> #define FOURCC_RGB565 ((16 << 24) + ('B' << 16) + ('G' << 8) + 'R')
> #define FOURCC_RGB888 ((24 << 24) + ('B' << 16) + ('G' << 8) + 'R')
> #define FOURCC_NV12 (('2' << 24) + ('1' << 16) + ('V' << 8) + 'N')
> +#define FOURCC_AYUV (('V' << 24) + ('U' << 16) + ('Y' << 8) + 'A')
>
> /*
> * Below, a dummy picture type that is used in XvPutImage
> @@ -79,6 +80,15 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> XvTopToBottom \
> }
>
> +#define XVIMAGE_AYUV { \
> + FOURCC_AYUV, XvYUV, LSBFirst, \
> + {'A', 'Y', 'U', 'V', 0x00, 0x00, 0x00, 0x10, 0x80, 0x00, 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71}, \
> + 32, XvPacked, 1, 0, 0, 0, 0, 8, 8, 8, 1, 1, 1, 1, 1, 1, \
> + {'A', 'Y', 'U', 'V', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \
> + XvTopToBottom \
> +}
> +
> +
> struct sna_video {
> struct sna *sna;
>
> @@ -189,6 +199,16 @@ static inline int is_nv12_fourcc(int id)
> }
> }
>
> +static inline int is_ayuv_fourcc(int id)
> +{
> + switch (id) {
> + case FOURCC_AYUV:
> + return 1;
> + default:
> + return 0;
> + }
> +}
> +
> bool
> sna_video_clip_helper(struct sna_video *video,
> struct sna_video_frame *frame,
> diff --git a/src/sna/sna_video_sprite.c b/src/sna/sna_video_sprite.c
> index 8b7ae8ae..3780dc0e 100644
> --- a/src/sna/sna_video_sprite.c
> +++ b/src/sna/sna_video_sprite.c
> @@ -47,7 +47,7 @@
> #define DRM_FORMAT_YUYV fourcc_code('Y', 'U', 'Y', 'V') /* [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little endian */
> #define DRM_FORMAT_UYVY fourcc_code('U', 'Y', 'V', 'Y') /* [31:0] Y1:Cr0:Y0:Cb0 8:8:8:8 little endian */
> #define DRM_FORMAT_NV12 fourcc_code('N', 'V', '1', '2') /* 2x2 subsampled Cr:Cb plane */
> -
> +#define DRM_FORMAT_XYUV8888 fourcc_code('X', 'Y', 'U', 'V') /* [31:0] x:Y:U:V 8:8:8:8 little endian */
> #define has_hw_scaling(sna, video) ((sna)->kgem.gen < 071 || \
> (sna)->kgem.gen >= 0110)
>
> @@ -79,6 +79,8 @@ static const XvImageRec images_rgb565[] = { XVIMAGE_YUY2, XVIMAGE_UYVY,
> XVMC_RGB888, XVMC_RGB565 };
> static const XvImageRec images_nv12[] = { XVIMAGE_YUY2, XVIMAGE_UYVY,
> XVIMAGE_NV12, XVMC_RGB888, XVMC_RGB565 };
> +static const XvImageRec images_ayuv[] = { XVIMAGE_AYUV, XVIMAGE_YUY2, XVIMAGE_UYVY,
> + XVIMAGE_NV12, XVMC_RGB888, XVMC_RGB565 };
> static const XvAttributeRec attribs[] = {
> { XvSettable | XvGettable, 0, 1, (char *)"XV_COLORSPACE" }, /* BT.601, BT.709 */
> { XvSettable | XvGettable, 0, 0xffffff, (char *)"XV_COLORKEY" },
> @@ -364,6 +366,10 @@ sna_video_sprite_show(struct sna *sna,
> case FOURCC_UYVY:
> f.pixel_format = DRM_FORMAT_UYVY;
> break;
> + case FOURCC_AYUV:
> + /* i915 doesn't support alpha, so we use XYUV */
> + f.pixel_format = DRM_FORMAT_XYUV8888;
> + break;
> case FOURCC_YUY2:
> default:
> f.pixel_format = DRM_FORMAT_YUYV;
> @@ -705,7 +711,12 @@ static int sna_video_sprite_query(ddQueryImageAttributes_ARGS)
> tmp *= (*h >> 1);
> size += tmp;
> break;
> -
> + case FOURCC_AYUV:
> + tmp = *w << 2;
> + if (pitches)
> + pitches[0] = tmp;
> + size = *h * tmp;
> + break;
> default:
> *w = (*w + 1) & ~1;
> *h = (*h + 1) & ~1;
> @@ -805,7 +816,10 @@ void sna_video_sprite_setup(struct sna *sna, ScreenPtr screen)
> adaptor->nAttributes = ARRAY_SIZE(attribs);
> adaptor->pAttributes = (XvAttributeRec *)attribs;
>
> - if (sna_has_sprite_format(sna, DRM_FORMAT_NV12)) {
> + if (sna_has_sprite_format(sna, DRM_FORMAT_XYUV8888)) {
> + adaptor->pImages = (XvImageRec *)images_ayuv;
> + adaptor->nImages = ARRAY_SIZE(images_ayuv);
> + } else if (sna_has_sprite_format(sna, DRM_FORMAT_NV12)) {
> adaptor->pImages = (XvImageRec *)images_nv12;
> adaptor->nImages = ARRAY_SIZE(images_nv12);
> } else if (sna_has_sprite_format(sna, DRM_FORMAT_RGB565)) {
> diff --git a/src/sna/sna_video_textured.c b/src/sna/sna_video_textured.c
> index a784fe2e..46c213ef 100644
> --- a/src/sna/sna_video_textured.c
> +++ b/src/sna/sna_video_textured.c
> @@ -68,6 +68,7 @@ static const XvImageRec gen4_Images[] = {
> XVIMAGE_I420,
> XVIMAGE_NV12,
> XVIMAGE_UYVY,
> + XVIMAGE_AYUV,
> XVMC_YUV,
> };
>
> @@ -337,6 +338,12 @@ sna_video_textured_query(ddQueryImageAttributes_ARGS)
> pitches[0] = size;
> size *= *h;
> break;
> + case FOURCC_AYUV:
> + size = *w << 2;
> + if (pitches)
> + pitches[0] = size;
> + size *= *h;
> + break;
> case FOURCC_XVMC:
> *h = (*h + 1) & ~1;
> size = sizeof(uint32_t);
> --
> 2.17.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Ville Syrjälä
Intel
More information about the Intel-gfx mailing list