[Mesa-dev] [RFC 12/13] i965: support for YUV formatted external textures
Topi Pohjolainen
topi.pohjolainen at intel.com
Tue Feb 26 05:19:03 PST 2013
Adds support for the planar formats YUV420, YVU420 (YV12) and NV12. The
shader program is extended with instructions that sample the separate Y- and
U/V-planes and convert the YUV result to RGB as specified by ITU-R BT.601.
Packed formats are handled through the normal paths.
Only fragment shaders are supported for now. I have yet to study the
vec4 backend in order to add support for vertex shaders.
(There aren't any tests or examples available for them anyway.)
Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
---
src/mesa/drivers/dri/i965/Makefile.sources | 1 +
src/mesa/drivers/dri/i965/brw_fs.h | 1 +
src/mesa/drivers/dri/i965/brw_fs_ext_texture.cpp | 274 ++++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 5 +
4 files changed, 281 insertions(+)
create mode 100644 src/mesa/drivers/dri/i965/brw_fs_ext_texture.cpp
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 243dda6..417fc2d 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -62,6 +62,7 @@ i965_FILES = \
brw_fs_reg_allocate.cpp \
brw_fs_schedule_instructions.cpp \
brw_fs_vector_splitting.cpp \
+ brw_fs_ext_texture.cpp \
brw_fs_visitor.cpp \
brw_gs.c \
brw_gs_emit.c \
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index ac6e631..03754a2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -351,6 +351,7 @@ public:
fs_reg *emit_general_interpolation(ir_variable *ir);
void emit_interpolation_setup_gen4();
void emit_interpolation_setup_gen6();
+ bool emit_texture_external(ir_texture *ir, fs_reg& coordinate, int sampler);
fs_reg rescale_texcoord(ir_texture *ir, fs_reg coordinate,
bool is_rect, int sampler, int texunit);
fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_ext_texture.cpp b/src/mesa/drivers/dri/i965/brw_fs_ext_texture.cpp
new file mode 100644
index 0000000..602c41f
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_ext_texture.cpp
@@ -0,0 +1,274 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Topi Pohjolainen <topi.pohjolainen at intel.com>
+ *
+ */
+
+/** @file brw_fs_ext_texture.cpp
+ *
+ * This file implements the generation of FS LIR for converting YUV samples into
+ * RGB as part of a single GLSL texture sampling instruction. The implementation
+ * follows the one found in "brw_wm_fp.c::precalc_tex()".
+ *
+ * B = 1.164(Y - 16) + 2.018(U - 128)
+ * G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
+ * R = 1.164(Y - 16) + 1.596(V - 128)
+ *
+ * Implementation in 'brw_wm_fp.c':
+ *
+ * Need to emit YUV texture conversions by hand. Probably need to do this here
+ * - the alternative is at execution unit emission time, but the conversion
+ * requires allocating a temporary variable, which one doesn't have the
+ * facility to do that late in the compilation.
+ *
+ * CONST C0 = { -.0625, -.5, -.5, 1.164 }
+ * CONST C1 = { 1.596, -0.813, 2.018, -.391 }
+ * YUV = TEX ...
+ * YUV.xyz = ADD YUV, C0
+ * YUV.x = MUL YUV.x, C0.w
+ * RGB.xyz = MAD YUV.zzy, C1, YUV.x
+ * RGB.y = MAD YUV.y, C1.w, RGB.y
+ */
+extern "C" {
+
+#include <sys/types.h>
+
+#include "main/macros.h"
+#include "brw_context.h"
+}
+#include "brw_shader.h"
+#include "brw_fs.h"
+
+struct fs_reg_const_float4 : public fs_reg { /* vec4 register loaded with four float constants */
+ fs_reg_const_float4(fs_visitor *v, float f1, float f2, float f3, float f4) :
+ fs_reg(v, glsl_type::vec4_type)
+ { /* note: constructing the object emits four MOVs into v */
+ fs_reg tmp = *this; /* copy used as a write cursor; only its reg_offset is advanced */
+
+ v->emit(BRW_OPCODE_MOV, tmp, fs_reg(f1)); /* f1 is written at the initial reg_offset */
+ ++tmp.reg_offset;
+ v->emit(BRW_OPCODE_MOV, tmp, fs_reg(f2));
+ ++tmp.reg_offset;
+ v->emit(BRW_OPCODE_MOV, tmp, fs_reg(f3));
+ ++tmp.reg_offset;
+ v->emit(BRW_OPCODE_MOV, tmp, fs_reg(f4)); /* f4 ends up three offsets past f1 */
+ }
+};
+
+static fs_reg
+add_mul_yuv_c0(fs_visitor *v, const fs_reg& yuv)
+{
+ /**
+ * Emits res.xyz = yuv.xyz + C0.xyz followed by res.x = res.x * C0.w. The
+ * original in brw_wm_fp.c uses 16/256 = .0625 and 128/256 = .5; here the
+ * divider is 255, matching the range [0,255]. The multiplier 255/219 (about
+ * 1.164) treats the Y-component as being in the range [16,235] (219 steps).
+ */
+ fs_reg_const_float4 c0(v, -16.0f/255, -128.0f/255, -128.0f/255, 255.0/219);
+ fs_reg res(v, glsl_type::vec4_type);
+ fs_reg tmp_res(res); /* write cursor into res */
+ fs_reg tmp_yuv(yuv); /* read cursor into the caller's yuv, which is left untouched */
+
+ /* YUV.xyz = ADD YUV, C0 */
+ for (unsigned i = 0; i < 3; ++i) { /* one ADD per component; all three cursors advance together */
+ v->emit(BRW_OPCODE_ADD, tmp_res, tmp_yuv, c0);
+ tmp_yuv.reg_offset++;
+ c0.reg_offset++;
+ tmp_res.reg_offset++;
+ }
+
+ /* YUV.x = MUL YUV.x, C0.w */
+ tmp_res.reg_offset = 0; /* rewind to x; the loop left c0 at its fourth component (w) */
+ v->emit(BRW_OPCODE_MUL, tmp_res, tmp_res, c0);
+
+ return res; /* the fourth component of res is never written here */
+}
+
+static fs_reg
+mad_yuv_c1(fs_visitor *v, const fs_reg& yuv)
+{ /* NOTE(review): MAD takes three sources - confirm it exists on every gen emit_texture_gen() targets (gen4/5 included) */
+ /**
+ * Builds RGB from the biased and scaled YUV. The C1 multipliers result from
+ * treating the V- and U-components as having the range [16,240] (224 steps).
+ */
+ fs_reg_const_float4 c1(v, 1.402 * 255.0 / 224, -0.714 * 255.0 / 224,
+ 1.772 * 255.0 / 224, -0.344 * 255.0 / 224);
+ fs_reg res(v, glsl_type::vec4_type);
+ fs_reg tmp_res(res); /* write cursor into res */
+ fs_reg tmp_uv(yuv); /* chroma operand; its reg_offset is set explicitly below */
+ fs_reg tmp_y(yuv); /* never advanced: keeps yuv's own reg_offset */
+
+ /* RGB.x = MAD YUV.x, C1.x, YUV.z */
+ tmp_uv.reg_offset = 2;
+ v->emit(BRW_OPCODE_MAD, tmp_res, tmp_y, c1, tmp_uv); /* NOTE(review): presumably c1 * tmp_uv + tmp_y - confirm the MAD operand order */
+ c1.reg_offset++;
+ tmp_res.reg_offset++;
+
+ /* RGB.y = MAD YUV.x, C1.y, YUV.z */
+ v->emit(BRW_OPCODE_MAD, tmp_res, tmp_y, c1, tmp_uv);
+ c1.reg_offset++;
+ tmp_res.reg_offset++;
+
+ /* RGB.z = MAD YUV.x, C1.z, YUV.y */
+ tmp_uv.reg_offset = 1;
+ v->emit(BRW_OPCODE_MAD, tmp_res, tmp_y, c1, tmp_uv);
+ c1.reg_offset++; /* c1 now at its fourth component (w) */
+
+ /* RGB.y = MAD RGB.y, C1.w, YUV.y */
+ tmp_res.reg_offset = 1; /* second pass over RGB.y, accumulating on the value written above */
+ v->emit(BRW_OPCODE_MAD, tmp_res, tmp_res, c1, tmp_uv);
+
+ /* Spec: alpha is 255 when not available; 1.0f is presumably its normalized equivalent */
+ tmp_res.reg_offset = 3;
+ v->emit(BRW_OPCODE_MOV, tmp_res, fs_reg(1.0f));
+
+ return res;
+}
+
+static const struct intel_texture_image *
+resolve_tex_img(const struct gl_context *ctx, const GLbitfield *used, int unit)
+{ /* Returns the base-level image of the current external texture object of 'unit', or 0 if there is none */
+ if (!(used[unit] & (1 << TEXTURE_EXTERNAL_INDEX))) /* the unit is not used with an external target */
+ return 0;
+
+ const struct gl_texture_object *obj =
+ ctx->Texture.Unit[unit].CurrentTex[TEXTURE_EXTERNAL_INDEX];
+
+ if (!obj)
+ return 0;
+
+ return (const struct intel_texture_image *)obj->Image[0][obj->BaseLevel]; /* NOTE(review): cast assumes the image was allocated as an intel_texture_image - confirm */
+}
+
+static fs_inst *
+emit_texture_gen(fs_visitor *visitor, ir_texture *ir, int gen,
+ fs_reg& dst, fs_reg& coord, int sampler,
+ int surf_index_offset)
+{ /* Emits one sample into dst through the emitter matching 'gen' and returns the resulting instruction */
+ fs_reg shadow_c; /* default-constructed: no shadow comparitor is passed */
+ fs_reg lod; /* default-constructed: no LOD operands are passed */
+ fs_reg lod2;
+ fs_inst *inst;
+
+ if (gen >= 7) {
+ inst = visitor->emit_texture_gen7(ir, dst, coord, shadow_c, lod, lod2);
+ } else if (gen >= 5) {
+ inst = visitor->emit_texture_gen5(ir, dst, coord, shadow_c, lod, lod2);
+ } else {
+ inst = visitor->emit_texture_gen4(ir, dst, coord, shadow_c, lod, lod2);
+ }
+
+ /* The header is set up by generate_tex() when necessary. */
+ inst->src[0] = reg_undef;
+ inst->sampler = sampler;
+ inst->surf_index_offset = surf_index_offset; /* callers pass 0 for the first sample and 1 or 2 for the chroma samples; presumably selects the plane's surface - confirm */
+
+ return inst;
+}
+
+static void
+yuv420_chroma(fs_visitor *v, ir_texture *ir, int gen, fs_reg& yuv,
+ fs_reg& coord, int sampler,
+ unsigned chroma_src_i, unsigned chroma_dst_i)
+{ /* Samples with surface index offset chroma_src_i and copies the first component of the result into yuv at offset chroma_dst_i */
+ fs_reg tmp_yuv_dst(yuv); /* write cursor; the caller's yuv keeps its reg_offset */
+ fs_reg c = fs_reg(v, glsl_type::vec4_type); /* scratch destination for the chroma sample */
+
+ emit_texture_gen(v, ir, gen, c, coord, sampler, chroma_src_i);
+
+ tmp_yuv_dst.reg_offset += chroma_dst_i;
+ v->emit(BRW_OPCODE_MOV, tmp_yuv_dst, c); /* only the first component of c is used */
+}
+
+static void
+nv12_chroma(fs_visitor *v, ir_texture *ir, int gen, fs_reg& yuv,
+ fs_reg& coord, int sampler)
+{ /* Samples with surface index offset 1 and copies the first two components of the result into the second and third components of yuv */
+ fs_reg tmp_yuv_dst(yuv); /* write cursor; the caller's yuv keeps its reg_offset */
+ fs_reg uv = fs_reg(v, glsl_type::vec4_type); /* scratch destination for the chroma sample */
+
+ emit_texture_gen(v, ir, gen, uv, coord, sampler, 1);
+
+ tmp_yuv_dst.reg_offset++;
+ v->emit(BRW_OPCODE_MOV, tmp_yuv_dst, uv); /* first sampled component -> yuv offset 1 */
+
+ tmp_yuv_dst.reg_offset++;
+ uv.reg_offset++;
+ v->emit(BRW_OPCODE_MOV, tmp_yuv_dst, uv); /* second sampled component -> yuv offset 2 */
+}
+
+/* Sample a planar YUV external texture (YUV420, YVU420 or NV12) and convert
+ * the result to RGB. The components are fetched from the separate planes and
+ * packed into one register, allowing the same conversion logic to be used
+ * independent of the component layout; format aware converters could reduce
+ * the amount of instructions, but here things are rather kept simple.
+ * Returns true if the sampling was emitted here, false (with nothing emitted)
+ * if the caller is to use the normal path, e.g. for packed formats.
+ */
+bool
+fs_visitor::emit_texture_external(ir_texture *ir, fs_reg& coordinate,
+                                  int sampler)
+{
+   const struct intel_texture_image *img = resolve_tex_img(ctx,
+                                              fp->Base.TexturesUsed,
+                                              fp->Base.SamplerUnits[sampler]);
+
+   if (img && !img->ext_format)
+      return false;
+
+   /**
+    * "Sampling an external texture which is not associated with any EGLImage
+    * sibling will return a sample value of (0,0,0,1)."
+    */
+   if (!img) {
+      swizzle_result(ir, fs_reg_const_float4(this, 0, 0, 0, 1), sampler);
+      return true;
+   }
+
+   /* Settle the plane layout before emitting anything: returning false once
+    * the luma sample is out would leave a stray texture instruction behind.
+    */
+   const bool is_nv12 = img->ext_format->fourcc == __DRI_IMAGE_FOURCC_NV12;
+   const bool is_yvu = img->ext_format->fourcc == __DRI_IMAGE_FOURCC_YVU420;
+   if (!is_nv12 && !is_yvu &&
+       img->ext_format->fourcc != __DRI_IMAGE_FOURCC_YUV420)
+      return false;
+
+   fs_reg yuv = fs_reg(this, glsl_type::vec4_type);
+   emit_texture_gen(this, ir, intel->gen, yuv, coordinate, sampler, 0);
+
+   if (is_nv12) {
+      nv12_chroma(this, ir, intel->gen, yuv, coordinate, sampler);
+   } else {
+      yuv420_chroma(this, ir, intel->gen, yuv, coordinate, sampler,
+                    1, is_yvu ? 2 : 1);
+      yuv420_chroma(this, ir, intel->gen, yuv, coordinate, sampler,
+                    2, is_yvu ? 1 : 2);
+   }
+
+   fs_reg rgb = mad_yuv_c1(this, add_mul_yuv_c0(this, yuv));
+   swizzle_result(ir, rgb, sampler);
+
+   return true;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 573921c..0ab274e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1316,6 +1316,11 @@ fs_visitor::visit(ir_texture *ir)
sampler, texunit);
}
+ if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_EXTERNAL &&
+ emit_texture_external(ir, coordinate, sampler)) {
+ return;
+ }
+
fs_reg shadow_comparitor;
if (ir->shadow_comparitor) {
ir->shadow_comparitor->accept(this);
--
1.7.9.5
More information about the mesa-dev
mailing list