[Mesa-dev] [PATCH 18/22] i965/fs/gen6: Support for sampling stencil with non-msaa coordinates
Topi Pohjolainen
topi.pohjolainen at intel.com
Mon Jun 9 00:45:52 PDT 2014
Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
---
src/mesa/drivers/dri/i965/Makefile.sources | 1 +
src/mesa/drivers/dri/i965/brw_fs.h | 1 +
src/mesa/drivers/dri/i965/brw_fs_stencil_tex.cpp | 411 +++++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_fs_stencil_tex.h | 74 ++++
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 11 +
5 files changed, 498 insertions(+)
create mode 100644 src/mesa/drivers/dri/i965/brw_fs_stencil_tex.cpp
create mode 100644 src/mesa/drivers/dri/i965/brw_fs_stencil_tex.h
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index d43fc8e..179ea67 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -68,6 +68,7 @@ i965_FILES = \
brw_fs_saturate_propagation.cpp \
brw_fs_sel_peephole.cpp \
brw_fs_vector_splitting.cpp \
+ brw_fs_stencil_tex.cpp \
brw_fs_visitor.cpp \
brw_fs_emitter.cpp \
brw_gs.c \
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 55877c1..e1f540d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -52,6 +52,7 @@ extern "C" {
#include "glsl/glsl_types.h"
#include "glsl/ir.h"
#include "brw_fs_emit.h"
+#include "brw_fs_stencil_tex.h"
#define MAX_SAMPLER_MESSAGE_SIZE 11
diff --git a/src/mesa/drivers/dri/i965/brw_fs_stencil_tex.cpp b/src/mesa/drivers/dri/i965/brw_fs_stencil_tex.cpp
new file mode 100644
index 0000000..2d813e9
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_stencil_tex.cpp
@@ -0,0 +1,411 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/** @file brw_fs_stencil_tex.cpp
+ *
+ * Support for coordinate translations needed for sampling stencil buffers.
+ * Stencil buffers are W-tiled which is unfortunately not understood by the
+ * sampling engine. The surface is setup as Y-tiled instead and the fragment
+ * program is augmented with instructions translating the coordinates into
+ * equivalent in Y-tiled space.
+ *
+ * In W-tiling four 4x4 blocks form an 8x8 block. These 8x8 blocks are laid
+ * out in column major order as follows:
+ *
+ * 0 1 7
+ * +-------------+-------------+-----+---------------+
+ * 0 | 0, 0 | 1, 0 | 2, 0 | 3, 0 | ... | 14, 0 | 15, 0 |
+ * | 0, 1 | 1, 1 | 2, 1 | 3, 1 | ... | 14, 1 | 15, 1 |
+ * + ... +
+ * 7 | 0,14 | 1,14 | 2,14 | 3,14 | ... | 14,14 | 15,14 |
+ * | 0,15 | 1,15 | 2,15 | 3,15 | ... | 14,15 | 15,15 |
+ * +-------------------------------------------------+
+ * W-tile (8x8 16x16 blocks)
+ *
+ * In Y-tiling the 16x1 sub-tiles are also laid out in column major order,
+ * and the 4x4 sub-tiles (designated by their w-tiled coordinates) can be seen
+ * to be located in the Y-tiled layout as follows:
+ *
+ * ------------------------------------
+ * 0 | 0, 0 | 2, 0 | 4, 0 | ... | 14, 0 |
+ * 1 | 1, 0 | 3, 0 | 5, 0 | ... | 15, 0 |
+ * 2 | 0, 1 | 2, 1 | 4, 1 | ... | 14, 1 |
+ * 3 | 1, 1 | 3, 1 | 5, 1 | ... | 15, 1 |
+ * | ... |
+ * 30 | 0,15 | 2,15 | 4,15 | ... | 14,15 |
+ * 31 | 1,15 | 3,15 | 5,15 | ... | 15,15 |
+ * ------------------------------------
+ * Y-tile (8x32 16x1 blocks)
+ */
+#include "brw_fs_stencil_tex.h"
+#include "brw_defines.h"
+#include "intel_mipmap_tree.h"
+#include "program/hash_table.h"
+
+namespace {
+
+/**
+ * Emit translation of pixel coordinates src_x and src_y in W-tiled layout
+ * to corresponding coordinates dst_x and dst_y in Y-tiled layout.
+ * Note that source and destination registers cannot overlap.
+ *
+ * Both W-tiling and Y-tiling have equal tile size of one page. The difference
+ * is in how pixels are organised within the page: W-tile has 64 rows each
+ * holding in turn two 32 byte sub-tiles whereas Y-tile has 32 rows each
+ * holding eight 16 byte sub-tiles. The sub-tiles in turn have different
+ * layout: W is 8x4 bytes and Y is 16x1. Now, in Y-tiling two subsequent tiles
+ * are on top of each other. If each pair is thought to form one tile instead
+ * one can think Y-tiling to consist of 16 rows and eight columns of 32-byte
+ * subtiles.
+ *
+ * This organisation is independent of the pixel format used and
+ * hence the number of pixels within a tile varies based on how many bytes
+ * per pixel are needed.
+ *
+ * The operation here is fixed to one-byte-per-pixel formats only - it
+ * assumes that Y-subtile holds 16 pixels per row (and W 8 respectively).
+ *
+ * First examine the X coordinate representing an address using W-tiling.
+ * The lowest six bits represent a column within a tile while the higher bits
+ * designate a tile number horizontally.
+ * As a Y-tile can hold twice as many pixels horizontally as a W-tile, the
+ * tile number needs to be multiplied by two in order to move to the desired
+ * tile horizontally:
+ *
+ * (X & ~0b111) << 1 == (X & 0xFFF8) << 1 (1)
+ *
+ * The lowest six can be further divided in two parts - the subtile number
+ * and then the remaining coordinate within the subtile. These are three and
+ * three respectively for W-tiling. Then unlike Y-subtile W is further
+ * divided into 4x4 and again into 2x2 tiles. Hence the third lowest bit
+ * represents the 4x4-subtile number, the second lowest the 2x2-subtile
+ * number and finally the lowest the offset within the 2x2 block.
+ *
+ * 01 23 45 67 0123456789ABCDEF W Y
+ * ++==+==++==+==++ +---------------+ a: 3,1 7,0
+ * 0 || | || | || 0 | a | b: 1,2 1,1
+ * 1 || | a|| | || 1 | b c | c: 5,3 10,1
+ * ++--+--++--+--++ +---------------+
+ * 2 || b| || | ||
+ * 3 || | ||c | ||
+ * ++==+==++==+==++
+ *
+ * Observing the W layout it can be seen that x-coordinates greater than or
+ * equal to four reside on the second half of the subtile - in Y-tile this
+ * corresponds to the second row calling for the following compensation in
+ * the vertical coordinate:
+ *
+ * (X & 0b100) >> 2 == (X & 0x4) >> 2 (2)
+ *
+ * The 2x2 subtile in turn results into two x-coordinates x and x + 2 in the
+ * same row (in the same 8x4 subtile) to be 4 bytes apart in linear memory.
+ * As addresses in Y-subtile itself are linear, the compensation in the
+ * horizontal coordinate is:
+ *
+ * (X & 0b10) << 1 (3)
+ *
+ * Combined with (1):
+ *
+ * (X & ~0b101) << 1 == (X & 0xFFFA) << 1 (4)
+ *
+ * Similarly in 4x4 W-subtile in the same column any two y-coordinates y and
+ * y + 2 are 8 bytes apart in linear memory addresses. In 2x2-subtile in turn
+ * y and y + 1 are two bytes apart. This results into horizontal compensation
+ * in Y layout:
+ *
+ * (Y & 0b10) << 2 | (Y & 0b1) << 1 (5)
+ *
+ * Taking into account (4) and (5), one gets:
+ *
+ * X' = (X & ~0b101) << 1 | (Y & 0b10) << 2 | (Y & 0b1) << 1 | X & 0b1
+ *
+ * For y-coordinate one needs to consider full tiles and (2). As Y-layout
+ * has twice as many tiles as W horizontally, the number of tiles in vertical
+ * direction needs to be divided by two. Every two tiles on top each other
+ * in W-layout are "re-layouted" horizontally.
+ *
+ * Y' = (Y & ~0b11) >> 1 | (X & 0b100) >> 2
+ */
+static void
+emit_translate_w_to_y_tiling(fs_emitter *e, const fs_reg& coord)
+{
+ fs_reg src_x(coord);
+ fs_reg src_y(offset(coord, 1));
+ fs_reg dst_x(e, glsl_type::uint_type);
+ fs_reg dst_y(e, glsl_type::uint_type);
+ fs_reg t1(e, glsl_type::uint_type);
+ fs_reg t2(e, glsl_type::uint_type);
+
+ e->emit(e->AND(t1, src_x, fs_reg(0xfffa))); /* X & ~0b101 */
+ e->emit(e->SHL(t1, t1, fs_reg(1))); /* (X & ~0b101) << 1 */
+ e->emit(e->AND(t2, src_y, fs_reg(2))); /* Y & 0b10 */
+ e->emit(e->SHL(t2, t2, fs_reg(2))); /* (Y & 0b10) << 2 */
+ e->emit(e->OR(t1, t1, t2)); /* (X & ~0b101) << 1 | (Y & 0b10) << 2 */
+ e->emit(e->AND(t2, src_y, fs_reg(1))); /* Y & 0b1 */
+ e->emit(e->SHL(t2, t2, fs_reg(1))); /* (Y & 0b1) << 1 */
+ e->emit(e->OR(t1, t1, t2)); /* (X & ~0b101) << 1 | (Y & 0b10) << 2
+ | (Y & 0b1) << 1 */
+ e->emit(e->AND(t2, src_x, fs_reg(1))); /* X & 0b1 */
+ e->emit(e->OR(dst_x, t1, t2));
+ e->emit(e->AND(t1, src_y, fs_reg(0xfffc))); /* Y & ~0b11 */
+ e->emit(e->SHR(t1, t1, fs_reg(1))); /* (Y & ~0b11) >> 1 */
+ e->emit(e->AND(t2, src_x, fs_reg(4))); /* X & 0b100 */
+ e->emit(e->SHR(t2, t2, fs_reg(2))); /* (X & 0b100) >> 2 */
+ e->emit(e->OR(dst_y, t1, t2));
+
+ /* Finally write the translated coordinates over the original ones. */
+ e->emit(e->MOV(coord, dst_x));
+ e->emit(e->MOV(offset(coord, 1), dst_y));
+}
+
+/**
+ * All the miptrees have the same "below" layout where both levels one and two
+ * are just below level zero. From there on level three is just below level
+ * two, level four just below level three and so on.
+ * The operation requires one temporary register.
+ *
+ * The heights of the levels of a miptree form a geometric sequence:
+ *
+ * h/1 + h/2 + h/4 + ... + h/2^n
+ * = h * 0.5^0 + h * 0.5^1 + h * 0.5^2 + ... + h * 0.5^(n - 1)
+ * = h * (1 - 0.5^n) / (1 - 0.5)
+ * = h * 2 * (1 - 0.5^n)
+ *
+ * Unfortunately individual levels are further aligned by four (< gen7) and
+ * by eight on later hardware preventing the use of the simple formula for the
+ * sum. Instead the implementation calculates the sum by iterating over the
+ * given 'n'.
+ */
+static void
+emit_calc_level_y_offset(fs_emitter *e, const fs_reg& height,
+ const fs_reg& level, const fs_reg& y)
+{
+ const unsigned align_h = e->brw->gen == 6 ? 4 : 8;
+ const fs_reg align_h_mask(0xffff - (align_h - 1));
+ fs_reg curr_h(e, glsl_type::uint_type);
+ fs_reg tmp(e, glsl_type::uint_type);
+
+ /* Consider levels 0-2. Levels one and two are just below level zero. */
+ fs_inst *inst = e->emit(BRW_OPCODE_CMP, reg_null_d, level, fs_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_G;
+ e->emit(BRW_OPCODE_IF);
+ e->emit(e->ADD(y, height, fs_reg(align_h - 1)));
+ e->emit(e->AND(y, y, align_h_mask)); /* y = ALIGN(height, align_h) */
+ e->emit(BRW_OPCODE_ELSE);
+ e->emit(e->MOV(y, fs_reg(0)));
+ e->emit(BRW_OPCODE_ENDIF);
+
+ /* Iterate over levels greater than two. */
+ e->emit(e->SHR(curr_h, height, fs_reg(2)));
+ e->emit(BRW_OPCODE_DO);
+ inst = e->emit(BRW_OPCODE_CMP, reg_null_ud, level, fs_reg(2));
+ inst->conditional_mod = BRW_CONDITIONAL_LE;
+ inst = e->emit(BRW_OPCODE_BREAK);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+
+ e->emit(e->ADD(tmp, curr_h, fs_reg(align_h - 1)));
+ e->emit(e->AND(tmp, tmp, align_h_mask));
+ e->emit(e->ADD(y, y, tmp)); /* y = y + ALIGN(curr_h, align_h) */
+
+ e->emit(e->SHR(curr_h, curr_h, fs_reg(1)));
+ e->emit(e->ADD(level, level, fs_reg(-1)));
+ e->emit(BRW_OPCODE_WHILE);
+}
+
+/**
+ * The x-coordinate is zero for levels zero and one. For the rest it is fixed
+ * to half of the base width - levels starting from two are just to the right
+ * of level one (which has half the width of level zero).
+ */
+static void
+emit_calc_level_x_offset(fs_emitter *e, const fs_reg& width,
+ const fs_reg& level, const fs_reg& x)
+{
+ static const unsigned align_w = 8;
+ fs_inst *inst;
+
+ inst = e->emit(BRW_OPCODE_CMP, reg_null_f, level, fs_reg(2));
+ inst->conditional_mod = BRW_CONDITIONAL_GE;
+ e->emit(BRW_OPCODE_IF);
+ e->emit(e->SHR(x, width, fs_reg(1))); /* w / 2 */
+ e->emit(e->ADD(x, x, fs_reg(align_w - 1))); /* (w / 2) + (align_w - 1) */
+ e->emit(e->AND(x, x, fs_reg(0xfff8))); /* ALIGN(w / 2, align_w) */
+ e->emit(BRW_OPCODE_ELSE);
+ inst = e->emit(e->MOV(x, fs_reg(0)));
+ e->emit(BRW_OPCODE_ENDIF);
+}
+
+static void
+update_tex_base_dimensions(struct gl_context *ctx,
+ struct brw_fragment_program *fp, unsigned s)
+{
+ const struct gl_program *prog = &fp->program.Base;
+
+ if (!(prog->SamplersUsed & (1 << s)))
+ return;
+
+ const unsigned unit = prog->SamplerUnits[s];
+ struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+ struct intel_texture_object *intelObj = intel_texture_object(tObj);
+ const struct intel_mipmap_tree *mt = intelObj->mt;
+
+ fp->tex_base_dimensions.base_width[s] = mt->logical_width0;
+ fp->tex_base_dimensions.base_height[s] = mt->logical_height0;
+}
+
+} /* empty namespace */
+
+fs_stencil_texturing::fs_stencil_texturing(fs_emitter *e,
+ unsigned base_level,
+ unsigned num_samples,
+ int sampler,
+ fs_reg *coord)
+ : e(e),
+ base_level(base_level),
+ num_samples(num_samples),
+ sampler(sampler),
+ coord(coord)
+{
+}
+
+void
+fs_stencil_texturing::offset_to_w_tiled_miplevel(const fs_reg& lod)
+{
+ fs_inst *inst;
+ fs_reg off(e, glsl_type::uint_type);
+
+ inst = e->emit(BRW_OPCODE_CMP, reg_null_ud, lod, fs_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_G;
+ e->emit(BRW_OPCODE_IF);
+ emit_calc_level_y_offset(e, *base_h, lod, off);
+ e->emit(e->ADD(offset(*coord, 1), offset(*coord, 1), off));
+ emit_calc_level_x_offset(e, *base_w, lod, off);
+ e->emit(e->ADD(*coord, *coord, off));
+ e->emit(BRW_OPCODE_ENDIF);
+}
+
+/**
+ * In order to access individual mip-levels the program needs to know the
+ * unaligned original base level dimensions. Surface is configured for full
+ * slice (i.e., full miptree) dimensions and the program accesses a particular
+ * level manually by modifying the texel coordinates given to the sampling
+ * engine.
+ * These dimensions are supplied to the program in builtin uniforms which
+ * are maintained here.
+ */
+void
+fs_stencil_texturing::setup_base_level(struct brw_fragment_program *fp,
+ struct brw_stage_prog_data *prog_data)
+{
+ if (e->dispatch_width == 8) {
+ base_w = new(e->mem_ctx)
+ fs_reg(UNIFORM, e->uniforms, BRW_REGISTER_TYPE_UD);
+ base_h = new(e->mem_ctx)
+ fs_reg(UNIFORM, e->uniforms + 1, BRW_REGISTER_TYPE_UD);
+
+ /* Set uniform source locations for 3d-state setup. */
+ prog_data->param[e->uniforms++] =
+ (const float *)&fp->tex_base_dimensions.base_width[sampler];
+ prog_data->param[e->uniforms++] =
+ (const float *)&fp->tex_base_dimensions.base_height[sampler];
+
+ update_tex_base_dimensions(&e->brw->ctx, fp, sampler);
+
+ /* Rest of the fragment compiler uses pointers of ir_variable as keys
+ * but any unique pointer in fact will do. Here we use pointers to the
+ * storage of the values.
+ */
+ hash_table_insert(e->variable_ht, base_w,
+ &fp->tex_base_dimensions.base_width[sampler]);
+ hash_table_insert(e->variable_ht, base_h,
+ &fp->tex_base_dimensions.base_height[sampler]);
+ } else {
+ base_w = (fs_reg *)hash_table_find(
+ e->variable_ht,
+ &fp->tex_base_dimensions.base_width[sampler]);
+ base_h = (fs_reg *)hash_table_find(
+ e->variable_ht,
+ &fp->tex_base_dimensions.base_height[sampler]);
+ }
+}
+
+void
+fs_stencil_texturing::emit_w_to_y_tiling(struct brw_fragment_program *fp,
+ struct brw_stage_prog_data *prog_data,
+ enum ir_texture_opcode op,
+ const fs_reg& lod,
+ const fs_reg& sample_index)
+{
+ fs_reg lod_ud;
+
+ if (op != ir_txf_ms) {
+ setup_base_level(fp, prog_data);
+
+ /* Adjust level of detail to take into account texture setting. */
+ lod_ud = fs_reg(e, glsl_type::uint_type);
+ e->emit(e->MOV(lod_ud, lod));
+ e->emit(e->ADD(lod_ud, lod_ud, fs_reg(base_level)));
+ }
+
+ /* Surface is sampled as 2x2 blocks. The coordinates will be modified
+ * accordingly and the lowest bits designating the individual sample/pixel
+ * need to be saved for final pixel selection.
+ */
+ x_lowest_bit = fs_reg(e, glsl_type::uint_type);
+ y_lowest_bit = fs_reg(e, glsl_type::uint_type);
+ e->emit(e->AND(x_lowest_bit, *coord, fs_reg(0x1)));
+ e->emit(e->AND(y_lowest_bit, offset(*coord, 1), fs_reg(0x1)));
+
+ if (op == ir_txf_ms) {
+ assert(!"Multisampled stencil texturing is not supported");
+ } else {
+ offset_to_w_tiled_miplevel(lod_ud);
+
+ /* Point the sampling engine to the beginning of the buffer. */
+ e->emit(e->MOV(lod, fs_reg(0)));
+ }
+
+ emit_translate_w_to_y_tiling(e, *coord);
+
+ /* Modify the pixel coordinates to point to the 2x2 block. */
+ e->emit(e->SHR(*coord, *coord, 2));
+}
+
+void
+fs_stencil_texturing::emit_pick_w_tiled_sample(const fs_reg& samples,
+ const fs_reg& sample_index)
+{
+ fs_inst *inst;
+
+ inst = e->emit(BRW_OPCODE_CMP, reg_null_f, y_lowest_bit, fs_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ inst = e->emit(e->SHR(samples, samples, fs_reg(16)));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+
+ inst = e->emit(BRW_OPCODE_CMP, reg_null_f, x_lowest_bit, fs_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ inst = e->emit(e->SHR(samples, samples, fs_reg(8)));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+
+ e->emit(e->AND(samples, samples, fs_reg(0xff)));
+}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_stencil_tex.h b/src/mesa/drivers/dri/i965/brw_fs_stencil_tex.h
new file mode 100644
index 0000000..5c7c42f
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_stencil_tex.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_FS_STENCIL_TEX_H
+#define BRW_FS_STENCIL_TEX_H
+
+#include "brw_fs_emit.h"
+
+class fs_stencil_texturing {
+public:
+ fs_stencil_texturing(fs_emitter *e,
+ unsigned base_level,
+ unsigned num_samples,
+ int sampler,
+ fs_reg *coord);
+
+ void emit_w_to_y_tiling(struct brw_fragment_program *fp,
+ struct brw_stage_prog_data *prog_data,
+ enum ir_texture_opcode op,
+ const fs_reg& lod, const fs_reg& sample_index);
+
+ void emit_pick_w_tiled_sample(const fs_reg& samples,
+ const fs_reg& sample_index);
+
+private:
+ void setup_base_level(struct brw_fragment_program *fp,
+ struct brw_stage_prog_data *prog_data);
+ void offset_to_w_tiled_miplevel(const fs_reg& lod);
+
+ fs_emitter *e;
+ const unsigned base_level;
+ const unsigned num_samples;
+ const int sampler;
+ fs_reg *coord;
+
+ /**
+ * Registers holding the dimensions of the base level of the texture being
+ * sampled. These are needed to resolve mip-level offsets and clamping
+ * coordinates.
+ */
+ fs_reg *base_w;
+ fs_reg *base_h;
+
+ /**
+ * Registers holding the least significant bits of the pixel coordinates.
+ * These are stored before coordinates are manipulated to designate 2x2
+ * blocks instead of individual pixels and used for the final sample
+ * selection.
+ */
+ fs_reg x_lowest_bit;
+ fs_reg y_lowest_bit;
+};
+
+#endif /* BRW_FS_STENCIL_TEX_H */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 2d9f421..f6bb010 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1694,6 +1694,14 @@ fs_visitor::visit(ir_texture *ir)
assert(!"Unrecognized texture opcode");
};
+ fs_stencil_texturing stencil_tex(this,
+ key->tex.w_tiled_base_level[sampler],
+ key->tex.num_w_tiled_samples[sampler],
+ sampler, &coordinate);
+ if (key->tex.num_w_tiled_samples[sampler])
+ stencil_tex.emit_w_to_y_tiling(brw_fragment_program(fp), stage_prog_data,
+ ir->op, lod, sample_index);
+
/* Writemasking doesn't eliminate channels on SIMD8 texture
* samples, so don't worry about them.
*/
@@ -1736,6 +1744,9 @@ fs_visitor::visit(ir_texture *ir)
emit_gen6_gather_wa(key->tex.gen6_gather_wa[sampler], dst);
}
+ if (key->tex.num_w_tiled_samples[sampler])
+ stencil_tex.emit_pick_w_tiled_sample(dst, sample_index);
+
swizzle_result(ir, dst, sampler);
}
--
1.8.3.1
More information about the mesa-dev
mailing list