[Mesa-dev] [RFC 21/27] i965/blorp: Refactor w-tiling to y-tiling translation
Topi Pohjolainen
topi.pohjolainen at intel.com
Sat Feb 22 01:05:47 PST 2014
Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
---
src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 23 +-----
src/mesa/drivers/dri/i965/brw_fs.h | 7 ++
src/mesa/drivers/dri/i965/brw_fs_emitter.cpp | 110 +++++++++++++++++++++++++++
3 files changed, 118 insertions(+), 22 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 9745c28..d8dc49b 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1177,28 +1177,7 @@ brw_blorp_blit_program::translate_tiling(bool old_tiled_w, bool new_tiled_w)
emit(OR(Yp, t1, t2));
SWAP_XY_AND_XPYP();
} else {
- /* Applying the same logic as above, but in reverse, we obtain the
- * formulas:
- *
- * X' = (X & ~0b101) << 1 | (Y & 0b10) << 2 | (Y & 0b1) << 1 | X & 0b1
- * Y' = (Y & ~0b11) >> 1 | (X & 0b100) >> 2
- */
- emit(AND(t1, X, brw_imm_uw(0xfffa))); /* X & ~0b101 */
- emit(SHL(t1, t1, brw_imm_uw(1))); /* (X & ~0b101) << 1 */
- emit(AND(t2, Y, brw_imm_uw(2))); /* Y & 0b10 */
- emit(SHL(t2, t2, brw_imm_uw(2))); /* (Y & 0b10) << 2 */
- emit(OR(t1, t1, t2)); /* (X & ~0b101) << 1 | (Y & 0b10) << 2 */
- emit(AND(t2, Y, brw_imm_uw(1))); /* Y & 0b1 */
- emit(SHL(t2, t2, brw_imm_uw(1))); /* (Y & 0b1) << 1 */
- emit(OR(t1, t1, t2)); /* (X & ~0b101) << 1 | (Y & 0b10) << 2
- | (Y & 0b1) << 1 */
- emit(AND(t2, X, brw_imm_uw(1))); /* X & 0b1 */
- emit(OR(Xp, t1, t2));
- emit(AND(t1, Y, brw_imm_uw(0xfffc))); /* Y & ~0b11 */
- emit(SHR(t1, t1, brw_imm_uw(1))); /* (Y & ~0b11) >> 1 */
- emit(AND(t2, X, brw_imm_uw(4))); /* X & 0b100 */
- emit(SHR(t2, t2, brw_imm_uw(2))); /* (X & 0b100) >> 2 */
- emit(OR(Yp, t1, t2));
+ emit_translate_w_to_y_tiling(t1, t2, X, Y, Xp, Yp);
SWAP_XY_AND_XPYP();
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 5b0687e..e02c025 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -290,6 +290,13 @@ protected:
fs_inst *emit(fs_inst *inst);
void emit(exec_list list);
+ void emit_translate_w_to_y_tiling(const fs_reg& t1,
+ const fs_reg& t2,
+ const fs_reg& src_x,
+ const fs_reg& src_y,
+ const fs_reg& dst_x,
+ const fs_reg& dst_y);
+
void push_force_uncompressed();
void pop_force_uncompressed();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp b/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp
index 6f1e2dd..6ba6516 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp
@@ -194,6 +194,116 @@ fs_emitter::pop_force_uncompressed()
assert(force_uncompressed_stack >= 0);
}
+/**
+ * Emit translation of pixel coordinates src_x and src_y in W-tiled layout
+ * to corresponding coordinates dst_x and dst_y in Y-tiled layout.
+ * The operation requires two temporary registers in addition to the source
+ * and destination. Note also that source and destination registers cannot
+ * overlap.
+ *
+ * Both W-tiling and Y-tiling have equal tile size of one page. The difference
+ * is in how pixels are organised within the page: W-tile has 64 rows each
+ * holding in turn two 32 byte sub-tiles whereas Y-tile has 32 rows each
+ * holding eight 16 byte sub-tiles. The sub-tiles in turn have different
+ * layout: W is 8x4 bytes and Y is 16x1. Now, in Y-tiling two subsequent tiles
+ * are on top of each other. If each pair is thought to form one tile instead
+ * one can think Y-tiling to consist of 16 rows and eight columns of 32-byte
+ * subtiles.
+ *
+ * This organisation is independent of the pixel format used and
+ * hence the number of pixels within a tile varies based on how many bytes
+ * per pixel are needed.
+ *
+ * The operation here is fixed to one-byte-per-pixel formats only - it
+ * assumes that Y-subtile holds 16 pixels per row (and W 8 respectively).
+ *
+ * First examine the X coordinate representing an address using W-tiling.
+ * The lowest six bits represent a column within a tile while the higher bits
+ * designate a tile number horizontally.
+ * As a Y-tile can hold twice as many pixels horizontally as a W-tile, the
+ * tile number needs to be multiplied by two in order to move to the desired
+ * tile horizontally:
+ *
+ * (X & ~0b111) << 1 == (X & 0xFFF8) << 1 (1)
+ *
+ * The lowest six bits can be further divided into two parts - the subtile
+ * number and the remaining coordinate within the subtile. These are three
+ * bits each for W-tiling. Unlike the Y-subtile, the W-subtile is further
+ * divided into 4x4 and again into 2x2 subtiles. Hence the third lowest bit
+ * represents the 4x4-subtile number, the second lowest the 2x2-subtile
+ * number and finally the lowest the offset within the 2x2 block.
+ *
+ * 01 23 45 67 0123456789ABCDEF W Y
+ * ++==+==++==+==++ +---------------+ a: 3,1 7,0
+ * 0 || | || | || 0 | a | b: 1,2 1,1
+ * 1 || | a|| | || 1 | b c | c: 5,3 10,1
+ * ++--+--++--+--++ +---------------+
+ * 2 || b| || | ||
+ * 3 || | ||c | ||
+ * ++==+==++==+==++
+ *
+ * Observing the W layout it can be seen that x-coordinates greater or equal
+ * to four reside on the second half of the subtile - in Y-tile this
+ * corresponds to the second row calling for the following compensation in
+ * the vertical coordinate:
+ *
+ * (X & 0b100) >> 2 == (X & 0x4) >> 2 (2)
+ *
+ * The 2x2 subtile in turn results in two x-coordinates x and x + 2 in the
+ * same row (in the same 8x4 subtile) to be 4 bytes apart in linear memory.
+ * As addresses in Y-subtile itself are linear, the compensation in the
+ * horizontal coordinate is:
+ *
+ * (X & 0b10) << 1 (3)
+ *
+ * Combined with (1):
+ *
+ * (X & ~0b101) << 1 == (X & 0xFFFA) << 1 (4)
+ *
+ * Similarly in 4x4 W-subtile in the same column any two y-coordinates y and
+ * y + 2 are 8 bytes apart in linear memory addresses. In 2x2-subtile in turn
+ * y and y + 1 are two bytes apart. This results in a horizontal compensation
+ * in Y layout:
+ *
+ * (Y & 0b10) << 2 | (Y & 0b1) << 1 (5)
+ *
+ * Taking into account (4) and (5), one gets:
+ *
+ * X' = (X & ~0b101) << 1 | (Y & 0b10) << 2 | (Y & 0b1) << 1 | X & 0b1
+ *
+ * For y-coordinate one needs to consider full tiles and (2). As Y-layout
+ * has twice as many tiles as W horizontally, the number of tiles in vertical
+ * direction needs to be divided by two. Every two tiles on top of each other
+ * in W-layout are laid out again side by side horizontally.
+ *
+ * Y' = (Y & ~0b11) >> 1 | (X & 0b100) >> 2
+ */
+void
+fs_emitter::emit_translate_w_to_y_tiling(const fs_reg& t1,
+ const fs_reg& t2,
+ const fs_reg& src_x,
+ const fs_reg& src_y,
+ const fs_reg& dst_x,
+ const fs_reg& dst_y)
+{
+ emit(AND(t1, src_x, brw_imm_uw(0xfffa))); /* X & ~0b101 */
+ emit(SHL(t1, t1, brw_imm_uw(1))); /* (X & ~0b101) << 1 */
+ emit(AND(t2, src_y, brw_imm_uw(2))); /* Y & 0b10 */
+ emit(SHL(t2, t2, brw_imm_uw(2))); /* (Y & 0b10) << 2 */
+ emit(OR(t1, t1, t2)); /* (X & ~0b101) << 1 | (Y & 0b10) << 2 */
+ emit(AND(t2, src_y, brw_imm_uw(1))); /* Y & 0b1 */
+ emit(SHL(t2, t2, brw_imm_uw(1))); /* (Y & 0b1) << 1 */
+ emit(OR(t1, t1, t2)); /* (X & ~0b101) << 1 | (Y & 0b10) << 2
+ | (Y & 0b1) << 1 */
+ emit(AND(t2, src_x, brw_imm_uw(1))); /* X & 0b1 */
+ emit(OR(dst_x, t1, t2));
+ emit(AND(t1, src_y, brw_imm_uw(0xfffc))); /* Y & ~0b11 */
+ emit(SHR(t1, t1, brw_imm_uw(1))); /* (Y & ~0b11) >> 1 */
+ emit(AND(t2, src_x, brw_imm_uw(4))); /* X & 0b100 */
+ emit(SHR(t2, t2, brw_imm_uw(2))); /* (X & 0b100) >> 2 */
+ emit(OR(dst_y, t1, t2));
+}
+
fs_emitter::fs_emitter(struct brw_context *brw,
struct brw_wm_compile *_c,
unsigned _dispatch_width)
--
1.8.3.1
More information about the mesa-dev
mailing list