[Mesa-dev] [PATCH 15/20] i965/fs: Add support for linear to W-tiled coordinate translation
Topi Pohjolainen
topi.pohjolainen at intel.com
Fri Apr 11 00:28:55 PDT 2014
Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
---
src/mesa/drivers/dri/i965/brw_fs.h | 8 +++
src/mesa/drivers/dri/i965/brw_fs_emitter.cpp | 86 ++++++++++++++++++++++++++++
2 files changed, 94 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index a30351d..eaa5332 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -295,6 +295,14 @@ protected:
const fs_reg& x,
const fs_reg& y);
+ void emit_linear_to_w_tiling(const fs_reg& t1, /* temporary */
+ const fs_reg& t2, /* temporary */
+ const fs_reg& stride, /* multiplied by y to form the linear offset */
+ const fs_reg& src_x, /* linear x (input) */
+ const fs_reg& src_y, /* linear y (input) */
+ const fs_reg& dst_x, /* W-tiled x (output) */
+ const fs_reg& dst_y); /* W-tiled y (output) */
+
void push_force_uncompressed();
void pop_force_uncompressed();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp b/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp
index 22fa33d..0d5cfb4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp
@@ -208,6 +208,92 @@ fs_emitter::emit_coord_swizzling(const fs_reg& t1,
emit(BRW_OPCODE_ENDIF);
}
+/**
+ * Emit translation of coordinates src_x and src_y in linear memory space into
+ * corresponding coordinates dst_x and dst_y in W-tiled layout. The algorithm
+ * divides the linear space into W-tiles (64x64), sub-tiles (8x8),
+ * sub-sub-tiles (4x4) and finally into sub-sub-sub-tiles (2x2). Note that 8x8
+ * blocks are laid out in memory in column major order.
+ * The operation requires two temporary registers in addition to the source
+ * and destination. Note also that source and destination registers cannot
+ * overlap.
+ *
+ * offset = y * stride + x
+ * tile_y = y / 64
+ * offset_x = (y % 64) * stride + x
+ * tile_x = offset_x / (64 * 64)
+ * tile_offset = offset % (64 * 64) <==> (offset & 0xfff)
+ * s_tile_n = tile_offset / 64 <==> (offset & 0xfff) >> 6
+ * s_tile_x = s_tile_n / 8 <==> (offset & 0xfff) >> 9
+ * s_tile_y = s_tile_n % 8 <==> (offset & 0x1c0) >> 6
+ * s_tile_offset = tile_offset % 64 <==> (x & 0x3f)
+ * s_s_tile_n = s_tile_offset / 16 <==> (x & 0x3f) >> 4
+ * s_s_tile_y = s_s_tile_n / 2 <==> (x & 0x3f) >> 5
+ * s_s_tile_x = s_s_tile_n % 2 <==> (x & 0x10) >> 4
+ * s_s_tile_offset = s_tile_offset % 16 <==> (x & 0x3f) & 0xf
+ * s_s_s_tile_n = s_s_tile_offset / 4 <==> (x & 0x0f) >> 2
+ * s_s_s_tile_y = s_s_s_tile_n / 2 <==> (x & 0x0f) >> 3
+ * s_s_s_tile_x = s_s_s_tile_n % 2 <==> (x & 0x04) >> 2
+ * s_s_s_tile_offset = s_s_tile_offset % 4 <==> (x & 0x3)
+ *
+ * dst_y = tile_y * 64 + dst_y = (y & 0xffc0) +
+ * s_tile_y * 8 + ((offset & 0x1c0) >> 3) +
+ * s_s_tile_y * 4 + <==> ((x & 0x20) >> 3) +
+ * s_s_s_tile_y * 2 + ((x & 0x08) >> 2) +
+ * s_s_s_tile_offset / 2 ((x & 0x03) >> 1)
+ *
+ * dst_x = tile_x * 64 + dst_x = ((((y & 0x3f) * stride + x) &
+ * 0xf000) / 64) +
+ * s_tile_x * 8 + ((offset & 0xe00) >> 6) +
+ * s_s_tile_x * 4 + <==> ((x & 0x10) >> 2) +
+ * s_s_s_tile_x * 2 + ((x & 0x04) >> 1) +
+ * s_s_s_tile_offset % 2 (x & 0x1)
+ */
+void
+fs_emitter::emit_linear_to_w_tiling(const fs_reg& t1,
+ const fs_reg& t2,
+ const fs_reg& stride,
+ const fs_reg& src_x,
+ const fs_reg& src_y,
+ const fs_reg& dst_x,
+ const fs_reg& dst_y)
+{
+ emit(AND(t1, src_y, brw_imm_uw(0x3f))); /* src_y & 0x3f, i.e. src_y % 64 */
+ emit(MUL(t1, t1, stride)); /* (src_y & 0x3f) * stride */
+ emit(ADD(t1, t1, src_x)); /* offset_x = (src_y & 0x3f) * stride + src_x */
+ emit(AND(t1, t1, brw_imm_uw(0xf000))); /* offset_x & 0xf000 */
+ emit(SHR(dst_x, t1, brw_imm_uw(6))); /* dst_x = tile_x * 64 */
+ emit(MUL(t1, src_y, stride)); /* src_y * stride */
+ emit(ADD(t1, t1, src_x)); /* offset; t1 is read again for both masks below */
+ emit(AND(t2, t1, brw_imm_uw(0xe00))); /* offset & 0xe00 */
+ emit(SHR(t2, t2, brw_imm_uw(6))); /* (offset & 0xe00) >> 6 */
+ emit(ADD(dst_x, dst_x, t2)); /* dst_x += ((offset & 0xe00) >> 6) */
+ emit(AND(t2, t1, brw_imm_uw(0x1c0))); /* offset & 0x1c0; last read of offset in t1 */
+ emit(SHR(dst_y, t2, brw_imm_uw(3))); /* dst_y = (offset & 0x1c0) >> 3 */
+ emit(AND(t1, src_x, brw_imm_uw(0x10))); /* src_x & 0x10; t1 is plain scratch from here on */
+ emit(SHR(t1, t1, brw_imm_uw(2))); /* (src_x & 0x10) >> 2 */
+ emit(ADD(dst_x, dst_x, t1)); /* dst_x += ((src_x & 0x10) >> 2) */
+ emit(AND(t1, src_x, brw_imm_uw(0x4))); /* src_x & 0x4 */
+ emit(SHR(t1, t1, brw_imm_uw(1))); /* (src_x & 0x4) >> 1 */
+ emit(ADD(dst_x, dst_x, t1)); /* dst_x += ((src_x & 0x4) >> 1) */
+ emit(AND(t1, src_x, brw_imm_uw(0x1))); /* src_x & 0x1 */
+ emit(ADD(dst_x, dst_x, t1)); /* dst_x += (src_x & 0x1); dst_x is complete */
+ emit(AND(t1, src_y, brw_imm_uw(0xffc0))); /* src_y & 0xffc0, i.e. tile_y * 64 */
+ emit(ADD(dst_y, dst_y, t1)); /* dst_y += (src_y & 0xffc0) */
+ emit(AND(t1, src_x, brw_imm_uw(0x20))); /* src_x & 0x20 */
+ emit(SHR(t1, t1, brw_imm_uw(3))); /* (src_x & 0x20) >> 3 */
+ emit(ADD(dst_y, dst_y, t1)); /* dst_y += ((src_x & 0x20) >> 3) */
+ emit(AND(t1, src_x, brw_imm_uw(0x8))); /* src_x & 0x8 */
+ emit(SHR(t1, t1, brw_imm_uw(2))); /* (src_x & 0x8) >> 2 */
+ emit(ADD(dst_y, dst_y, t1)); /* dst_y += ((src_x & 0x8) >> 2) */
+ emit(AND(t1, src_x, brw_imm_uw(0x3))); /* src_x & 0x3 */
+ emit(SHR(t1, t1, brw_imm_uw(1))); /* (src_x & 0x3) >> 1 */
+ emit(ADD(dst_y, dst_y, t1)); /* dst_y += ((src_x & 0x3) >> 1); dst_y is complete */
+
+ if (brw->has_swizzling) /* only when the context reports swizzling */
+ emit_coord_swizzling(t1, dst_x, dst_y); /* t1 is handed over as the helper's temporary */
+}
+
fs_emitter::fs_emitter(struct brw_context *brw,
struct brw_wm_compile *c,
unsigned dispatch_width)
--
1.8.3.1
More information about the mesa-dev
mailing list