[Mesa-dev] [PATCH 16/20] i965/fs: Add support for W-tiled to linear coordinate translation
Topi Pohjolainen
topi.pohjolainen at intel.com
Fri Apr 11 00:28:56 PDT 2014
Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
---
src/mesa/drivers/dri/i965/brw_fs.h | 8 +++
src/mesa/drivers/dri/i965/brw_fs_emitter.cpp | 93 ++++++++++++++++++++++++++++
2 files changed, 101 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index eaa5332..0d9cbd1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -303,6 +303,14 @@ protected:
const fs_reg& dst_x,
const fs_reg& dst_y);
+ void emit_w_tiling_to_linear(const fs_reg& t1,
+ const fs_reg& t2,
+ const fs_reg& stride,
+ const fs_reg& src_x,
+ const fs_reg& src_y,
+ const fs_reg& dst_x,
+ const fs_reg& dst_y);
+
void push_force_uncompressed();
void pop_force_uncompressed();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp b/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp
index 0d5cfb4..88e898d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp
@@ -294,6 +294,99 @@ fs_emitter::emit_linear_to_w_tiling(const fs_reg& t1,
emit_coord_swizzling(t1, dst_x, dst_y);
}
+/**
+ * Emit translation of coordinates src_x and src_y in W-tiled space into
+ * corresponding coordinates dst_x and dst_y in linear layout.
+ * Consider the tiling algorithm formula in Ironlake and Sandybridge PRMs:
+ * Volume 1, Part 2, Section 4.5.3. The equation translates coordinates x and
+ * y in w-tiling layout into corresponding byte offset in linear memory.
+ * The operation requires two temporary registers in addition to the source
+ * and destination. Note also that source and destination registers cannot
+ * overlap.
+ *
+ * tile_x = x / 64
+ * tile_y = y / 64
+ * byte_x = x % 64
+ * byte_y = y % 64
+ *
+ * u = tile_y * 64 * stride u = (y & 0xffc0) * stride
+ * + tile_x * 4096 + (x & 0xfff8) * 64
+ * + 512 * (byte_x / 8)
+ * + 64 * (byte_y / 8) + 8 * (y & 0x3c)
+ * + 32 * ((byte_y / 4) % 2)
+ * + 16 * ((byte_x / 4) % 2) <==> + 4 * (x & 0x4)
+ * + 8 * ((byte_y / 2) % 2) + 4 * (y & 0x2)
+ * + 4 * ((byte_x / 2) % 2) + 2 * (x & 0x2)
+ * + 2 * (byte_y % 2) + 2 * (y & 0x1)
+ * + 1 * (byte_x % 2) + 1 * (x & 0x1)
+ *
+ * where
+ *
+ * 8 * (y & 0x3c) = 8 * (y & 0x38) + 8 * (y & 0x4)
+ * = 64 * ((y % 64) / 8) + 8 * 4 * (((y % 64) / 4) % 2)
+ *
+ * (x & 0xfff8) * 64 = 64 * (x & ~0x3f) + 64 * (x & 0x38)
+ * = 64 * 64 * (x / 64) + 64 * 8 * ((x % 64) / 8)
+ *
+ * The linear offset corresponds to linear coordinates x_p, y_p simply as:
+ * u = y_p * stride + x_p. Dividing both sides by stride and taking into
+ * account the integer rounding to zero yields:
+ *
+ * y_p = (y & 0xffc0)
+ * + ((x & 0xfff8) * 64 + 8 * (y & 0x3c)) / stride
+ *
+ * x_p = 4 * (x & 0x4)
+ * + 4 * (y & 0x2)
+ * + 2 * (x & 0x2)
+ * + 2 * (y & 0x1)
+ * + 1 * (x & 0x1)
+ * + ((x & 0xfff8) * 64 + 8 * (y & 0x3c)) % stride
+ */
+void
+fs_emitter::emit_w_tiling_to_linear(const fs_reg& t1,
+ const fs_reg& t2,
+ const fs_reg& stride,
+ const fs_reg& src_x,
+ const fs_reg& src_y,
+ const fs_reg& dst_x,
+ const fs_reg& dst_y)
+{
+ if (brw->has_swizzling)
+ emit_coord_swizzling(t1, src_x, src_y); /* NOTE(review): targets the source coordinates, presumably rewriting them in place with t1 as scratch -- confirm */
+
+ emit(AND(t1, src_y, brw_imm_uw(0x3c))); /* t1 = src_y & 0x3c */
+ emit(SHL(t1, t1, brw_imm_uw(3))); /* t1 = 8 * (src_y & 0x3c) */
+ emit(AND(t2, src_x, brw_imm_uw(0xfff8))); /* t2 = src_x & 0xfff8 */
+ emit(SHL(t2, t2, brw_imm_uw(6))); /* t2 = (src_x & 0xfff8) * 64 */
+ emit(ADD(t1, t1, t2)); /* t1 = (src_x & 0xfff8) * 64 + 8 * (src_y & 0x3c) */
+
+ /* On gen6 math needs a register with hstride == 1, so copy stride into t2 (its previous value is already folded into t1). */
+ if (brw->gen == 6)
+ emit(MOV(t2, stride));
+
+ /* dst_y = ((src_x & 0xfff8) * 64 + 8 * (src_y & 0x3c)) / stride */
+ emit(SHADER_OPCODE_INT_QUOTIENT, dst_y, t1, brw->gen == 6 ? t2 : stride);
+ /* dst_x = ((src_x & 0xfff8) * 64 + 8 * (src_y & 0x3c)) % stride */
+ emit(SHADER_OPCODE_INT_REMAINDER, dst_x, t1, brw->gen == 6 ? t2 : stride);
+
+ emit(AND(t1, src_y, brw_imm_uw(0xffc0))); /* t1 = src_y & 0xffc0 (t1 is reused as scratch from here on) */
+ emit(ADD(dst_y, dst_y, t1)); /* dst_y += (src_y & 0xffc0) */
+ emit(AND(t1, src_x, brw_imm_uw(0x4))); /* t1 = src_x & 0x4 */
+ emit(SHL(t1, t1, brw_imm_uw(2))); /* t1 = (src_x & 0x4) * 4 */
+ emit(ADD(dst_x, dst_x, t1)); /* dst_x += ((src_x & 0x4) * 4), i.e. src_x bit 2 lands in bit 4 */
+ emit(AND(t1, src_y, brw_imm_uw(0x2))); /* t1 = src_y & 0x2 */
+ emit(SHL(t1, t1, brw_imm_uw(2))); /* t1 = (src_y & 0x2) * 4 */
+ emit(ADD(dst_x, dst_x, t1)); /* dst_x += ((src_y & 0x2) * 4), i.e. src_y bit 1 lands in bit 3 */
+ emit(AND(t1, src_x, brw_imm_uw(0x2))); /* t1 = src_x & 0x2 */
+ emit(SHL(t1, t1, brw_imm_uw(1))); /* t1 = (src_x & 0x2) * 2 */
+ emit(ADD(dst_x, dst_x, t1)); /* dst_x += ((src_x & 0x2) * 2), i.e. src_x bit 1 lands in bit 2 */
+ emit(AND(t1, src_y, brw_imm_uw(0x1))); /* t1 = src_y & 0x1 */
+ emit(SHL(t1, t1, brw_imm_uw(1))); /* t1 = (src_y & 0x1) * 2 */
+ emit(ADD(dst_x, dst_x, t1)); /* dst_x += ((src_y & 0x1) * 2), i.e. src_y bit 0 lands in bit 1 */
+ emit(AND(t1, src_x, brw_imm_uw(0x1))); /* t1 = src_x & 0x1 */
+ emit(ADD(dst_x, dst_x, t1)); /* dst_x += (src_x & 0x1), i.e. src_x bit 0 stays in bit 0 */
+}
+
fs_emitter::fs_emitter(struct brw_context *brw,
struct brw_wm_compile *c,
unsigned dispatch_width)
--
1.8.3.1
More information about the mesa-dev
mailing list