[Mesa-dev] [PATCH 15/20] i965/fs: Add support for linear to W-tiled coordinate translation

Fri Apr 11 00:28:55 PDT 2014

Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
---
 src/mesa/drivers/dri/i965/brw_fs.h           |  8 +++
 src/mesa/drivers/dri/i965/brw_fs_emitter.cpp | 86 ++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index a30351d..eaa5332 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -295,6 +295,14 @@ protected:
                              const fs_reg& x,
                              const fs_reg& y);
 
+   void emit_linear_to_w_tiling(const fs_reg& t1,      /* scratch */
+                                const fs_reg& t2,      /* scratch */
+                                const fs_reg& stride,  /* multiplied by src_y */
+                                const fs_reg& src_x,   /* linear x, read only */
+                                const fs_reg& src_y,   /* linear y, read only */
+                                const fs_reg& dst_x,   /* W-tiled x, written */
+                                const fs_reg& dst_y);  /* W-tiled y, written */
+
    void push_force_uncompressed();
    void pop_force_uncompressed();
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp b/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp
index 22fa33d..0d5cfb4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emitter.cpp
@@ -208,6 +208,92 @@ fs_emitter::emit_coord_swizzling(const fs_reg& t1,
    emit(BRW_OPCODE_ENDIF);
 }
 
+/**
+ * Emit translation of coordinates src_x and src_y in linear memory space into
+ * corresponding coordinates dst_x and dst_y in W-tiled layout. The algorithm
+ * divides the linear space into W-tiles (64x64), sub-tiles (8x8),
+ * sub-sub-tiles (4x4) and finally into sub-sub-sub-tiles (2x2). Note that 8x8
+ * blocks are laid out in memory in column major order.
+ * The operation requires two temporary registers in addition to the source
+ * and destination. Note also that source and destination registers cannot
+ * overlap.
+ *
+ *  offset = y * stride + x
+ *  tile_y = y / 64
+ *  offset_x = (y % 64) * stride + x
+ *  tile_x = offset_x / (64 * 64)
+ *  tile_offset = offset % (64 * 64)           <==>   (offset & 0xfff)
+ *  s_tile_n = tile_offset / 64                <==>   (offset & 0xfff) >> 6
+ *  s_tile_x = s_tile_n / 8                    <==>   (offset & 0xfff) >> 9
+ *  s_tile_y = s_tile_n % 8                    <==>   (offset & 0x1c0) >> 6
+ *  s_tile_offset = tile_offset % 64           <==>   (x & 0x3f)
+ *  s_s_tile_n = s_tile_offset / 16            <==>   (x & 0x3f) >> 4
+ *  s_s_tile_y = s_s_tile_n / 2                <==>   (x & 0x3f) >> 5
+ *  s_s_tile_x = s_s_tile_n % 2                <==>   (x & 0x10) >> 4
+ *  s_s_tile_offset = s_tile_offset % 16       <==>   (x & 0x3f) & 0xf
+ *  s_s_s_tile_n = s_s_tile_offset / 4         <==>   (x & 0x0f) >> 2
+ *  s_s_s_tile_y = s_s_s_tile_n / 2            <==>   (x & 0x0f) >> 3
+ *  s_s_s_tile_x = s_s_s_tile_n % 2            <==>   (x & 0x04) >> 2
+ *  s_s_s_tile_offset = s_s_tile_offset % 4    <==>   (x & 0x3)
+ *
+ *  dst_y = tile_y            * 64 +     dst_y = (y & 0xffc0)            +
+ *          s_tile_y          * 8  +             ((offset & 0x1c0) >> 3) +
+ *          s_s_tile_y        * 4  +  <==>       ((x & 0x20) >> 3)       +
+ *          s_s_s_tile_y      * 2  +             ((x & 0x08) >> 2)       +
+ *          s_s_s_tile_offset / 2                ((x & 0x03) >> 1)
+ *
+ *  dst_x = tile_x            * 64 +     dst_x = ((((y & 0x3f) * stride + x) &
+ *                                                0xf000) / 64)           +
+ *          s_tile_x          * 8  +             ((offset & 0xe00) >> 6)  +
+ *          s_s_tile_x        * 4  +  <==>       ((x & 0x10) >> 2)        +
+ *          s_s_s_tile_x      * 2  +             ((x & 0x04) >> 1)        +
+ *          s_s_s_tile_offset % 2                 (x & 0x1)
+ */
+void
+fs_emitter::emit_linear_to_w_tiling(const fs_reg& t1,
+                                    const fs_reg& t2,
+                                    const fs_reg& stride,
+                                    const fs_reg& src_x,
+                                    const fs_reg& src_y,
+                                    const fs_reg& dst_x,
+                                    const fs_reg& dst_y)
+{
+   emit(AND(t1, src_y, brw_imm_uw(0x3f))); /* src_y & 0x3f */
+   emit(MUL(t1, t1, stride)); /* (src_y & 0x3f) * stride */
+   emit(ADD(t1, t1, src_x)); /* offset_x = (src_y & 0x3f) * stride + src_x */
+   emit(AND(t1, t1, brw_imm_uw(0xf000))); /* offset_x & 0xf000 */
+   emit(SHR(dst_x, t1, brw_imm_uw(6))); /* dst_x = tile_x * 64 */
+   emit(MUL(t1, src_y, stride)); /* src_y * stride */
+   emit(ADD(t1, t1, src_x)); /* offset = src_y * stride + src_x, kept in t1 */
+   emit(AND(t2, t1, brw_imm_uw(0xe00))); /* offset & 0xe00 */
+   emit(SHR(t2, t2, brw_imm_uw(6))); /* (offset & 0xe00) >> 6 */
+   emit(ADD(dst_x, dst_x, t2)); /* dst_x += ((offset & 0xe00) >> 6) */
+   emit(AND(t2, t1, brw_imm_uw(0x1c0))); /* offset & 0x1c0 */
+   emit(SHR(dst_y, t2, brw_imm_uw(3))); /* dst_y = (offset & 0x1c0) >> 3 */
+   emit(AND(t1, src_x, brw_imm_uw(0x10))); /* src_x & 0x10; offset is dropped */
+   emit(SHR(t1, t1, brw_imm_uw(2))); /* (src_x & 0x10) >> 2 */
+   emit(ADD(dst_x, dst_x, t1)); /* dst_x += ((src_x & 0x10) >> 2) */
+   emit(AND(t1, src_x, brw_imm_uw(0x4))); /* src_x & 0x4 */
+   emit(SHR(t1, t1, brw_imm_uw(1))); /* (src_x & 0x4) >> 1 */
+   emit(ADD(dst_x, dst_x, t1)); /* dst_x += ((src_x & 0x4) >> 1) */
+   emit(AND(t1, src_x, brw_imm_uw(0x1))); /* src_x & 0x1 */
+   emit(ADD(dst_x, dst_x, t1)); /* dst_x += (src_x & 0x1); dst_x is complete */
+   emit(AND(t1, src_y, brw_imm_uw(0xffc0))); /* src_y & 0xffc0 */
+   emit(ADD(dst_y, dst_y, t1)); /* dst_y += (src_y & 0xffc0) */
+   emit(AND(t1, src_x, brw_imm_uw(0x20))); /* src_x & 0x20 */
+   emit(SHR(t1, t1, brw_imm_uw(3))); /* (src_x & 0x20) >> 3 */
+   emit(ADD(dst_y, dst_y, t1)); /* dst_y += ((src_x & 0x20) >> 3) */
+   emit(AND(t1, src_x, brw_imm_uw(0x8))); /* src_x & 0x8 */
+   emit(SHR(t1, t1, brw_imm_uw(2))); /* (src_x & 0x8) >> 2 */
+   emit(ADD(dst_y, dst_y, t1)); /* dst_y += ((src_x & 0x8) >> 2) */
+   emit(AND(t1, src_x, brw_imm_uw(0x3))); /* src_x & 0x3 */
+   emit(SHR(t1, t1, brw_imm_uw(1))); /* (src_x & 0x3) >> 1 */
+   emit(ADD(dst_y, dst_y, t1)); /* dst_y += ((src_x & 0x3) >> 1); dst_y done */
+
+   if (brw->has_swizzling)
+      emit_coord_swizzling(t1, dst_x, dst_y); /* t1 is free again, pass it on */
+}
+
 fs_emitter::fs_emitter(struct brw_context *brw,
                        struct brw_wm_compile *c,
                        unsigned dispatch_width)
-- 
1.8.3.1