[Freedreno] [RFC 4/4] ir3: Use ir3_nir_lower_sampler_io pass
Eduardo Lima Mitev
elima at igalia.com
Fri Jan 25 15:48:33 UTC 2019
This effectively removes all offset calculations from
ir3_compiler_nir::get_image_offset(); the ir3_nir_lower_sampler_io pass
now emits them in NIR instead.
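For reference, the offset that get_image_offset() used to compute in the
backend (and that the lowering pass now materializes, handing the result
to the backend through the 4th coordinate component) is equivalent to
the following. This is a minimal C sketch with illustrative names, not
code from the patch:

    #include <stdint.h>

    /* Byte offset of an image texel, as the removed backend code computed
     * it from the per-image constants (bytes per pixel, y and z strides).
     */
    static uint32_t
    image_byte_offset(uint32_t x, uint32_t y, uint32_t z,
                      uint32_t bpp, uint32_t y_stride, uint32_t z_stride,
                      unsigned ncoords)
    {
       uint32_t off = x * bpp;      /* offset  = coords.x * bytes_per_pixel */
       if (ncoords > 1)
          off += y * y_stride;      /* offset += coords.y * y_pitch */
       if (ncoords > 2)
          off += z * z_stride;      /* offset += coords.z * z_pitch */
       return off;
    }

    /* Image atomics take a dword offset instead of a byte offset, hence
     * the extra shift-right by 2 the backend used to emit (the blob adds
     * an shr.b for the same reason).
     */
    static uint32_t
    image_dword_offset(uint32_t byte_off)
    {
       return byte_off >> 2;
    }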
No regressions observed on affected tests from the Khronos CTS and
piglit suites, compared to master.
Collecting useful stats on the helps/hurts caused by this pass is still
WIP. Very few shaders in the shader-db database exercise image store or
image atomic ops, and of those that do, most require a higher GLSL
version than freedreno supports, so they get skipped.
There is ongoing work on writing/porting shaders to collect useful
stats. So far, all shaders tested show no meaningful difference compared
to master.
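For clarity, the per-image stride constants read by the
nir_intrinsic_load_image_stride intrinsic (presumably introduced earlier
in this series) are indexed 0..2 within the image_dims const block, as
handled in the diff below. A hypothetical enum for illustration only;
the patch itself uses bare 0/1/2:

    /* Second const_index of nir_intrinsic_load_image_stride: */
    enum image_stride_index {
       IMAGE_STRIDE_BPP     = 0,  /* bytes per pixel            */
       IMAGE_STRIDE_Y_PITCH = 1,  /* bytes per row   (y stride) */
       IMAGE_STRIDE_Z_PITCH = 2,  /* bytes per slice (z stride) */
    };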
---
src/freedreno/ir3/ir3_compiler_nir.c | 61 +++++++++++++---------------
src/freedreno/ir3/ir3_nir.c | 1 +
2 files changed, 29 insertions(+), 33 deletions(-)
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index fd641735620..fe329db658c 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -548,6 +548,9 @@ emit_alu(struct ir3_context *ctx, nir_alu_instr *alu)
ir3_MADSH_M16(b, src[0], 0, src[1], 0,
ir3_MULL_U(b, src[0], 0, src[1], 0), 0), 0);
break;
+ case nir_op_imad:
+ dst[0] = ir3_MAD_S24(b, src[0], 0, src[1], 0, src[2], 0);
+ break;
case nir_op_ineg:
dst[0] = ir3_ABSNEG_S(b, src[0], IR3_REG_SNEG);
break;
@@ -1172,44 +1175,19 @@ get_image_type(const nir_variable *var)
static struct ir3_instruction *
get_image_offset(struct ir3_context *ctx, const nir_variable *var,
- struct ir3_instruction * const *coords, bool byteoff)
+ struct ir3_instruction * const *coords)
{
struct ir3_block *b = ctx->block;
- struct ir3_instruction *offset;
- unsigned ncoords = get_image_coords(var, NULL);
-
- /* to calculate the byte offset (yes, uggg) we need (up to) three
- * const values to know the bytes per pixel, and y and z stride:
- */
- unsigned cb = regid(ctx->so->constbase.image_dims, 0) +
- ctx->so->const_layout.image_dims.off[var->data.driver_location];
debug_assert(ctx->so->const_layout.image_dims.mask &
(1 << var->data.driver_location));
- /* offset = coords.x * bytes_per_pixel: */
- offset = ir3_MUL_S(b, coords[0], 0, create_uniform(b, cb + 0), 0);
- if (ncoords > 1) {
- /* offset += coords.y * y_pitch: */
- offset = ir3_MAD_S24(b, create_uniform(b, cb + 1), 0,
- coords[1], 0, offset, 0);
- }
- if (ncoords > 2) {
- /* offset += coords.z * z_pitch: */
- offset = ir3_MAD_S24(b, create_uniform(b, cb + 2), 0,
- coords[2], 0, offset, 0);
- }
-
- if (!byteoff) {
- /* Some cases, like atomics, seem to use dword offset instead
- * of byte offsets.. blob just puts an extra shr.b in there
- * in those cases:
- */
- offset = ir3_SHR_B(b, offset, 0, create_immed(b, 2), 0);
- }
-
+ /* ir3_nir_lower_sampler_io pass should have placed the final
+ * byte-offset (or dword offset for atomics) at the 4th component
+ * of the coordinate vector.
+ */
return ir3_create_collect(ctx, (struct ir3_instruction*[]){
- offset,
+ coords[3],
create_immed(b, 0),
}, 2);
}
@@ -1341,7 +1319,7 @@ emit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
* src2 is 64b byte offset
*/
- offset = get_image_offset(ctx, var, coords, true);
+ offset = get_image_offset(ctx, var, coords);
/* NOTE: stib seems to take byte offset, but stgb.typed can be used
* too and takes a dword offset.. not quite sure yet why blob uses
@@ -1443,7 +1421,7 @@ emit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr)
*/
src0 = ir3_get_src(ctx, &intr->src[3])[0];
src1 = ir3_create_collect(ctx, coords, ncoords);
- src2 = get_image_offset(ctx, var, coords, false);
+ src2 = get_image_offset(ctx, var, coords);
switch (intr->intrinsic) {
case nir_intrinsic_image_deref_atomic_add:
@@ -1612,6 +1590,23 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
}
switch (intr->intrinsic) {
+ case nir_intrinsic_load_image_stride: {
+ idx = intr->const_index[0];
+
+ /* this is the index into image_dims offsets, which can take
+ * values 0, 1 or 2 (bpp, y-stride, z-stride respectively).
+ */
+ uint8_t off = intr->const_index[1];
+ debug_assert(off <= 2);
+
+ unsigned cb = regid(ctx->so->constbase.image_dims, 0) +
+ ctx->so->const_layout.image_dims.off[idx];
+ debug_assert(ctx->so->const_layout.image_dims.mask & (1 << idx));
+
+ dst[0] = create_uniform(b, cb + off);
+ break;
+ }
+
case nir_intrinsic_load_uniform:
idx = nir_intrinsic_base(intr);
const_offset = nir_src_as_const_value(intr->src[0]);
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index d9fcf798b3d..68a0edb343c 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -160,6 +160,7 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
OPT_V(s, nir_opt_global_to_local);
OPT_V(s, nir_lower_regs_to_ssa);
+ OPT_V(s, ir3_nir_lower_sampler_io);
if (key) {
if (s->info.stage == MESA_SHADER_VERTEX) {
--
2.20.1