[Mesa-dev] [PATCH 2/2] panfrost: Implement Midgard shader toolchain

Thu Jan 31 16:26:21 UTC 2019

Alyssa Rosenzweig <alyssa at rosenzweig.io> writes:

> This patch implements the free Midgard shader toolchain: the assembler,
> the disassembler, and the NIR-based compiler. The assembler is a
> standalone inaccessible Python script for reference purposes. The
> disassembler and the compiler are implemented in C, accessible via the
> standalone `midgard_compiler` binary. Later patches will use these
> interfaces from the driver for online compilation.
>
> Signed-off-by: Alyssa Rosenzweig <alyssa at rosenzweig.io>
> ---

> +static void
> +emit_load_const(compiler_context *ctx, nir_load_const_instr *instr)
> +{
> +        nir_ssa_def def = instr->def;
> +
> +        float *v = ralloc_array(NULL, float, 4);
> +        memcpy(v, &instr->value.f32, 4 * sizeof(float));
> +        _mesa_hash_table_u64_insert(ctx->ssa_constants, def.index + 1, v);
> +}

Looks like you leak the constants?  You could pass ctx->ssa_constants
instead of NULL and the allocation would be automatically freed.

> +static unsigned
> +nir_src_index(nir_src *src)
> +{
> +        if (src->is_ssa)
> +                return src->ssa->index;
> +        else
> +                return 4096 + src->reg.reg->index;
> +}
> +
> +static unsigned
> +nir_dest_index(nir_dest *dst)
> +{
> +        if (dst->is_ssa)
> +                return dst->ssa.index;
> +        else
> +                return 4096 + dst->reg.reg->index;
> +}

Instead of hardcoding 4096, use impl->ssa_index?

> +#define ALU_CASE(_components, nir, _op) \
> +	case nir_op_##nir: \
> +		components = _components; \
> +		op = midgard_alu_op_##_op; \
> +		break;
> +
> +static void
> +emit_alu(compiler_context *ctx, nir_alu_instr *instr)
> +{
> +        bool is_ssa = instr->dest.dest.is_ssa;
> +
> +        unsigned dest = nir_dest_index(&instr->dest.dest);
> +        unsigned nr_components = is_ssa ? instr->dest.dest.ssa.num_components : instr->dest.dest.reg.reg->num_components;
> +
> +        /* Most Midgard ALU ops have a 1:1 correspondance to NIR ops; these are
> +         * supported. A few do not and are commented for now. Also, there are a
> +         * number of NIR ops which Midgard does not support and need to be
> +         * lowered, also TODO. This switch block emits the opcode and calling
> +         * convention of the Midgard instruction; actual packing is done in
> +         * emit_alu below */
> +
> +        unsigned op, components;
> +
> +        switch (instr->op) {
> +                ALU_CASE(2, fadd, fadd);
> +                ALU_CASE(2, fmul, fmul);
> +                ALU_CASE(2, fmin, fmin);
> +                ALU_CASE(2, fmax, fmax);
> +                ALU_CASE(2, imin, imin);
> +                ALU_CASE(2, imax, imax);
> +                ALU_CASE(1, fmov, fmov);
> +                ALU_CASE(0, ffloor, ffloor);
> +                ALU_CASE(0, fceil, fceil);
> +                ALU_CASE(2, fdot3, fdot3);
> +                //ALU_CASE(2, fdot3r);
> +                ALU_CASE(2, fdot4, fdot4);
> +                //ALU_CASE(2, freduce);
> +                ALU_CASE(2, iadd, iadd);
> +                ALU_CASE(2, isub, isub);
> +                ALU_CASE(2, imul, imul);
> +
> +                /* XXX: Use fmov, not imov, since imov was causing major
> +                 * issues with texture precision? XXX research */
> +                ALU_CASE(1, imov, fmov);
> +
> +                ALU_CASE(2, feq, feq);
> +                ALU_CASE(2, fne, fne);
> +                ALU_CASE(2, flt, flt);
> +                ALU_CASE(2, ieq, ieq);
> +                ALU_CASE(2, ine, ine);
> +                ALU_CASE(2, ilt, ilt);
> +                //ALU_CASE(2, icsel, icsel);
> +                ALU_CASE(0, frcp, frcp);
> +                ALU_CASE(0, frsq, frsqrt);
> +                ALU_CASE(0, fsqrt, fsqrt);
> +                ALU_CASE(0, fexp2, fexp2);
> +                ALU_CASE(0, flog2, flog2);
> +
> +                ALU_CASE(3, f2i32, f2i);
> +                ALU_CASE(3, f2u32, f2u);
> +                ALU_CASE(3, i2f32, i2f);
> +                ALU_CASE(3, u2f32, u2f);
> +
> +                ALU_CASE(0, fsin, fsin);
> +                ALU_CASE(0, fcos, fcos);
> +
> +                ALU_CASE(2, iand, iand);
> +                ALU_CASE(2, ior, ior);
> +                ALU_CASE(2, ixor, ixor);
> +                ALU_CASE(0, inot, inot);
> +                ALU_CASE(2, ishl, ishl);
> +                ALU_CASE(2, ishr, iasr);
> +                ALU_CASE(2, ushr, ilsr);
> +                //ALU_CASE(2, ilsr, ilsr);
> +
> +                ALU_CASE(2, ball_fequal4, fball_eq);
> +                ALU_CASE(2, bany_fnequal4, fbany_neq);
> +                ALU_CASE(2, ball_iequal4, iball_eq);
> +                ALU_CASE(2, bany_inequal4, ibany_neq);

Looks like instead of encoding components here you could use
nir_op_info->num_inputs?

> +        nir_foreach_variable(var, &nir->uniforms) {
> +                if (glsl_get_base_type(var->type) == GLSL_TYPE_SAMPLER) continue;
> +
> +                unsigned length = glsl_get_aoa_size(var->type);
> +
> +                if (!length) {
> +                        length = glsl_get_length(var->type);
> +                }
> +
> +                if (!length) {
> +                        length = glsl_get_matrix_columns(var->type);
> +                }

This seems suspicious -- I don't have anything like this for my uniforms.

> +        if (ctx->stage == MESA_SHADER_VERTEX) {
> +                ctx->varying_nir_to_mdg = _mesa_hash_table_u64_create(NULL);
> +
> +                /* First, collect the special varyings */
> +                nir_foreach_variable(var, &nir->outputs) {
> +                        if (var->data.location == VARYING_SLOT_POS) {
> +                                /* Set position first, always. It takes up two
> +                                 * spots, the latter one is de facto unused (at
> +                                 * least from the shader's perspective), we
> +                                 * just need to skip over the spot*/
> +
> +                                _mesa_hash_table_u64_insert(ctx->varying_nir_to_mdg, var->data.driver_location + 1, (void *) ((uintptr_t) (0 + 1)));
> +                                ctx->varying_count = MAX2(ctx->varying_count, 2);
> +                        } else if (var->data.location == VARYING_SLOT_PSIZ) {
> +                                /* Set point size second (third, see above) */
> +                                _mesa_hash_table_u64_insert(ctx->varying_nir_to_mdg, var->data.driver_location + 1, (void *) ((uintptr_t) (2 + 1)));
> +                                ctx->varying_count = MAX2(ctx->varying_count, 3);
> +
> +                                program->writes_point_size = true;
> +                        }
> +                }

Using info.outputs_written might be nicer here.

> +
> +        /* Lower vars -- not I/O -- before epilogue */
> +
> +        NIR_PASS_V(nir, nir_lower_var_copies);
> +        NIR_PASS_V(nir, nir_lower_vars_to_ssa);
> +        NIR_PASS_V(nir, nir_split_var_copies);
> +        NIR_PASS_V(nir, nir_lower_var_copies);
> +        NIR_PASS_V(nir, nir_lower_global_vars_to_local);
> +        NIR_PASS_V(nir, nir_lower_var_copies);
> +        NIR_PASS_V(nir, nir_lower_vars_to_ssa);
> +        NIR_PASS_V(nir, nir_lower_io, nir_var_all, glsl_type_size, 0);

I'm skeptical that this many lower_var_copies() is needed :)

> diff --git a/src/gallium/drivers/panfrost/midgard/midgard_nir_algebraic.py b/src/gallium/drivers/panfrost/midgard/midgard_nir_algebraic.py
> new file mode 100644
> index 0000000000..44441727b7
> --- /dev/null
> +++ b/src/gallium/drivers/panfrost/midgard/midgard_nir_algebraic.py

> +algebraic = [
> +    (('b2i32', a), ('iand at 32', "a at 32", 1)),
> +    (('isign', a), ('imin', ('imax', a, -1), 1)),

I need to steal your isign.

> +    (('fge', a, b), ('flt', b, a)),
> +
> +    # XXX: We have hw ops for this, just unknown atm..
> +    #(('fsign at 32', a), ('i2f32 at 32', ('isign', ('f2i32 at 32', ('fmul', a, 0x43800000)))))
> +    #(('fsign', a), ('fcsel', ('fge', a, 0), 1.0, ('fcsel', ('flt', a, 0.0), -1.0, 0.0)))
> +    (('fsign', a), ('bcsel', ('fge', a, 0), 1.0, -1.0)),

Looks like your fsign never returns 0.0 like it should?

All of this is suggestions for future work.  I'm mostly glad to see the
driver coming into the tree at last.  Both patches are:

Acked-by: Eric Anholt <eric at anholt.net>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 832 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190131/833d0b9e/attachment-0001.sig>