[Mesa-dev] [PATCH 08/21] etnaviv: nir: add virtual register classes
Rob Clark
robdclark at gmail.com
Tue Jun 5 15:39:00 UTC 2018
On Tue, Jun 5, 2018 at 10:38 AM, Philipp Zabel <p.zabel at pengutronix.de> wrote:
> Since all threads share a global temporary vec4 register file, it is
> important to reduce the temporary register use of shaders.
> Using the source swizzles and destination write masks of ALU operations,
> we can layer smaller virtual registers on top of the physical base
> registers; these overlap with their base register and partially with
> each other:
>
> +----+---------+-------------+---------+
> |VEC4| VEC3 | VEC2 | SCALAR |
> +----+---------+-------------+---------+
> | X | X X X | X X X | X |
> | Y | Y Y Y | Y Y Y | Y |
> | Z | Z Z Z | Z Z Z | Z |
> | W | W W W | W W W | W |
> +----+---------+-------------+---------+
>
> There are four possible virtual vec3 registers that leave the remaining
> component usable as a scalar virtual register, six possible vec2
> registers, and four possible scalar registers that only use a single
> component.
>
> This patch adds an interference graph for virtual registers to the
> register allocator, using information about SSA interference and virtual
> register overlap. If possible, SSAs with smaller num_components are
> allocated from the unused components of already partially used temporary
> registers.
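
(To make that concrete, with made-up ssa/temp numbers and the
VEC3_XYZ + SCALAR_W pairing the patch itself uses as an example:

   t2.xyz  <-  ssa_5   vec3,   class VIRT_VEC3,   write mask 0x7
   t2.w    <-  ssa_7   scalar, class VIRT_SCALAR, write mask 0x8

every ALU use of ssa_7 then gets its source swizzle rewritten to .wwww
so it keeps reading the right component.)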
>
> Signed-off-by: Philipp Zabel <p.zabel at pengutronix.de>
> Signed-off-by: Michael Tretter <m.tretter at pengutronix.de>
> ---
So, one quick note: constructing the register set and classes can be
expensive. You probably only want to do this once and then re-use it
for each shader.
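
Something like the following rough sketch (untested; the etna_compiler
struct, the function name, and the idea of hanging it off the
screen/compiler at creation time are just my assumptions, not part of
the patch). Build the set once and hand it to etna_assign_registers()
for every compile:

struct etna_compiler {
   struct ra_regs *regs;
   int class[ETNA_NUM_REG_CLASSES];
};

static void
etna_compiler_init_reg_set(struct etna_compiler *compiler)
{
   struct ra_regs *regs = ra_alloc_reg_set(NULL, ETNA_MAX_TEMPS *
                                           ETNA_NUM_REG_TYPES, false);
   unsigned int **q_values;

   for (int c = 0; c < ETNA_NUM_REG_CLASSES; c++)
      compiler->class[c] = ra_alloc_reg_class(regs);

   /* every virtual register goes into the class for its type */
   for (int r = 0; r < ETNA_NUM_REG_TYPES * ETNA_MAX_TEMPS; r++)
      ra_class_add_reg(regs, compiler->class[etna_reg_get_class(r)], r);

   /* Q values, same table as in the patch */
   q_values = ralloc_array(regs, unsigned *, ETNA_NUM_REG_CLASSES);
   for (int i = 0; i < ETNA_NUM_REG_CLASSES; i++) {
      q_values[i] = rzalloc_array(q_values, unsigned, ETNA_NUM_REG_CLASSES);
      for (int j = 0; j < ETNA_NUM_REG_CLASSES; j++)
         q_values[i][j] = q_val[i][j];
   }

   /* conflicts between overlapping virtual registers on the same base */
   for (int r = 0; r < ETNA_MAX_TEMPS; r++)
      for (int i = 0; i < ETNA_NUM_REG_TYPES; i++)
         for (int j = 0; j < i; j++)
            if (etna_reg_writemask[i] & etna_reg_writemask[j])
               ra_add_reg_conflict(regs, ETNA_NUM_REG_TYPES * r + i,
                                   ETNA_NUM_REG_TYPES * r + j);

   ra_set_finalize(regs, q_values);
   compiler->regs = regs;
}

etna_assign_registers() would then just take compiler->regs and
compiler->class, and only the per-shader interference graph would be
built for each compile.
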
BR,
-R
> src/gallium/drivers/etnaviv/etnaviv_nir.c | 282 ++++++++++++++++++++--
> 1 file changed, 259 insertions(+), 23 deletions(-)
>
> diff --git a/src/gallium/drivers/etnaviv/etnaviv_nir.c b/src/gallium/drivers/etnaviv/etnaviv_nir.c
> index b73d4be31bc6..752e87248e31 100644
> --- a/src/gallium/drivers/etnaviv/etnaviv_nir.c
> +++ b/src/gallium/drivers/etnaviv/etnaviv_nir.c
> @@ -375,11 +375,111 @@ etna_instr_replaceable_ssa_dest(nir_instr *instr)
> return NULL;
> }
>
> -/* Return the NIR global register corresponding to a given temporary register,
> - * creating it if necessary.
> +/* Swizzles and write masks can be used to layer virtual non-interfering
> + * registers on top of the real VEC4 registers. For example, the virtual
> + * VEC3_XYZ register and the virtual SCALAR_W register that use the same
> + * physical VEC4 base register do not interfere.
> + */
> +enum {
> + ETNA_REG_CLASS_VEC4,
> + ETNA_REG_CLASS_VIRT_VEC3,
> + ETNA_REG_CLASS_VIRT_VEC2,
> + ETNA_REG_CLASS_VIRT_SCALAR,
> + ETNA_NUM_REG_CLASSES,
> +} etna_reg_class;
> +
> +enum {
> + ETNA_REG_TYPE_VEC4,
> + ETNA_REG_TYPE_VIRT_VEC3_XYZ,
> + ETNA_REG_TYPE_VIRT_VEC3_XYW,
> + ETNA_REG_TYPE_VIRT_VEC3_XZW,
> + ETNA_REG_TYPE_VIRT_VEC3_YZW,
> + ETNA_REG_TYPE_VIRT_VEC2_XY,
> + ETNA_REG_TYPE_VIRT_VEC2_XZ,
> + ETNA_REG_TYPE_VIRT_VEC2_XW,
> + ETNA_REG_TYPE_VIRT_VEC2_YZ,
> + ETNA_REG_TYPE_VIRT_VEC2_YW,
> + ETNA_REG_TYPE_VIRT_VEC2_ZW,
> + ETNA_REG_TYPE_VIRT_SCALAR_X,
> + ETNA_REG_TYPE_VIRT_SCALAR_Y,
> + ETNA_REG_TYPE_VIRT_SCALAR_Z,
> + ETNA_REG_TYPE_VIRT_SCALAR_W,
> + ETNA_NUM_REG_TYPES,
> +} etna_reg_type;
> +
> +static const uint8_t
> +etna_reg_writemask[ETNA_NUM_REG_TYPES] = {
> + [ETNA_REG_TYPE_VEC4] = 0xf,
> + [ETNA_REG_TYPE_VIRT_SCALAR_X] = 0x1,
> + [ETNA_REG_TYPE_VIRT_SCALAR_Y] = 0x2,
> + [ETNA_REG_TYPE_VIRT_VEC2_XY] = 0x3,
> + [ETNA_REG_TYPE_VIRT_SCALAR_Z] = 0x4,
> + [ETNA_REG_TYPE_VIRT_VEC2_XZ] = 0x5,
> + [ETNA_REG_TYPE_VIRT_VEC2_YZ] = 0x6,
> + [ETNA_REG_TYPE_VIRT_VEC3_XYZ] = 0x7,
> + [ETNA_REG_TYPE_VIRT_SCALAR_W] = 0x8,
> + [ETNA_REG_TYPE_VIRT_VEC2_XW] = 0x9,
> + [ETNA_REG_TYPE_VIRT_VEC2_YW] = 0xa,
> + [ETNA_REG_TYPE_VIRT_VEC3_XYW] = 0xb,
> + [ETNA_REG_TYPE_VIRT_VEC2_ZW] = 0xc,
> + [ETNA_REG_TYPE_VIRT_VEC3_XZW] = 0xd,
> + [ETNA_REG_TYPE_VIRT_VEC3_YZW] = 0xe,
> +};
> +
> +static inline int etna_reg_get_type(int virt_reg)
> +{
> + return virt_reg % ETNA_NUM_REG_TYPES;
> +}
> +
> +static inline int etna_reg_get_base(int virt_reg)
> +{
> + return virt_reg / ETNA_NUM_REG_TYPES;
> +}
> +
> +static inline int etna_reg_get_class(int virt_reg)
> +{
> + switch (etna_reg_get_type(virt_reg)) {
> + case ETNA_REG_TYPE_VEC4:
> + return ETNA_REG_CLASS_VEC4;
> + case ETNA_REG_TYPE_VIRT_VEC3_XYZ:
> + case ETNA_REG_TYPE_VIRT_VEC3_XYW:
> + case ETNA_REG_TYPE_VIRT_VEC3_XZW:
> + case ETNA_REG_TYPE_VIRT_VEC3_YZW:
> + return ETNA_REG_CLASS_VIRT_VEC3;
> + case ETNA_REG_TYPE_VIRT_VEC2_XY:
> + case ETNA_REG_TYPE_VIRT_VEC2_XZ:
> + case ETNA_REG_TYPE_VIRT_VEC2_XW:
> + case ETNA_REG_TYPE_VIRT_VEC2_YZ:
> + case ETNA_REG_TYPE_VIRT_VEC2_YW:
> + case ETNA_REG_TYPE_VIRT_VEC2_ZW:
> + return ETNA_REG_CLASS_VIRT_VEC2;
> + case ETNA_REG_TYPE_VIRT_SCALAR_X:
> + case ETNA_REG_TYPE_VIRT_SCALAR_Y:
> + case ETNA_REG_TYPE_VIRT_SCALAR_Z:
> + case ETNA_REG_TYPE_VIRT_SCALAR_W:
> + return ETNA_REG_CLASS_VIRT_SCALAR;
> + }
> +
> + assert(false);
> +}
> +
> +/* Q values for the full set. Each virtual register interferes with
> + * exactly one base register, and possibly with other virtual
> + * registers on top of the same base register.
> + */
> +static const unsigned int
> +q_val[ETNA_NUM_REG_CLASSES][ETNA_NUM_REG_CLASSES] = {
> + { 0, 4, 6, 4 },
> + { 1, 3, 6, 3 },
> + { 1, 4, 4, 2 },
> + { 1, 3, 3, 0 },
> +};
> +
> +/* Return a NIR global register corresponding to a given temporary register.
> + * The register is created if necessary.
> */
> static nir_register *
> -etna_ensure_temporary(nir_shader *shader, int index)
> +etna_ensure_register(nir_shader *shader, int index)
> {
> nir_foreach_register(reg, &shader->registers) {
> if (reg->index == index)
> @@ -387,13 +487,9 @@ etna_ensure_temporary(nir_shader *shader, int index)
> }
>
> nir_register *reg = nir_global_reg_create(shader);
> + shader->reg_alloc = MAX2(shader->reg_alloc - 1, index + 1);
> reg->num_components = 4;
> - reg->num_array_elems = 0;
> - reg->bit_size = 32;
> reg->index = index;
> - if (shader->reg_alloc < index + 1)
> - shader->reg_alloc = index + 1;
> - reg->name = NULL;
>
> return reg;
> }
> @@ -405,15 +501,19 @@ etna_ensure_temporary(nir_shader *shader, int index)
> * and all store intrinsics to be moved to the end of the function already, so
> * that interference between input, output, and temporary values is described
> * correctly.
> + * All SSAs that qualify will be replaced with the assigned registers.
> + * Destination SSAs to constant/uniform load and output store intrinsics
> + * as well as undefined assignments are kept and will be removed later.
> */
> static void
> etna_assign_registers(nir_shader *shader)
> {
> - struct ra_regs *regs = ra_alloc_reg_set(NULL, 64, false);
> - int class = ra_alloc_reg_class(regs);
> + struct ra_regs *regs = ra_alloc_reg_set(NULL, ETNA_MAX_TEMPS *
> + ETNA_NUM_REG_TYPES, false);
> + int class[ETNA_NUM_REG_CLASSES];
> unsigned int **q_values;
> - unsigned int *input_reg;
> - unsigned int *output_reg;
> + unsigned int *input_reg = NULL;
> + unsigned int *output_reg = NULL;
>
> /* Input/output registers only have to be assigned manually to the beginning
> * of the temporary register range in the fragment shader. Otherwise the
> @@ -427,13 +527,29 @@ etna_assign_registers(nir_shader *shader)
> }
>
> /* A single register file with 64 registers is available to each running
> - * shader, with no conflicts between them.
> + * shader, with no conflicts between them. We add virtual registers on
> + * top of that.
> */
> - for (int r = 0; r < 64; r++)
> - ra_class_add_reg(regs, class, r);
> - q_values = ralloc_array(regs, unsigned *, 1);
> - q_values[0] = rzalloc_array(q_values, unsigned, 1);
> - q_values[0][0] = 0;
> + for (int c = 0; c < ETNA_NUM_REG_CLASSES; c++)
> + class[c] = ra_alloc_reg_class(regs);
> + for (int r = 0; r < ETNA_NUM_REG_TYPES * ETNA_MAX_TEMPS; r++)
> + ra_class_add_reg(regs, class[etna_reg_get_class(r)], r);
> + q_values = ralloc_array(regs, unsigned *, ETNA_NUM_REG_CLASSES);
> + for (int i = 0; i < ETNA_NUM_REG_CLASSES; i++) {
> + q_values[i] = rzalloc_array(q_values, unsigned, ETNA_NUM_REG_CLASSES);
> + for (int j = 0; j < ETNA_NUM_REG_CLASSES; j++)
> + q_values[i][j] = q_val[i][j];
> + }
> + for (int r = 0; r < ETNA_MAX_TEMPS; r++) {
> + for (int i = 0; i < ETNA_NUM_REG_TYPES; i++) {
> + for (int j = 0; j < i; j++) {
> + if (etna_reg_writemask[i] & etna_reg_writemask[j]) {
> + ra_add_reg_conflict(regs, ETNA_NUM_REG_TYPES * r + i,
> + ETNA_NUM_REG_TYPES * r + j);
> + }
> + }
> + }
> + }
> ra_set_finalize(regs, q_values);
>
> nir_foreach_function(function, shader) {
> @@ -462,7 +578,54 @@ etna_assign_registers(nir_shader *shader)
> }
> }
>
> - struct ra_graph *g = ra_alloc_interference_graph(regs, count);
> + int num_nodes = count;
> +
> + /* Add space for one dummy node, to grab the position register */
> + if (shader->info.stage == MESA_SHADER_FRAGMENT)
> + num_nodes++;
> +
> + struct ra_graph *g = ra_alloc_interference_graph(regs, num_nodes);
> +
> + /* Assign nodes to the appropriate register class */
> + for (i = 0; i < count; i++) {
> + bool can_use_virt = list_empty(&ssa_defs[i]->if_uses);
> + can_use_virt &= ssa_defs[i]->parent_instr->type == nir_instr_type_alu;
> + if (can_use_virt) {
> + nir_foreach_use(use_src, ssa_defs[i]) {
> + if (use_src->parent_instr->type != nir_instr_type_alu) {
> + can_use_virt = false;
> + break;
> + }
> + /* These instructions are scalar and only read src.x */
> + nir_alu_instr *alu = nir_instr_as_alu(use_src->parent_instr);
> + if (alu->op == nir_op_fexp2 ||
> + alu->op == nir_op_flog2) {
> + can_use_virt = false;
> + break;
> + }
> + }
> + }
> +
> + /* Only choose virtual registers if all uses can be swizzled */
> + if (can_use_virt && ssa_defs[i]->num_components == 1)
> + ra_set_node_class(g, i, ETNA_REG_CLASS_VIRT_SCALAR);
> + else if (can_use_virt && ssa_defs[i]->num_components == 2)
> + ra_set_node_class(g, i, ETNA_REG_CLASS_VIRT_VEC2);
> + else if (can_use_virt && ssa_defs[i]->num_components == 3)
> + ra_set_node_class(g, i, ETNA_REG_CLASS_VIRT_VEC3);
> + else
> + ra_set_node_class(g, i, ETNA_REG_CLASS_VEC4);
> + }
> +
> + /* Prevent writes to the position register (temporary register 0) by
> + * assigning it to a dummy node that interferes with all other nodes.
> + */
> + if (shader->info.stage == MESA_SHADER_FRAGMENT) {
> + ra_set_node_class(g, num_nodes - 1, ETNA_REG_CLASS_VEC4);
> + ra_set_node_reg(g, num_nodes - 1, 0);
> + for (int i = 0; i < count; i++)
> + ra_add_node_interference(g, i, num_nodes - 1);
> + }
>
> /* Collect SSA interference information and force input loads to
> * the correct registers in the fragment shader.
> @@ -491,7 +654,7 @@ etna_assign_registers(nir_shader *shader)
>
> assert(offset == 0);
>
> - ra_set_node_reg(g, i, base);
> + ra_set_node_reg(g, i, base * ETNA_NUM_REG_TYPES);
> }
> }
>
> @@ -517,7 +680,7 @@ etna_assign_registers(nir_shader *shader)
> /* Find the replaceable SSA used as source */
> for (i = 0; i < count; i++) {
> if (ssa_defs[i] == intr->src[0].ssa)
> - ra_set_node_reg(g, i, base);
> + ra_set_node_reg(g, i, base * ETNA_NUM_REG_TYPES);
> }
> }
> }
> @@ -530,19 +693,92 @@ etna_assign_registers(nir_shader *shader)
> /* Replace SSA assignments with allocated registers */
> for (i = 0; i < count; i++) {
> int r = ra_get_node_reg(g, i);
> - nir_register *reg = etna_ensure_temporary(shader, r);
> + nir_register *reg = etna_ensure_register(shader, etna_reg_get_base(r));
> nir_ssa_def *ssa = ssa_defs[i];
>
> - nir_ssa_def_rewrite_uses(ssa, nir_src_for_reg(reg));
> + /* Rewrite uses */
> + if (etna_reg_get_type(r) == ETNA_REG_TYPE_VEC4) {
> + nir_ssa_def_rewrite_uses(ssa, nir_src_for_reg(reg));
> + } else {
> + nir_src new_src = nir_src_for_reg(reg);
> + nir_foreach_use_safe(use_src, ssa) {
> + static const unsigned reswizzle[ETNA_NUM_REG_TYPES][4] = {
> + { 0, 1, 2, 3 }, /* XYZW */
> + { 0, 1, 2, 2 }, /* XYZ */
> + { 0, 1, 3, 3 }, /* XYW */
> + { 0, 2, 3, 3 }, /* XZW */
> + { 1, 2, 3, 3 }, /* YZW */
> + { 0, 1, 1, 1 }, /* XY */
> + { 0, 2, 2, 2 }, /* XZ */
> + { 0, 3, 3, 3 }, /* XW */
> + { 1, 2, 2, 2 }, /* YZ */
> + { 1, 3, 3, 3 }, /* YW */
> + { 2, 3, 3, 3 }, /* ZW */
> + { 0, 0, 0, 0 }, /* X */
> + { 1, 1, 1, 1 }, /* Y */
> + { 2, 2, 2, 2 }, /* Z */
> + { 3, 3, 3, 3 }, /* W */
> + };
> + nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
> + nir_alu_src *alu_src = container_of(use_src, alu_src, src);
> + int t = etna_reg_get_type(r);
> + alu_src->swizzle[0] = reswizzle[t][alu_src->swizzle[0]];
> + alu_src->swizzle[1] = reswizzle[t][alu_src->swizzle[1]];
> + alu_src->swizzle[2] = reswizzle[t][alu_src->swizzle[2]];
> + alu_src->swizzle[3] = reswizzle[t][alu_src->swizzle[3]];
> + }
> + }
> +
> assert(list_empty(&ssa->uses) && list_empty(&ssa->if_uses));
>
> nir_instr *instr = ssa->parent_instr;
>
> + /* Rewrite destination */
> if (instr->type == nir_instr_type_alu) {
> nir_alu_instr *alu = nir_instr_as_alu(instr);
>
> nir_instr_rewrite_dest(&alu->instr, &alu->dest.dest,
> nir_dest_for_reg(reg));
> + int t = etna_reg_get_type(r);
> + alu->dest.write_mask = etna_reg_writemask[t];
> + /* The dot product instructions broadcast their result to all
> + * destination components. There is no need to reswizzle their
> + * sources here.
> + */
> + if (alu->op != nir_op_fdot2 &&
> + alu->op != nir_op_fdot3 &&
> + alu->op != nir_op_fdot4) {
> + unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
> + for (unsigned i = 0; i < num_srcs; i++) {
> + static const unsigned reswizzle[ETNA_NUM_REG_TYPES][4] = {
> + { 0, 1, 2, 3 }, /* XYZW */
> + { 0, 1, 2, 2 }, /* XYZ */
> + { 0, 1, 1, 2 }, /* XYW */
> + { 0, 0, 1, 2 }, /* XZW */
> + { 0, 0, 1, 2 }, /* YZW */
> + { 0, 1, 1, 1 }, /* XY */
> + { 0, 0, 1, 1 }, /* XZ */
> + { 0, 0, 0, 1 }, /* XW */
> + { 0, 0, 1, 1 }, /* YZ */
> + { 0, 0, 0, 1 }, /* YW */
> + { 0, 0, 0, 1 }, /* ZW */
> + { 0, 0, 0, 0 }, /* X */
> + { 0, 0, 0, 0 }, /* Y */
> + { 0, 0, 0, 0 }, /* Z */
> + { 0, 0, 0, 0 }, /* W */
> + };
> + nir_alu_src *alu_src = &alu->src[i];
> + uint8_t swizzle[4];
> + swizzle[0] = alu_src->swizzle[0];
> + swizzle[1] = alu_src->swizzle[1];
> + swizzle[2] = alu_src->swizzle[2];
> + swizzle[3] = alu_src->swizzle[3];
> + alu_src->swizzle[0] = swizzle[reswizzle[t][0]];
> + alu_src->swizzle[1] = swizzle[reswizzle[t][1]];
> + alu_src->swizzle[2] = swizzle[reswizzle[t][2]];
> + alu_src->swizzle[3] = swizzle[reswizzle[t][3]];
> + }
> + }
> } else if (instr->type == nir_instr_type_tex) {
> nir_tex_instr *tex = nir_instr_as_tex(instr);
>
> --
> 2.17.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev