[Mesa-dev] [PATCH v2] nv50/ir: rework the madsp subop handling

Sat Dec 2 14:39:22 UTC 2017

Hi,

comments down below...

Greetings,

Tobias

On 12/2/17 12:34 PM, Karol Herbst wrote:
> From: Karol Herbst <kherbst at redhat.com>
>
> Creating correct SubOps for OP_MADSP wasn't easy, because devs needed to know
> the proper values for each data type. Also the third source doesn't know any
> explicit signess and the current code tried to work around this fact.
>
> To make the creating of the subops much easier, this commit introduces a helper
> macro, which can be used in an intuitive way and eliminates the need to know
> the correct numbers for creating such subops.
>
> Values for src0 and src1 kept their meaning, src2 changed significantly:
>
> Before:
> bit 0 changed the sign of src0
> bit 1 changed the sign of src1
> bits 2-3 choosed the data type out of 32, 24 and 16L
>
> Now:
> bits 0-1 choose the data type out of 32, 24 and 16L
>
> This change also lead to an additional << 2 shift in the emmiter for src2
>
> No regressions in piglit on NVE4
>
> v2: fixed mistake in nvc0 emiter
>
> Signed-off-by: Karol Herbst <kherbst at redhat.com>
> ---
>   src/gallium/drivers/nouveau/codegen/nv50_ir.h      | 24 +++++++++++++++++-----
>   .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp |  8 +++-----
>   .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp  |  8 +++-----
>   .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      | 10 ++++-----
>   4 files changed, 30 insertions(+), 20 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> index bc15992df0..10b872e0dd 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> @@ -239,11 +239,25 @@ enum operation
>   #define NV50_IR_SUBOP_LOAD_LOCKED    1
>   #define NV50_IR_SUBOP_STORE_UNLOCKED 2
>   #define NV50_IR_SUBOP_MADSP_SD     0xffff
> -// Yes, we could represent those with DataType.
> -// Or put the type into operation and have a couple 1000 values in that enum.
> -// This will have to do for now.
> -// The bitfields are supposed to correspond to nve4 ISA.
> -#define NV50_IR_SUBOP_MADSP(a,b,c) (((c) << 8) | ((b) << 4) | (a))
> +#define NV50_IR_SUBOP_MADSP_SRC0_U32  0x000
> +#define NV50_IR_SUBOP_MADSP_SRC0_S32  0x001
> +#define NV50_IR_SUBOP_MADSP_SRC0_U24  0x002
> +#define NV50_IR_SUBOP_MADSP_SRC0_S24  0x003
> +#define NV50_IR_SUBOP_MADSP_SRC0_U16L 0x004
> +#define NV50_IR_SUBOP_MADSP_SRC0_S16L 0x005
> +#define NV50_IR_SUBOP_MADSP_SRC0_U16H 0x006
> +#define NV50_IR_SUBOP_MADSP_SRC0_S16H 0x007
> +#define NV50_IR_SUBOP_MADSP_SRC1_U24  0x000
> +#define NV50_IR_SUBOP_MADSP_SRC1_S24  0x010
> +#define NV50_IR_SUBOP_MADSP_SRC1_U16L 0x020
> +#define NV50_IR_SUBOP_MADSP_SRC1_S16L 0x030
> +#define NV50_IR_SUBOP_MADSP_SRC2_32   0x000
> +#define NV50_IR_SUBOP_MADSP_SRC2_24   0x100
> +#define NV50_IR_SUBOP_MADSP_SRC2_16L  0x200
> +#define NV50_IR_SUBOP_MADSP_TUPLE(a, b, c) \

call this one _3TUPLE ?

> +   ( NV50_IR_SUBOP_MADSP_SRC0_ ## a | \
> +     NV50_IR_SUBOP_MADSP_SRC1_ ## b | \
> +     NV50_IR_SUBOP_MADSP_SRC2_ ## c )
>   #define NV50_IR_SUBOP_V1(d,a,b)    (((d) << 10) | ((b) << 5) | (a) | 0x0000)
>   #define NV50_IR_SUBOP_V2(d,a,b)    (((d) << 10) | ((b) << 5) | (a) | 0x4000)
>   #define NV50_IR_SUBOP_V4(d,a,b)    (((d) << 10) | ((b) << 5) | (a) | 0x8000)
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
> index 370427d0d1..f9c34a2760 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
> @@ -556,11 +556,9 @@ CodeEmitterGK110::emitMADSP(const Instruction *i)
>      if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
>         code[1] |= 0x00c00000;
>      } else {
> -      code[1] |= (i->subOp & 0x00f) << 19; // imadp1
> -      code[1] |= (i->subOp & 0x0f0) << 20; // imadp2
> -      code[1] |= (i->subOp & 0x100) << 11; // imadp3
> -      code[1] |= (i->subOp & 0x200) << 15; // imadp3
> -      code[1] |= (i->subOp & 0xc00) << 12; // imadp3
> +      code[1] |= (i->subOp & 0x007) << 19; // src0
> +      code[1] |= (i->subOp & 0x030) << 20; // src1
> +      code[1] |= (i->subOp & 0x300) << 14; // src2
>      }
>   
>      if (i->flagsDef >= 0)
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
> index 58594f02c7..38080f04d5 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
> @@ -826,11 +826,9 @@ CodeEmitterNVC0::emitMADSP(const Instruction *i)
>      if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
>         code[1] |= 0x01800000;
>      } else {
> -      code[0] |= (i->subOp & 0x00f) << 7;
> -      code[0] |= (i->subOp & 0x0f0) << 1;
> -      code[0] |= (i->subOp & 0x100) >> 3;
> -      code[0] |= (i->subOp & 0x200) >> 2;
> -      code[1] |= (i->subOp & 0xc00) << 13;
> +      code[0] |= (i->subOp & 0x007) << 7;
> +      code[0] |= (i->subOp & 0x030) << 1;
> +      code[1] |= (i->subOp & 0x300) << 15;
>      }
>   
>      if (i->flagsDef >= 0)
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index 7243b1d2e4..d0eff75bef 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -1893,17 +1893,17 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
>      if (dim == 3) {
>         v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C);
>         bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1])
> -         ->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l
> +         ->subOp = NV50_IR_SUBOP_MADSP_TUPLE(U16L, U16L, 16L);
>   
>         v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH);
>         bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0])
> -         ->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l
> +         ->subOp = NV50_IR_SUBOP_MADSP_TUPLE(U32, U16L, 16L);
>      } else {
>         assert(dim == 2);
>         v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH);
>         bld.mkOp3(OP_MADSP, TYPE_U32, off, src[1], v, src[0])
>            ->subOp = (su->tex.target.isArray() || su->tex.target.isCube()) ?
> -         NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l
> +         NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP_TUPLE(U16L, U16L, 16L);
>      }
>   
>      // calculate effective address part 1
> @@ -1955,10 +1955,10 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
>         v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY);
>         if (dim == 1)
>            bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau)
> -            ->subOp = NV50_IR_SUBOP_MADSP(4,0,0); // u16 u24 u32
> +            ->subOp = NV50_IR_SUBOP_MADSP_TUPLE(U16L, U24, 32);

missing an U up here, so (U16L, U24, U32) seems the way to go

>         else
>            bld.mkOp3(OP_MADSP, TYPE_U32, eau, v, src[2], eau)
> -            ->subOp = NV50_IR_SUBOP_MADSP(0,0,0); // u32 u24 u32
> +            ->subOp = NV50_IR_SUBOP_MADSP_TUPLE(U32, U24, 32);

same as above

>         // combine predicates
>         assert(p1);
>         bld.mkOp2(OP_OR, TYPE_U8, pred, pred, p1);