[Mesa-dev] [PATCH v2] nv50/ir: rework the madsp subop handling
Tobias Klausmann
tobias.johannes.klausmann at mni.thm.de
Sat Dec 2 14:39:22 UTC 2017
Hi,
comments down below...
Greetings,
Tobias
On 12/2/17 12:34 PM, Karol Herbst wrote:
> From: Karol Herbst <kherbst at redhat.com>
>
> Creating correct SubOps for OP_MADSP wasn't easy, because devs needed to know
> the proper values for each data type. Also the third source doesn't know any
> explicit signess and the current code tried to work around this fact.
>
> To make the creating of the subops much easier, this commit introduces a helper
> macro, which can be used in an intuitive way and eliminates the need to know
> the correct numbers for creating such subops.
>
> Values for src0 and src1 kept their meaning, src2 changed significantly:
>
> Before:
> bit 0 changed the sign of src0
> bit 1 changed the sign of src1
> bits 2-3 choosed the data type out of 32, 24 and 16L
>
> Now:
> bits 0-1 choose the data type out of 32, 24 and 16L
>
> This change also lead to an additional << 2 shift in the emmiter for src2
>
> No regressions in piglit on NVE4
>
> v2: fixed mistake in nvc0 emiter
>
> Signed-off-by: Karol Herbst <kherbst at redhat.com>
> ---
> src/gallium/drivers/nouveau/codegen/nv50_ir.h | 24 +++++++++++++++++-----
> .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 8 +++-----
> .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 8 +++-----
> .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 10 ++++-----
> 4 files changed, 30 insertions(+), 20 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> index bc15992df0..10b872e0dd 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> @@ -239,11 +239,25 @@ enum operation
> #define NV50_IR_SUBOP_LOAD_LOCKED 1
> #define NV50_IR_SUBOP_STORE_UNLOCKED 2
> #define NV50_IR_SUBOP_MADSP_SD 0xffff
> -// Yes, we could represent those with DataType.
> -// Or put the type into operation and have a couple 1000 values in that enum.
> -// This will have to do for now.
> -// The bitfields are supposed to correspond to nve4 ISA.
> -#define NV50_IR_SUBOP_MADSP(a,b,c) (((c) << 8) | ((b) << 4) | (a))
> +#define NV50_IR_SUBOP_MADSP_SRC0_U32 0x000
> +#define NV50_IR_SUBOP_MADSP_SRC0_S32 0x001
> +#define NV50_IR_SUBOP_MADSP_SRC0_U24 0x002
> +#define NV50_IR_SUBOP_MADSP_SRC0_S24 0x003
> +#define NV50_IR_SUBOP_MADSP_SRC0_U16L 0x004
> +#define NV50_IR_SUBOP_MADSP_SRC0_S16L 0x005
> +#define NV50_IR_SUBOP_MADSP_SRC0_U16H 0x006
> +#define NV50_IR_SUBOP_MADSP_SRC0_S16H 0x007
> +#define NV50_IR_SUBOP_MADSP_SRC1_U24 0x000
> +#define NV50_IR_SUBOP_MADSP_SRC1_S24 0x010
> +#define NV50_IR_SUBOP_MADSP_SRC1_U16L 0x020
> +#define NV50_IR_SUBOP_MADSP_SRC1_S16L 0x030
> +#define NV50_IR_SUBOP_MADSP_SRC2_32 0x000
> +#define NV50_IR_SUBOP_MADSP_SRC2_24 0x100
> +#define NV50_IR_SUBOP_MADSP_SRC2_16L 0x200
> +#define NV50_IR_SUBOP_MADSP_TUPLE(a, b, c) \
call this one _3TUPLE ?
> + ( NV50_IR_SUBOP_MADSP_SRC0_ ## a | \
> + NV50_IR_SUBOP_MADSP_SRC1_ ## b | \
> + NV50_IR_SUBOP_MADSP_SRC2_ ## c )
> #define NV50_IR_SUBOP_V1(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x0000)
> #define NV50_IR_SUBOP_V2(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x4000)
> #define NV50_IR_SUBOP_V4(d,a,b) (((d) << 10) | ((b) << 5) | (a) | 0x8000)
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
> index 370427d0d1..f9c34a2760 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
> @@ -556,11 +556,9 @@ CodeEmitterGK110::emitMADSP(const Instruction *i)
> if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
> code[1] |= 0x00c00000;
> } else {
> - code[1] |= (i->subOp & 0x00f) << 19; // imadp1
> - code[1] |= (i->subOp & 0x0f0) << 20; // imadp2
> - code[1] |= (i->subOp & 0x100) << 11; // imadp3
> - code[1] |= (i->subOp & 0x200) << 15; // imadp3
> - code[1] |= (i->subOp & 0xc00) << 12; // imadp3
> + code[1] |= (i->subOp & 0x007) << 19; // src0
> + code[1] |= (i->subOp & 0x030) << 20; // src1
> + code[1] |= (i->subOp & 0x300) << 14; // src2
> }
>
> if (i->flagsDef >= 0)
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
> index 58594f02c7..38080f04d5 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
> @@ -826,11 +826,9 @@ CodeEmitterNVC0::emitMADSP(const Instruction *i)
> if (i->subOp == NV50_IR_SUBOP_MADSP_SD) {
> code[1] |= 0x01800000;
> } else {
> - code[0] |= (i->subOp & 0x00f) << 7;
> - code[0] |= (i->subOp & 0x0f0) << 1;
> - code[0] |= (i->subOp & 0x100) >> 3;
> - code[0] |= (i->subOp & 0x200) >> 2;
> - code[1] |= (i->subOp & 0xc00) << 13;
> + code[0] |= (i->subOp & 0x007) << 7;
> + code[0] |= (i->subOp & 0x030) << 1;
> + code[1] |= (i->subOp & 0x300) << 15;
> }
>
> if (i->flagsDef >= 0)
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index 7243b1d2e4..d0eff75bef 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -1893,17 +1893,17 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
> if (dim == 3) {
> v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C);
> bld.mkOp3(OP_MADSP, TYPE_U32, off, src[2], v, src[1])
> - ->subOp = NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l
> + ->subOp = NV50_IR_SUBOP_MADSP_TUPLE(U16L, U16L, 16L);
>
> v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH);
> bld.mkOp3(OP_MADSP, TYPE_U32, off, off, v, src[0])
> - ->subOp = NV50_IR_SUBOP_MADSP(0,2,8); // u32 u16l u16l
> + ->subOp = NV50_IR_SUBOP_MADSP_TUPLE(U32, U16L, 16L);
> } else {
> assert(dim == 2);
> v = loadSuInfo32(ind, slot, NVC0_SU_INFO_PITCH);
> bld.mkOp3(OP_MADSP, TYPE_U32, off, src[1], v, src[0])
> ->subOp = (su->tex.target.isArray() || su->tex.target.isCube()) ?
> - NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP(4,2,8); // u16l u16l u16l
> + NV50_IR_SUBOP_MADSP_SD : NV50_IR_SUBOP_MADSP_TUPLE(U16L, U16L, 16L);
> }
>
> // calculate effective address part 1
> @@ -1955,10 +1955,10 @@ NVC0LoweringPass::processSurfaceCoordsNVE4(TexInstruction *su)
> v = loadSuInfo32(ind, slot, NVC0_SU_INFO_ARRAY);
> if (dim == 1)
> bld.mkOp3(OP_MADSP, TYPE_U32, eau, src[1], v, eau)
> - ->subOp = NV50_IR_SUBOP_MADSP(4,0,0); // u16 u24 u32
> + ->subOp = NV50_IR_SUBOP_MADSP_TUPLE(U16L, U24, 32);
missing an U up here, so (U16L, U24, U32) seems the way to go
> else
> bld.mkOp3(OP_MADSP, TYPE_U32, eau, v, src[2], eau)
> - ->subOp = NV50_IR_SUBOP_MADSP(0,0,0); // u32 u24 u32
> + ->subOp = NV50_IR_SUBOP_MADSP_TUPLE(U32, U24, 32);
same as above
> // combine predicates
> assert(p1);
> bld.mkOp2(OP_OR, TYPE_U8, pred, pred, p1);
More information about the mesa-dev
mailing list