[Nouveau] [PATCH] gm107/ir: use lane 0 for manual textureGrad handling
Karol Herbst
kherbst at redhat.com
Thu Dec 21 08:50:39 UTC 2017
On Wed, Dec 20, 2017 at 3:44 PM, Ilia Mirkin <imirkin at alum.mit.edu> wrote:
> On Tue, Dec 19, 2017 at 11:41 PM, Ilia Mirkin <imirkin at alum.mit.edu> wrote:
>> This is parallel to the pre-SM50 change which does this. Adjusts the
>> shuffles / quadops to make the values correct relative to lane 0, and
>> then splat the results to all lanes for the final move into the target
>> register.
>>
>> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
>> ---
>>
>> Entirely untested beyond compilation. Should check
>>
>> bin/tex-miplevel-selection textureGrad Cube
>> bin/tex-miplevel-selection textureGrad CubeShadow
>> bin/tex-miplevel-selection textureGrad CubeArray
>> KHR-GL45.texture_cube_map_array.sampling
>>
>> to see if they start passing with this change.
>>
>> .../nouveau/codegen/nv50_ir_lowering_gm107.cpp | 56 ++++++++++++++--------
>> 1 file changed, 35 insertions(+), 21 deletions(-)
>>
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
>> index 6b9edd48645..a2427526a81 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
>> @@ -95,18 +95,15 @@ GM107LegalizeSSA::visit(Instruction *i)
>> bool
>> GM107LoweringPass::handleManualTXD(TexInstruction *i)
>> {
>> - static const uint8_t qOps[4][2] =
>> - {
>> - { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0
>> - { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1
>> - { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2
>> - { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3
>> - };
>> + // See NVC0LoweringPass::handleManualTXD for rationale. This function
>> + // implements the same logic, but using SM50-friendly primitives.
>> + static const uint8_t qOps[2] =
>> + { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) };
>> Value *def[4][4];
>> - Value *crd[3];
>> + Value *crd[3], *arr, *shadow;
>> Value *tmp;
>> Instruction *tex, *add;
>> - Value *zero = bld.loadImm(bld.getSSA(), 0);
>> + Value *quad = bld.mkImm(SHFL_BOUND_QUAD);
>> int l, c;
>> const int dim = i->tex.target.getDim() + i->tex.target.isCube();
>> const int array = i->tex.target.isArray();
>> @@ -115,35 +112,40 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i)
>>
>> for (c = 0; c < dim; ++c)
>> crd[c] = bld.getScratch();
>> + arr = bld.getScratch();
>> + shadow = bld.getScratch();
>> tmp = bld.getScratch();
>>
>> for (l = 0; l < 4; ++l) {
>> Value *src[3], *val;
>> - // mov coordinates from lane l to all lanes
>> + Value *lane = bld.mkImm(l);
>> bld.mkOp(OP_QUADON, TYPE_NONE, NULL);
>> + // Make sure lane 0 has the appropriate array/depth compare values
>> + if (l != 0) {
>> + if (array)
>> + bld.mkOp3(OP_SHFL, TYPE_F32, arr, i->getSrc(0), lane, quad);
>> + if (i->tex.target.isShadow())
>> + bld.mkOp3(OP_SHFL, TYPE_F32, shadow, i->getSrc(array + dim), lane, quad);
>
> In the great argument switcheroo between each SM version, the shadow
> compare is actually after the indirect handle (which in turn is after
> array + dim). So this should become array + dim + indirect (and
> similarly below).
>
>> + }
>> +
>> + // mov coordinates from lane l to all lanes
>> for (c = 0; c < dim; ++c) {
>> - bld.mkOp3(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array),
>> - bld.mkImm(l), bld.mkImm(SHFL_BOUND_QUAD));
>> - add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], crd[c], zero);
>> - add->subOp = 0x00;
>> - add->lanes = 1; /* abused for .ndv */
>> + bld.mkOp3(OP_SHFL, TYPE_F32, crd[c], i->getSrc(c + array), lane, quad);
>> }
>>
>> // add dPdx from lane l to lanes dx
>> for (c = 0; c < dim; ++c) {
>> - bld.mkOp3(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), bld.mkImm(l),
>> - bld.mkImm(SHFL_BOUND_QUAD));
>> + bld.mkOp3(OP_SHFL, TYPE_F32, tmp, i->dPdx[c].get(), lane, quad);
>> add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
>> - add->subOp = qOps[l][0];
>> + add->subOp = qOps[0];
>> add->lanes = 1; /* abused for .ndv */
>> }
>>
>> // add dPdy from lane l to lanes dy
>> for (c = 0; c < dim; ++c) {
>> - bld.mkOp3(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), bld.mkImm(l),
>> - bld.mkImm(SHFL_BOUND_QUAD));
>> + bld.mkOp3(OP_SHFL, TYPE_F32, tmp, i->dPdy[c].get(), lane, quad);
>> add = bld.mkOp2(OP_QUADOP, TYPE_F32, crd[c], tmp, crd[c]);
>> - add->subOp = qOps[l][1];
>> + add->subOp = qOps[1];
>> add->lanes = 1; /* abused for .ndv */
>> }
>>
>> @@ -164,8 +166,20 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i)
>>
>> // texture
>> bld.insert(tex = cloneForward(func, i));
>> + if (l != 0) {
>> + if (array)
>> + tex->setSrc(0, arr);
>> + if (i->tex.target.isShadow())
>> + tex->setSrc(array + dim, shadow);
>> + }
>> for (c = 0; c < dim; ++c)
>> tex->setSrc(c + array, src[c]);
>> + // broadcast results from lane 0 to all lanes
>> + if (l != 0) {
>> + Value *lane = bld.mkImm(l);
>
> This should of course be bld.mkImm(0), not l, since we're broadcasting
> from lane *0* to all lanes.
>
> These are all fixed up in
> https://github.com/imirkin/mesa/commit/618b99d86396417e31551dc464ab2ca5d038151f
>
I did a piglit run ('./piglit run -x glx -x egl -x streaming-texture-leak
-x max-texture-size tests/gpu.py') on a GP107, and the three
tex-miplevel-selection textureGrad tests now pass, as you said.
In addition to that, the CTS one (KHR-GL45.texture_cube_map_array.sampling) and
'spec@arb_shader_texture_lod@execution@tex-miplevel-selection *gradarb
cube' pass as well.
So this is Tested-By: Karol Herbst <kherbst at redhat.com>
>> + for (c = 0; i->defExists(c); ++c)
>> + bld.mkOp3(OP_SHFL, TYPE_F32, tex->getDef(c), tex->getDef(c), lane, quad);
>> + }
>> bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL);
>>
>> // save results
>> --
>> 2.13.6
>>
> _______________________________________________
> Nouveau mailing list
> Nouveau at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/nouveau
More information about the Nouveau mailing list