[Mesa-dev] [PATCH 7/7] nvc0: enable FBFETCH with a special slot for color buffer 0
Samuel Pitoiset
samuel.pitoiset at gmail.com
Thu Jan 5 17:22:27 UTC 2017
Would be nice to test on Maxwell as well.
More comments inline.
Thanks.
On 01/02/2017 07:01 AM, Ilia Mirkin wrote:
> We don't need to support all the color buffers for advanced blend, just
> cb0. For Fermi, we use the special binding slots so that we don't
> overlap with user textures, while Kepler+ gets a dedicated position for
> the fb handle in the driver constbuf.
>
> This logic is only triggered when a FBREAD is actually present so it
> should be a no-op most of the time.
>
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
> ---
>
> Firstly, I'm not 100% sure that the default_tsc is needed. I had temporary
> failures with srgb decoding that went away on later runs that I was unable
> to reproduce even by forcing it to have srgb conversion disabled in the
> sampler. And TXF shouldn't need samplers. However I remember a weird situation
> where TXF was producing failures without a sampler bound.
>
> Secondly this needs to get testing on Fermi. All the dEQP advanced blend
> tests pass on Kepler though.
>
> docs/features.txt | 2 +-
> docs/relnotes/13.1.0.html | 1 +
> .../drivers/nouveau/codegen/nv50_ir_driver.h | 2 +
> .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 41 ++++++++++
> .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 21 +++--
> src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 4 +
> src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 6 ++
> src/gallium/drivers/nouveau/nvc0/nvc0_program.h | 1 +
> src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 6 +-
> src/gallium/drivers/nouveau/nvc0/nvc0_screen.h | 2 +
> .../drivers/nouveau/nvc0/nvc0_state_validate.c | 92 +++++++++++++++++++++-
> 11 files changed, 170 insertions(+), 8 deletions(-)
>
> diff --git a/docs/features.txt b/docs/features.txt
> index c27d521..60cb68c 100644
> --- a/docs/features.txt
> +++ b/docs/features.txt
> @@ -253,7 +253,7 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, radeonsi
> GLES3.2, GLSL ES 3.2 -- all DONE: i965/gen9+
>
> GL_EXT_color_buffer_float DONE (all drivers)
> - GL_KHR_blend_equation_advanced DONE (i965)
> + GL_KHR_blend_equation_advanced DONE (i965, nvc0)
> GL_KHR_debug DONE (all drivers)
> GL_KHR_robustness DONE (i965, nvc0, radeonsi)
> GL_KHR_texture_compression_astc_ldr DONE (i965/gen9+)
> diff --git a/docs/relnotes/13.1.0.html b/docs/relnotes/13.1.0.html
> index 4dce843..be2c206 100644
> --- a/docs/relnotes/13.1.0.html
> +++ b/docs/relnotes/13.1.0.html
> @@ -45,6 +45,7 @@ Note: some of the new features are only available with certain drivers.
>
> <ul>
> <li>GL_ARB_post_depth_coverage on i965/gen9+</li>
> +<li>GL_KHR_blend_equation_advanced on nvc0</li>
> <li>GL_INTEL_conservative_rasterization on i965/gen9+</li>
> <li>GL_NV_image_formats on any driver supporting GL_ARB_shader_image_load_store (i965, nvc0, radeonsi, softpipe)</li>
> </ul>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> index 9fdabcc..fbb692d 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> @@ -146,6 +146,7 @@ struct nv50_ir_prog_info
> bool usesDiscard;
> bool persampleInvocation;
> bool usesSampleMaskIn;
> + bool readsFramebuffer;
> } fp;
> struct {
> uint32_t inputOffset; /* base address for user args */
> @@ -179,6 +180,7 @@ struct nv50_ir_prog_info
> bool nv50styleSurfaces; /* generate gX[] access for raw buffers */
> bool halfPixelCenter; /* externally set half pixel center state */
> uint16_t texBindBase; /* base address for tex handles (nve4) */
> + uint16_t fbtexBindBase; /* base address for fbtex handle (nve4) */
> uint16_t suInfoBase; /* base address for surface info (nve4) */
> uint16_t bufInfoBase; /* base address for buffer info */
> uint16_t sampleInfoBase; /* base address for sample positions */
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> index d4e29a0..84d04d1 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> @@ -1460,6 +1460,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
> if (insn.getOpcode() == TGSI_OPCODE_BARRIER)
> info->numBarriers = 1;
>
> + if (insn.getOpcode() == TGSI_OPCODE_FBFETCH)
> + info->prop.fp.readsFramebuffer = true;
> +
> if (insn.dstCount()) {
> Instruction::DstRegister dst = insn.getDst(0);
>
> @@ -1575,6 +1578,7 @@ private:
> void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy);
> void handleTXF(Value *dst0[4], int R, int L_M);
> void handleTXQ(Value *dst0[4], enum TexQuery, int R);
> + void handleFBFETCH(Value *dst0[4]);
> void handleLIT(Value *dst0[4]);
> void handleUserClipPlanes();
>
> @@ -2294,6 +2298,40 @@ Converter::handleTXF(Value *dst[4], int R, int L_M)
> }
>
> void
> +Converter::handleFBFETCH(Value *dst[4])
> +{
> + TexInstruction *texi = new_TexInstruction(func, OP_TXF);
> + unsigned int c, d;
> +
> + texi->tex.target = TEX_TARGET_2D_MS_ARRAY;
> + texi->tex.levelZero = 1;
> + texi->tex.useOffsets = 0;
> +
> + for (c = 0, d = 0; c < 4; ++c) {
> + if (dst[c]) {
> + texi->setDef(d++, dst[c]);
> + texi->tex.mask |= 1 << c;
> + }
> + }
> +
> + Value *x = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 0));
> + Value *y = mkOp1v(OP_RDSV, TYPE_F32, getScratch(), mkSysVal(SV_POSITION, 1));
> + Value *z = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_LAYER, 0));
> + Value *ms = mkOp1v(OP_RDSV, TYPE_U32, getScratch(), mkSysVal(SV_SAMPLE_INDEX, 0));
> +
> + mkCvt(OP_CVT, TYPE_U32, x, TYPE_F32, x)->rnd = ROUND_Z;
> + mkCvt(OP_CVT, TYPE_U32, y, TYPE_F32, y)->rnd = ROUND_Z;
> + texi->setSrc(0, x);
> + texi->setSrc(1, y);
> + texi->setSrc(2, z);
> + texi->setSrc(3, ms);
> +
> + texi->tex.r = texi->tex.s = -1;
> +
> + bb->insertTail(texi);
> +}
> +
> +void
> Converter::handleLIT(Value *dst0[4])
> {
> Value *val0 = NULL;
> @@ -3336,6 +3374,9 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
> handleTXQ(dst0, TXQ_TYPE, 0);
> std::swap(dst0[0], dst0[2]);
> break;
> + case TGSI_OPCODE_FBFETCH:
> + handleFBFETCH(dst0);
> + break;
> case TGSI_OPCODE_F2I:
> case TGSI_OPCODE_F2U:
> FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index fe18f47..0d1ca2d 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -118,7 +118,7 @@ NVC0LegalizeSSA::handleFTZ(Instruction *i)
> void
> NVC0LegalizeSSA::handleTEXLOD(TexInstruction *i)
> {
> - if (i->tex.target.isMS())
> + if (i->tex.levelZero)
> return;
Why do you need that change? isMS() should return true for the
TexInstruction you create in handleFBFETCH().
>
> ImmediateValue lod;
> @@ -748,7 +748,10 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
> i->setIndirectR(hnd);
> i->setIndirectS(NULL);
> } else if (i->tex.r == i->tex.s || i->op == OP_TXF) {
> - i->tex.r += prog->driver->io.texBindBase / 4;
> + if (i->tex.r == 0xffff)
> + i->tex.r = prog->driver->io.fbtexBindBase / 4;
> + else
> + i->tex.r += prog->driver->io.texBindBase / 4;
> i->tex.s = 0; // only a single cX[] value possible here
> } else {
> Value *hnd = bld.getScratch();
> @@ -804,6 +807,10 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
> Value *ticRel = i->getIndirectR();
> Value *tscRel = i->getIndirectS();
>
> + if (i->tex.r == 0xffff) {
> + i->tex.r = i->tex.s = 0x10;
> + }
Why 16?
> +
> if (ticRel) {
> i->setSrc(i->tex.rIndirectSrc, NULL);
> if (i->tex.r)
> @@ -2506,9 +2513,13 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
> default:
> if (prog->getType() == Program::TYPE_TESSELLATION_EVAL && !i->perPatch)
> vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0));
> - ld = bld.mkFetch(i->getDef(0), i->dType,
> - FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx);
> - ld->perPatch = i->perPatch;
> + if (prog->getType() == Program::TYPE_FRAGMENT) {
> + bld.mkInterp(NV50_IR_INTERP_FLAT, i->getDef(0), addr, NULL);
> + } else {
> + ld = bld.mkFetch(i->getDef(0), i->dType,
> + FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx);
> + ld->perPatch = i->perPatch;
> + }
> break;
> }
> bld.getBB()->remove(i);
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> index dfe68de..ad21509 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
> @@ -120,6 +120,9 @@
> /* block/grid size, at 3 32-bits integers each, gridid and work_dim */
> #define NVC0_CB_AUX_GRID_INFO(i) 0x100 + (i) * 4 /* CP */
> #define NVC0_CB_AUX_GRID_SIZE (8 * 4)
> +/* FB texture handle */
> +#define NVC0_CB_AUX_FB_TEX_INFO 0x100 /* FP */
> +#define NVC0_CB_AUX_FB_TEX_SIZE (4)
> /* 8 user clip planes, at 4 32-bits floats each */
> #define NVC0_CB_AUX_UCP_INFO 0x120
> #define NVC0_CB_AUX_UCP_SIZE (PIPE_MAX_CLIP_PLANES * 4 * 4)
> @@ -206,6 +209,7 @@ struct nvc0_context {
> unsigned num_samplers[6];
> uint32_t samplers_dirty[6];
> bool seamless_cube_map;
> + struct pipe_sampler_view *fbtexture;
>
> uint32_t tex_handles[6][PIPE_MAX_SAMPLERS]; /* for nve4 */
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> index 50f8083..ccd859e 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
> @@ -486,6 +486,11 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
>
> fp->fp.early_z = info->prop.fp.earlyFragTests;
> fp->fp.sample_mask_in = info->prop.fp.usesSampleMaskIn;
> + fp->fp.reads_framebuffer = info->prop.fp.readsFramebuffer;
> +
> + /* Mark position xy and layer as read */
> + if (fp->fp.reads_framebuffer)
> + fp->hdr[5] |= 0x32000000;
>
> return 0;
> }
> @@ -586,6 +591,7 @@ nvc0_program_translate(struct nvc0_program *prog,
> info->io.suInfoBase = NVC0_CB_AUX_SU_INFO(0);
> if (info->target >= NVISA_GK104_CHIPSET) {
> info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
> + info->io.fbtexBindBase = NVC0_CB_AUX_FB_TEX_INFO;
> }
>
> if (prog->type == PIPE_SHADER_COMPUTE) {
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
> index d33aa04..421ca19 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
> @@ -49,6 +49,7 @@ struct nvc0_program {
> bool sample_mask_in;
> bool force_persample_interp;
> bool flatshade;
> + bool reads_framebuffer;
> } fp;
> struct {
> uint32_t tess_mode; /* ~0 if defined by the other stage */
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> index fe637af..ec7bfe0 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
> @@ -242,6 +242,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
> case PIPE_CAP_TGSI_VOTE:
> case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:
> case PIPE_CAP_TGSI_ARRAY_COMPONENTS:
> + case PIPE_CAP_TGSI_FS_FBFETCH:
> return 1;
> case PIPE_CAP_COMPUTE:
> return (class_3d < GP100_3D_CLASS);
> @@ -276,7 +277,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
> case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:
> case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:
> case PIPE_CAP_NATIVE_FENCE_FD:
> - case PIPE_CAP_TGSI_FS_FBFETCH:
> return 0;
>
> case PIPE_CAP_VENDOR_ID:
> @@ -537,6 +537,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
> nouveau_heap_destroy(&screen->lib_code);
> nouveau_heap_destroy(&screen->text_heap);
>
> + FREE(screen->default_tsc);
> FREE(screen->tic.entries);
>
> nouveau_object_del(&screen->eng3d);
> @@ -1228,6 +1229,9 @@ nvc0_screen_create(struct nouveau_device *dev)
> if (!nvc0_blitter_create(screen))
> goto fail;
>
> + screen->default_tsc = CALLOC_STRUCT(nv50_tsc_entry);
> + screen->default_tsc->tsc[0] = G80_TSC_0_SRGB_CONVERSION;
> +
> nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
>
> return &screen->base;
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
> index aff0308..a6d4a2b 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
> @@ -81,6 +81,8 @@ struct nvc0_screen {
>
> struct nvc0_blitter *blitter;
>
> + struct nv50_tsc_entry *default_tsc;
> +
> struct {
> void **entries;
> int next;
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> index f5060cc..35ae1b4 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
> @@ -605,7 +605,9 @@ nvc0_validate_min_samples(struct nvc0_context *nvc0)
> // If we're using the incoming sample mask and doing sample shading, we
> // have to do sample shading "to the max", otherwise there's no way to
> // tell which sets of samples are covered by the current invocation.
> - if (nvc0->fragprog->fp.sample_mask_in)
> + // Similarly for reading the framebuffer.
> + if (nvc0->fragprog->fp.sample_mask_in ||
> + nvc0->fragprog->fp.reads_framebuffer)
> samples = util_framebuffer_get_num_samples(&nvc0->framebuffer);
> samples |= NVC0_3D_SAMPLE_SHADING_ENABLE;
> }
> @@ -701,6 +703,92 @@ nvc0_validate_tess_state(struct nvc0_context *nvc0)
> PUSH_DATAp(push, nvc0->default_tess_inner, 2);
> }
>
> +/* If we have a frag shader bound which tries to read from the framebuffer, we
> + * have to make sure that the fb is bound as a texture in the expected
> + * location. For Fermi, that's in the special driver slot 16, while for Kepler
> + * it's a regular binding stored in the driver constbuf.
> + */
> +static void
> +nvc0_validate_fbread(struct nvc0_context *nvc0)
> +{
> + struct nouveau_pushbuf *push = nvc0->base.pushbuf;
> + struct nvc0_screen *screen = nvc0->screen;
> + struct pipe_context *pipe = &nvc0->base.pipe;
> + struct pipe_sampler_view *old_view = nvc0->fbtexture;
> + struct pipe_sampler_view *new_view = NULL;
> +
> + if (nvc0->fragprog->fp.reads_framebuffer &&
> + nvc0->framebuffer.nr_cbufs &&
> + nvc0->framebuffer.cbufs[0]) {
> + struct pipe_sampler_view tmpl;
> + struct pipe_surface *sf = nvc0->framebuffer.cbufs[0];
> +
> + tmpl.target = PIPE_TEXTURE_2D_ARRAY;
> + tmpl.format = sf->format;
> + tmpl.u.tex.first_level = tmpl.u.tex.last_level = sf->u.tex.level;
> + tmpl.u.tex.first_layer = sf->u.tex.first_layer;
> + tmpl.u.tex.last_layer = sf->u.tex.last_layer;
> + tmpl.swizzle_r = PIPE_SWIZZLE_X;
> + tmpl.swizzle_g = PIPE_SWIZZLE_Y;
> + tmpl.swizzle_b = PIPE_SWIZZLE_Z;
> + tmpl.swizzle_a = PIPE_SWIZZLE_W;
> +
> + /* Bail if it's the same parameters */
> + if (old_view && old_view->texture == sf->texture &&
> + old_view->format == sf->format &&
> + old_view->u.tex.first_level == sf->u.tex.level &&
> + old_view->u.tex.first_layer == sf->u.tex.first_layer &&
> + old_view->u.tex.last_layer == sf->u.tex.last_layer)
> + return;
> +
> + new_view = pipe->create_sampler_view(pipe, sf->texture, &tmpl);
> + } else if (old_view == NULL) {
> + return;
> + }
> +
> + if (old_view)
> + pipe_sampler_view_reference(&nvc0->fbtexture, NULL);
> + nvc0->fbtexture = new_view;
> +
> + if (screen->default_tsc->id < 0) {
> + struct nv50_tsc_entry *tsc = nv50_tsc_entry(screen->default_tsc);
> + tsc->id = nvc0_screen_tsc_alloc(screen, tsc);
> + nvc0->base.push_data(&nvc0->base, screen->txc, 65536 + tsc->id * 32,
> + NV_VRAM_DOMAIN(&screen->base), 32, tsc->tsc);
> + screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
> +
> + IMMED_NVC0(push, NVC0_3D(TSC_FLUSH), 0);
> + if (screen->base.class_3d < NVE4_3D_CLASS) {
> + BEGIN_NVC0(push, NVC0_3D(BIND_TSC2(0)), 1);
> + PUSH_DATA (push, (tsc->id << 12) | 1);
> + }
> + }
> +
> + if (new_view) {
> + struct nv50_tic_entry *tic = nv50_tic_entry(new_view);
> + assert(tic->id < 0);
> + tic->id = nvc0_screen_tic_alloc(screen, tic);
> + nvc0->base.push_data(&nvc0->base, screen->txc, tic->id * 32,
> + NV_VRAM_DOMAIN(&screen->base), 32, tic->tic);
> + screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
> +
> + if (screen->base.class_3d >= NVE4_3D_CLASS) {
> + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
> + PUSH_DATA (push, NVC0_CB_AUX_SIZE);
> + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
> + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
> + BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 1);
> + PUSH_DATA (push, NVC0_CB_AUX_FB_TEX_INFO);
> + PUSH_DATA (push, (screen->default_tsc->id << 20) | tic->id);
> + } else {
> + BEGIN_NVC0(push, NVC0_3D(BIND_TIC2(0)), 1);
> + PUSH_DATA (push, (tic->id << 9) | 1);
> + }
> +
> + IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);
> + }
> +}
> +
> static void
> nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
> {
> @@ -782,6 +870,8 @@ validate_list_3d[] = {
> { nvc0_validate_textures, NVC0_NEW_3D_TEXTURES },
> { nvc0_validate_samplers, NVC0_NEW_3D_SAMPLERS },
> { nve4_set_tex_handles, NVC0_NEW_3D_TEXTURES | NVC0_NEW_3D_SAMPLERS },
> + { nvc0_validate_fbread, NVC0_NEW_3D_FRAGPROG |
> + NVC0_NEW_3D_FRAMEBUFFER },
> { nvc0_vertex_arrays_validate, NVC0_NEW_3D_VERTEX | NVC0_NEW_3D_ARRAYS },
> { nvc0_validate_surfaces, NVC0_NEW_3D_SURFACES },
> { nvc0_validate_buffers, NVC0_NEW_3D_BUFFERS },
>
More information about the mesa-dev
mailing list