[Mesa-dev] [RFC PATCH 2/3] nvc0: add support for bindless textures on kepler+

Ilia Mirkin imirkin at alum.mit.edu
Sat Jun 24 16:01:39 UTC 2017


This keeps a per-context list of resident textures and adds the buffers on
that list to the active buffer list on every draw and compute launch. Bindless
texture fetches are also treated slightly differently in codegen: the indirect
source carries the complete texture handle, and the texture operand is not
required to come from the SAMPLER file.

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir.h      |  1 +
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 10 +++
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      | 10 ++-
 src/gallium/drivers/nouveau/nvc0/nvc0_context.c    |  2 +
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h    | 17 +++-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c     |  4 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.h     |  4 +
 src/gallium/drivers/nouveau/nvc0/nvc0_tex.c        | 94 ++++++++++++++++++++++
 src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c        |  6 ++
 src/gallium/drivers/nouveau/nvc0/nve4_compute.c    |  7 ++
 10 files changed, 148 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 5c09fed05c1..e6cdcc4447a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -1022,6 +1022,7 @@ public:
       bool liveOnly; // only execute on live pixels of a quad (optimization)
       bool levelZero;
       bool derivAll;
+      bool bindless;
 
       int8_t useOffsets; // 0, 1, or 4 for textureGatherOffsets
       int8_t offset[3]; // only used on nv50
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 51ec7c0c8cb..e19accbc8ea 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -2217,6 +2217,16 @@ Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S)
 {
    unsigned rIdx = 0, sIdx = 0;
 
+   if (R >= 0 && tgsi.getSrc(R).getFile() != TGSI_FILE_SAMPLER) {
+      // This is the bindless case. We have to get the actual value and pass
+      // it in. This will be the complete handle.
+      tex->tex.rIndirectSrc = s;
+      tex->setSrc(s++, fetchSrc(R, 0));
+      tex->setTexture(tgsi.getTexture(code, R), 0xff, 0x1f);
+      tex->tex.bindless = true;
+      return;
+   }
+
    if (R >= 0)
       rIdx = tgsi.getSrc(R).getIndex(0);
    if (S >= 0)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 95280494038..93c579d2540 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -866,10 +866,12 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
       if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) {
          // XXX this ignores tsc, and assumes a 1:1 mapping
          assert(i->tex.rIndirectSrc >= 0);
-         Value *hnd = loadTexHandle(i->getIndirectR(), i->tex.r);
-         i->tex.r = 0xff;
-         i->tex.s = 0x1f;
-         i->setIndirectR(hnd);
+         if (!i->tex.bindless) {
+            Value *hnd = loadTexHandle(i->getIndirectR(), i->tex.r);
+            i->tex.r = 0xff;
+            i->tex.s = 0x1f;
+            i->setIndirectR(hnd);
+         }
          i->setIndirectS(NULL);
       } else if (i->tex.r == i->tex.s || i->op == OP_TXF) {
          if (i->tex.r == 0xffff)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
index d5ef5851daa..8ef6d08655c 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -401,6 +401,8 @@ nvc0_create(struct pipe_screen *pscreen, void *priv, unsigned ctxflags)
    nvc0_init_state_functions(nvc0);
    nvc0_init_transfer_functions(nvc0);
    nvc0_init_resource_functions(pipe);
+   if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
+      nvc0_init_bindless_functions(pipe);
 
    nvc0->base.invalidate_resource_storage = nvc0_invalidate_resource_storage;
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 6f631b993c0..0538a50a71b 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -5,6 +5,7 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_state.h"
 
+#include "util/list.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
 #include "util/u_inlines.h"
@@ -81,6 +82,7 @@
 #define NVC0_BIND_3D_SUF         245
 #define NVC0_BIND_3D_BUF         246
 #define NVC0_BIND_3D_SCREEN      247
+#define NVC0_BIND_3D_BINDLESS    248
 #define NVC0_BIND_3D_TLS         249
 #define NVC0_BIND_3D_TEXT        250
 #define NVC0_BIND_3D_COUNT       251
@@ -95,7 +97,8 @@
 #define NVC0_BIND_CP_QUERY       52
 #define NVC0_BIND_CP_BUF         53
 #define NVC0_BIND_CP_TEXT        54
-#define NVC0_BIND_CP_COUNT       55
+#define NVC0_BIND_CP_BINDLESS    55
+#define NVC0_BIND_CP_COUNT       56
 
 /* bufctx for other operations */
 #define NVC0_BIND_2D            0
@@ -151,6 +154,13 @@ struct nvc0_blitctx;
 bool nvc0_blitctx_create(struct nvc0_context *);
 void nvc0_blitctx_destroy(struct nvc0_context *);
 
+struct nvc0_resident { /* one entry of a context's resident-handle list */
+   struct list_head list; /* list link; entries are added to tex_head */
+   uint64_t handle; /* bindless handle this entry was made resident for */
+   struct nv04_resource *buf; /* resource behind the handle's texture view */
+   uint32_t flags; /* NOUVEAU_BO_* access flags used when adding buf */
+};
+
 struct nvc0_context {
    struct nouveau_context base;
 
@@ -212,6 +222,9 @@ struct nvc0_context {
 
    uint32_t tex_handles[6][PIPE_MAX_SAMPLERS]; /* for nve4 */
 
+   struct list_head tex_head;
+   struct list_head img_head;
+
    struct pipe_framebuffer_state framebuffer;
    struct pipe_blend_color blend_colour;
    struct pipe_stencil_ref stencil_ref;
@@ -362,6 +375,8 @@ struct pipe_sampler_view *
 gm107_create_texture_view_from_image(struct pipe_context *,
                                      const struct pipe_image_view *);
 
+void nvc0_init_bindless_functions(struct pipe_context *);
+
 /* nvc0_transfer.c */
 void
 nvc0_init_transfer_functions(struct nvc0_context *);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 2398b48f553..3970c46a1ff 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -1275,7 +1275,7 @@ nvc0_screen_tic_alloc(struct nvc0_screen *screen, void *entry)
 {
    int i = screen->tic.next;
 
-   while (screen->tic.lock[i / 32] & (1 << (i % 32)))
+   while ((screen->tic.lock[i / 32] | screen->tic.lock2[i / 32]) & (1 << (i % 32)))
       i = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1);
 
    screen->tic.next = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1);
@@ -1292,7 +1292,7 @@ nvc0_screen_tsc_alloc(struct nvc0_screen *screen, void *entry)
 {
    int i = screen->tsc.next;
 
-   while (screen->tsc.lock[i / 32] & (1 << (i % 32)))
+   while ((screen->tsc.lock[i / 32] | screen->tsc.lock2[i / 32]) & (1 << (i % 32)))
       i = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1);
 
    screen->tsc.next = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
index de0a02d9cb8..e3b1d5dbcbf 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -87,6 +87,7 @@ struct nvc0_screen {
       void **entries;
       int next;
       uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32];
+      uint32_t lock2[NVC0_TIC_MAX_ENTRIES / 32];
       bool maxwell;
    } tic;
 
@@ -94,6 +95,7 @@ struct nvc0_screen {
       void **entries;
       int next;
       uint32_t lock[NVC0_TSC_MAX_ENTRIES / 32];
+      uint32_t lock2[NVC0_TSC_MAX_ENTRIES / 32];
    } tsc;
 
    struct {
@@ -210,6 +212,7 @@ nvc0_screen_tic_free(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
    if (tic->id >= 0) {
       screen->tic.entries[tic->id] = NULL;
       screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
+      screen->tic.lock2[tic->id / 32] &= ~(1 << (tic->id % 32));
    }
 }
 
@@ -219,6 +222,7 @@ nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
    if (tsc->id >= 0) {
       screen->tsc.entries[tsc->id] = NULL;
       screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
+      screen->tsc.lock2[tsc->id / 32] &= ~(1 << (tsc->id % 32));
    }
 }
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index e57391e9a32..ab8515ea7e0 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -755,6 +755,100 @@ nve4_set_tex_handles(struct nvc0_context *nvc0)
    }
 }
 
+static uint64_t
+nve4_create_texture_handle(struct pipe_context *pipe,
+                           struct pipe_sampler_view *view,
+                           const struct pipe_sampler_state *sampler)
+{
+   /* Create a bindless handle for (view, sampler). Handles must stay stable,
+    * so the TIC/TSC entries are uploaded into place and marked in lock2 so
+    * that they can't be kicked out later. Returns the handle, 0 on failure.
+    */
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+   struct nv50_tic_entry *tic = nv50_tic_entry(view);
+   struct nv50_tsc_entry *tsc = pipe->create_sampler_state(pipe, sampler);
+   /* NOTE(review): tsc is dereferenced below without a NULL check. */
+   tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
+   if (tsc->id < 0)
+      goto fail;
+   /* Allocate and upload the view's TIC entry only if it has no id yet. */
+   if (tic->id < 0) {
+      tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
+      if (tic->id < 0)
+         goto fail;
+
+      nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
+                            NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
+                            tic->tic);
+
+      IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);
+   }
+   /* The TSC entry goes into the same txc buffer, starting at offset 65536. */
+   nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc,
+                         65536 + tsc->id * 32,
+                         NV_VRAM_DOMAIN(&nvc0->screen->base),
+                         32, tsc->tsc);
+
+   IMMED_NVC0(push, NVC0_3D(TSC_FLUSH), 0);
+   /* Pin both slots: the tic/tsc allocators skip bits set in lock2. */
+   nvc0->screen->tic.lock2[tic->id / 32] |= 1 << (tic->id % 32);
+   nvc0->screen->tsc.lock2[tsc->id / 32] |= 1 << (tsc->id % 32);
+   /* Handle: bit 32 set, TSC id shifted up by 20, TIC id in the low bits. */
+   return 0x100000000ULL | (tsc->id << 20) | tic->id;
+
+fail:
+   pipe->delete_sampler_state(pipe, tsc);
+   return 0;
+}
+
+static void
+nve4_delete_texture_handle(struct pipe_context *pipe, uint64_t handle)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   uint32_t tic = handle & NVE4_TIC_ENTRY_INVALID;
+   uint32_t tsc = (handle & NVE4_TSC_ENTRY_INVALID) >> 20;
+   /* Destroy a bindless handle: clear the lock2 pin on its TIC slot ... */
+   nvc0->screen->tic.lock2[tic / 32] &= ~(1 << (tic % 32));
+   /* ... then delete the sampler state stored in the handle's TSC slot. */
+   pipe->delete_sampler_state(pipe, nvc0->screen->tsc.entries[tsc]);
+}
+
+static void
+nve4_make_texture_handle_resident(struct pipe_context *pipe,
+                                  uint64_t handle, bool resident)
+{
+   struct nvc0_context *nvc0 = nvc0_context(pipe);
+   if (resident) { /* record handle and its texture on the tex_head list */
+      struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident));
+      struct nv50_tic_entry *tic =
+         nvc0->screen->tic.entries[handle & NVE4_TIC_ENTRY_INVALID];
+      assert(tic);
+      /* NOTE(review): res (from calloc) is not NULL-checked before use. */
+      res->handle = handle;
+      res->buf = nv04_resource(tic->pipe.texture);
+      res->flags = NOUVEAU_BO_RD;
+      list_add(&res->list, &nvc0->tex_head);
+   } else { /* unlink and free the first list entry matching handle, if any */
+      list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->tex_head, list) {
+         if (pos->handle == handle) {
+            list_del(&pos->list);
+            free(pos);
+            break;
+         }
+      }
+   }
+}
+
+void
+nvc0_init_bindless_functions(struct pipe_context *pipe) {
+   list_inithead(&nvc0_context(pipe)->tex_head);
+   list_inithead(&nvc0_context(pipe)->img_head);
+   /* Both lists start out empty; now install the texture-handle hooks. */
+   pipe->create_texture_handle = nve4_create_texture_handle;
+   pipe->delete_texture_handle = nve4_delete_texture_handle;
+   pipe->make_texture_handle_resident = nve4_make_texture_handle_resident;
+}
 
 static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
 static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index 406a17e51d6..e306916e5fc 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -922,6 +922,7 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    int s;
 
    nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_IDX);
+   nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_3D_BINDLESS);
 
    /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
    nvc0->vb_elt_first = info->min_index + info->index_bias;
@@ -974,6 +975,11 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
       BCTX_REFN(nvc0->bufctx_3d, 3D_IDX, buf, RD);
    }
 
+   list_for_each_entry(struct nvc0_resident, resident, &nvc0->tex_head, list) {
+      nvc0_add_resident(nvc0->bufctx_3d, NVC0_BIND_3D_BINDLESS, resident->buf,
+                        resident->flags);
+   }
+
    nvc0_state_validate_3d(nvc0, ~0);
 
    if (nvc0->vertprog->vp.need_draw_parameters && !info->indirect) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
index ea601b58a16..54c7965c105 100644
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -684,6 +684,8 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
    struct nouveau_bo *desc_bo;
    int ret;
 
+   nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_BINDLESS);
+
    desc = nve4_compute_alloc_launch_desc(&nvc0->base, &desc_bo, &desc_gpuaddr);
    if (!desc) {
       ret = -1;
@@ -703,6 +705,11 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
 
    nve4_compute_upload_input(nvc0, info);
 
+   list_for_each_entry(struct nvc0_resident, resident, &nvc0->tex_head, list) {
+      nvc0_add_resident(nvc0->bufctx_cp, NVC0_BIND_CP_BINDLESS, resident->buf,
+                        resident->flags);
+   }
+
 #ifdef DEBUG
    if (debug_get_num_option("NV50_PROG_DEBUG", 0)) {
       if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS)
-- 
2.13.0



More information about the mesa-dev mailing list