Mesa (nvc0): nvc0: implement VRAM buffer transfers with bounce buffers

Christoph Bumiller chrisbmr at kemper.freedesktop.org
Mon Dec 27 13:04:44 UTC 2010


Module: Mesa
Branch: nvc0
Commit: e4349027f6842563555992a39add4d0b2283fbbb
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e4349027f6842563555992a39add4d0b2283fbbb

Author: Christoph Bumiller <e0425955 at student.tuwien.ac.at>
Date:   Mon Dec 27 13:57:46 2010 +0100

nvc0: implement VRAM buffer transfers with bounce buffers

---

 src/gallium/drivers/nvc0/nvc0_buffer.c   |  302 +++++++++++++++++++++++-------
 src/gallium/drivers/nvc0/nvc0_context.c  |   35 ++---
 src/gallium/drivers/nvc0/nvc0_context.h  |    6 +-
 src/gallium/drivers/nvc0/nvc0_fence.c    |   11 +
 src/gallium/drivers/nvc0/nvc0_fence.h    |    1 +
 src/gallium/drivers/nvc0/nvc0_push.c     |   10 +-
 src/gallium/drivers/nvc0/nvc0_resource.h |   58 +++++--
 src/gallium/drivers/nvc0/nvc0_screen.c   |    3 +-
 src/gallium/drivers/nvc0/nvc0_screen.h   |   18 ++-
 src/gallium/drivers/nvc0/nvc0_state.c    |    4 +
 src/gallium/drivers/nvc0/nvc0_tex.c      |    2 -
 src/gallium/drivers/nvc0/nvc0_winsys.h   |    2 +
 12 files changed, 336 insertions(+), 116 deletions(-)

diff --git a/src/gallium/drivers/nvc0/nvc0_buffer.c b/src/gallium/drivers/nvc0/nvc0_buffer.c
index 93d7f5d..8021e43 100644
--- a/src/gallium/drivers/nvc0/nvc0_buffer.c
+++ b/src/gallium/drivers/nvc0/nvc0_buffer.c
@@ -11,7 +11,15 @@
 #include "nvc0_context.h"
 #include "nvc0_resource.h"
 
-#define NVC0_BUFFER_STATUS_USER_MEMORY 0xff
+struct nvc0_transfer {
+   struct pipe_transfer base;
+};
+
+static INLINE struct nvc0_transfer *
+nvc0_transfer(struct pipe_transfer *transfer)
+{
+   return (struct nvc0_transfer *)transfer;
+}
 
 static INLINE boolean
 nvc0_buffer_allocate(struct nvc0_screen *screen, struct nvc0_resource *buf,
@@ -28,12 +36,13 @@ nvc0_buffer_allocate(struct nvc0_screen *screen, struct nvc0_resource *buf,
                                  &buf->offset);
       if (!buf->bo)
          return FALSE;
-   } else {
-      assert(!domain);
-      if (!buf->data)
+   }
+   if (domain != NOUVEAU_BO_GART) {
+      if (!buf->data) {
          buf->data = MALLOC(buf->base.width0);
-      if (!buf->data)
-         return FALSE;
+         if (!buf->data)
+            return FALSE;
+      }
    }
    buf->domain = domain;
    return TRUE;
@@ -59,68 +68,199 @@ nvc0_buffer_destroy(struct pipe_screen *pscreen,
    if (res->mm)
       release_allocation(&res->mm, screen->fence.current);
 
-   if (res->status != NVC0_BUFFER_STATUS_USER_MEMORY && res->data)
+   if (res->data && !(res->status & NVC0_BUFFER_STATUS_USER_MEMORY))
       FREE(res->data);
 
    FREE(res);
 }
 
-static INLINE uint32_t
-nouveau_buffer_rw_flags(unsigned pipe)
+/* Maybe just migrate to GART right away if we actually need to do this. */
+boolean
+nvc0_buffer_download(struct nvc0_context *nvc0, struct nvc0_resource *buf,
+                     unsigned start, unsigned size)
+{
+   struct nvc0_mm_allocation *mm;
+   struct nouveau_bo *bounce = NULL;
+   uint32_t offset;
+
+   assert(buf->domain == NOUVEAU_BO_VRAM);
+
+   mm = nvc0_mm_allocate(nvc0->screen->mm_GART, size, &bounce, &offset);
+   if (!bounce)
+      return FALSE;
+
+   nvc0_m2mf_copy_linear(nvc0, bounce, offset, NOUVEAU_BO_GART,
+                         buf->bo, buf->offset + start, NOUVEAU_BO_VRAM,
+                         size);
+
+   if (nouveau_bo_map_range(bounce, offset, size, NOUVEAU_BO_RD))
+      return FALSE;
+   memcpy(buf->data + start, bounce->map, size);
+   nouveau_bo_unmap(bounce);
+
+   buf->status &= ~NVC0_BUFFER_STATUS_DIRTY;
+
+   nouveau_bo_ref(NULL, &bounce);
+   if (mm)
+      nvc0_mm_free(mm);
+   return TRUE;
+}
+
+static boolean
+nvc0_buffer_upload(struct nvc0_context *nvc0, struct nvc0_resource *buf,
+                   unsigned start, unsigned size)
 {
-   uint32_t flags = 0;
+   struct nvc0_mm_allocation *mm;
+   struct nouveau_bo *bounce = NULL;
+   uint32_t offset;
 
-   if (pipe & PIPE_TRANSFER_READ)
-      flags = NOUVEAU_BO_RD;
-   if (pipe & PIPE_TRANSFER_WRITE)
-      flags |= NOUVEAU_BO_WR;
+   mm = nvc0_mm_allocate(nvc0->screen->mm_GART, size, &bounce, &offset);
+   if (!bounce)
+      return FALSE;
+
+   nouveau_bo_map_range(bounce, offset, size,
+                        NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC);
+   memcpy(bounce->map, buf->data + start, size);
+   nouveau_bo_unmap(bounce);
+
+   nvc0_m2mf_copy_linear(nvc0, buf->bo, buf->offset + start, NOUVEAU_BO_VRAM,
+                         bounce, offset, NOUVEAU_BO_GART, size);
+
+   nouveau_bo_ref(NULL, &bounce);
+   if (mm)
+      release_allocation(&mm, nvc0->screen->fence.current);
+
+   if (start == 0 && size == buf->base.width0)
+      buf->status &= ~NVC0_BUFFER_STATUS_DIRTY;
+   return TRUE;
+}
+
+static struct pipe_transfer *
+nvc0_buffer_transfer_get(struct pipe_context *pipe,
+                         struct pipe_resource *resource,
+                         unsigned level,
+                         unsigned usage,
+                         const struct pipe_box *box)
+{
+   struct nvc0_resource *buf = nvc0_resource(resource);
+   struct nvc0_transfer *xfr = CALLOC_STRUCT(nvc0_transfer);
+   if (!xfr)
+      return NULL;
+
+   xfr->base.resource = resource;
+   xfr->base.box.x = box->x;
+   xfr->base.box.width = box->width;
+   xfr->base.usage = usage;
+
+   if (buf->domain == NOUVEAU_BO_VRAM) {
+      if (usage & PIPE_TRANSFER_READ) {
+         if (buf->status & NVC0_BUFFER_STATUS_DIRTY)
+            nvc0_buffer_download(nvc0_context(pipe), buf, 0, buf->base.width0);
+      }
+   }
+
+   return &xfr->base;
+}
+
+static void
+nvc0_buffer_transfer_destroy(struct pipe_context *pipe,
+                             struct pipe_transfer *transfer)
+{
+   struct nvc0_resource *buf = nvc0_resource(transfer->resource);
+   struct nvc0_transfer *xfr = nvc0_transfer(transfer);
+
+   if (xfr->base.usage & PIPE_TRANSFER_WRITE) {
+      /* writing is worse */
+      nvc0_buffer_adjust_score(nvc0_context(pipe), buf, -5000);
+
+      if (buf->domain == NOUVEAU_BO_VRAM) {
+         nvc0_buffer_upload(nvc0_context(pipe), buf,
+                            transfer->box.x, transfer->box.width);
+      }
+
+      if (buf->domain != 0 && (buf->base.bind & (PIPE_BIND_VERTEX_BUFFER |
+                                                 PIPE_BIND_INDEX_BUFFER)))
+         nvc0_context(pipe)->vbo_dirty = TRUE;
+   }
 
-   return flags;
+   FREE(xfr);
+}
+
+static INLINE boolean
+nvc0_buffer_sync(struct nvc0_resource *buf, unsigned rw)
+{
+   if (rw == PIPE_TRANSFER_READ) {
+      if (!buf->fence_wr)
+         return TRUE;
+      if (!nvc0_fence_wait(buf->fence_wr))
+         return FALSE;
+   } else {
+      if (!buf->fence)
+         return TRUE;
+      if (!nvc0_fence_wait(buf->fence))
+         return FALSE;
+
+      nvc0_fence_reference(&buf->fence, NULL);
+   }
+   nvc0_fence_reference(&buf->fence_wr, NULL);
+
+   return TRUE;
+}
+
+static INLINE boolean
+nvc0_buffer_busy(struct nvc0_resource *buf, unsigned rw)
+{
+   if (rw == PIPE_TRANSFER_READ)
+      return (buf->fence_wr && !nvc0_fence_signalled(buf->fence_wr));
+   else
+      return (buf->fence && !nvc0_fence_signalled(buf->fence));
 }
 
 static void *
 nvc0_buffer_transfer_map(struct pipe_context *pipe,
                          struct pipe_transfer *transfer)
 {
-   struct nvc0_resource *res = nvc0_resource(transfer->resource);
-   struct nvc0_fence *fence;
+   struct nvc0_transfer *xfr = nvc0_transfer(transfer);
+   struct nvc0_resource *buf = nvc0_resource(transfer->resource);
+   struct nouveau_bo *bo = buf->bo;
    uint8_t *map;
    int ret;
-   uint32_t flags = nouveau_buffer_rw_flags(transfer->usage);
+   uint32_t offset = xfr->base.box.x;
+   uint32_t flags;
 
-   if ((res->base.bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER)) &&
-       (flags & NOUVEAU_BO_WR))
-      nvc0_context(pipe)->vbo_dirty = TRUE;
+   nvc0_buffer_adjust_score(nvc0_context(pipe), buf, -250);
 
-   if (res->domain == 0)
-      return res->data + transfer->box.x;
+   if (buf->domain != NOUVEAU_BO_GART)
+      return buf->data + offset;
 
-   if (res->domain == NOUVEAU_BO_VRAM) {
-      NOUVEAU_ERR("transfers to/from VRAM buffers are not allowed\n");
-      /* if this happens, migrate back to GART */
-      return NULL;
-   }
+   if (buf->mm)
+      flags = NOUVEAU_BO_NOSYNC | NOUVEAU_BO_RDWR;
+   else
+      flags = nouveau_screen_transfer_flags(xfr->base.usage);
 
-   if (res->score > -1024)
-      --res->score;
+   offset += buf->offset;
 
-   ret = nouveau_bo_map(res->bo, flags | NOUVEAU_BO_NOSYNC);
+   ret = nouveau_bo_map_range(buf->bo, offset, xfr->base.box.width, flags);
    if (ret)
       return NULL;
-   map = res->bo->map;
-   nouveau_bo_unmap(res->bo);
-
-   fence = (flags == NOUVEAU_BO_RD) ? res->fence_wr : res->fence;
-
-   if (fence) {
-      if (nvc0_fence_wait(fence) == FALSE)
-         NOUVEAU_ERR("failed to fence buffer\n");
-
-      nvc0_fence_reference(&res->fence, NULL);
-      nvc0_fence_reference(&res->fence_wr, NULL);
+   map = bo->map;
+
+   /* Unmap right now. Since multiple buffers can share a single nouveau_bo,
+    * not doing so might make future maps fail or trigger "reloc while mapped"
+    * errors. For now, mappings to userspace are guaranteed to be persistent.
+    */
+   nouveau_bo_unmap(bo);
+
+   if (buf->mm) {
+      if (xfr->base.usage & PIPE_TRANSFER_DONTBLOCK) {
+         if (nvc0_buffer_busy(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE))
+            return NULL;
+      } else
+      if (!(xfr->base.usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+         nvc0_buffer_sync(buf, xfr->base.usage & PIPE_TRANSFER_READ_WRITE);
+      }
    }
-
-   return map + transfer->box.x + res->offset;
+   return map;
 }
 
 
@@ -131,26 +271,23 @@ nvc0_buffer_transfer_flush_region(struct pipe_context *pipe,
                                   const struct pipe_box *box)
 {
    struct nvc0_resource *res = nvc0_resource(transfer->resource);
+   struct nouveau_bo *bo = res->bo;
+   unsigned offset = res->offset + transfer->box.x + box->x;
 
-   if (!res->bo)
+   /* not using non-snoop system memory yet, no need for cflush */
+   if (1)
       return;
 
-   nouveau_screen_bo_map_flush_range(pipe->screen,
-                                     res->bo,
-                                     res->offset + transfer->box.x + box->x,
-                                     box->width);
+   /* XXX: maybe need to upload for VRAM buffers here */
+
+   nouveau_screen_bo_map_flush_range(pipe->screen, bo, offset, box->width);
 }
 
 static void
 nvc0_buffer_transfer_unmap(struct pipe_context *pipe,
                            struct pipe_transfer *transfer)
 {
-   struct nvc0_resource *res = nvc0_resource(transfer->resource);
-
-   if (res->data)
-      return;
-
-   /* nouveau_screen_bo_unmap(pipe->screen, res->bo); */
+   /* we've called nouveau_bo_unmap right after map */
 }
 
 const struct u_resource_vtbl nvc0_buffer_vtbl =
@@ -158,8 +295,8 @@ const struct u_resource_vtbl nvc0_buffer_vtbl =
    u_default_resource_get_handle,     /* get_handle */
    nvc0_buffer_destroy,               /* resource_destroy */
    NULL,                              /* is_resource_referenced */
-   u_default_get_transfer,            /* get_transfer */
-   u_default_transfer_destroy,        /* transfer_destroy */
+   nvc0_buffer_transfer_get,          /* get_transfer */
+   nvc0_buffer_transfer_destroy,      /* transfer_destroy */
    nvc0_buffer_transfer_map,          /* transfer_map */
    nvc0_buffer_transfer_flush_region, /* transfer_flush_region */
    nvc0_buffer_transfer_unmap,        /* transfer_unmap */
@@ -227,6 +364,23 @@ nvc0_user_buffer_create(struct pipe_screen *pscreen,
    return &buffer->base;
 }
 
+static INLINE boolean
+nvc0_buffer_fetch_data(struct nvc0_resource *buf,
+                       struct nouveau_bo *bo, unsigned offset, unsigned size)
+{
+   if (!buf->data) {
+      buf->data = MALLOC(size);
+      if (!buf->data)
+         return FALSE;
+   }
+   if (nouveau_bo_map_range(bo, offset, size, NOUVEAU_BO_RD))
+      return FALSE;
+   memcpy(buf->data, bo->map, size);
+   nouveau_bo_unmap(bo);
+
+   return TRUE;
+}
+
 /* Migrate a linear buffer (vertex, index, constants) USER -> GART -> VRAM. */
 boolean
 nvc0_buffer_migrate(struct nvc0_context *nvc0,
@@ -235,38 +389,52 @@ nvc0_buffer_migrate(struct nvc0_context *nvc0,
    struct nvc0_screen *screen = nvc0_screen(buf->base.screen);
    struct nouveau_bo *bo;
    unsigned size = buf->base.width0;
+   unsigned offset;
    int ret;
 
+   assert(domain != buf->domain);
+
    if (domain == NOUVEAU_BO_GART && buf->domain == 0) {
       if (!nvc0_buffer_allocate(screen, buf, domain))
          return FALSE;
-      ret = nouveau_bo_map(buf->bo, NOUVEAU_BO_WR | NOUVEAU_BO_NOSYNC);
+      ret = nouveau_bo_map_range(buf->bo, buf->offset, size, NOUVEAU_BO_WR |
+                                 NOUVEAU_BO_NOSYNC);
       if (ret)
          return ret;
-      memcpy((uint8_t *)buf->bo->map + buf->offset, buf->data, size);
+      memcpy(buf->bo->map, buf->data, size);
       nouveau_bo_unmap(buf->bo);
+      FREE(buf->data);
    } else
-   if (domain == NOUVEAU_BO_VRAM && buf->domain == NOUVEAU_BO_GART) {
+   if (domain != 0 && buf->domain != 0) {
       struct nvc0_mm_allocation *mm = buf->mm;
 
+      if (domain == NOUVEAU_BO_VRAM) {
+         /* keep a system memory copy of our data in case we hit a fallback */
+         if (!nvc0_buffer_fetch_data(buf, buf->bo, buf->offset, size))
+            return FALSE;
+         debug_printf("migrating %u KiB to VRAM\n", size / 1024);
+      }
+
+      offset = buf->offset;
       bo = buf->bo;
       buf->bo = NULL;
       buf->mm = NULL;
       nvc0_buffer_allocate(screen, buf, domain);
 
-      nvc0_m2mf_copy_linear(nvc0, buf->bo, 0, NOUVEAU_BO_VRAM,
-                            bo, 0, NOUVEAU_BO_GART, buf->base.width0);
+      nvc0_m2mf_copy_linear(nvc0, buf->bo, buf->offset, domain,
+                            bo, offset, buf->domain, buf->base.width0);
 
-      release_allocation(&mm, screen->fence.current);
       nouveau_bo_ref(NULL, &bo);
+      if (mm)
+         release_allocation(&mm, screen->fence.current);
    } else
    if (domain == NOUVEAU_BO_VRAM && buf->domain == 0) {
-      /* should use a scratch buffer instead here */
-      if (!nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_GART))
+      if (!nvc0_buffer_allocate(screen, buf, NOUVEAU_BO_VRAM))
+         return FALSE;
+      if (!nvc0_buffer_upload(nvc0, buf, 0, buf->base.width0))
          return FALSE;
-      return nvc0_buffer_migrate(nvc0, buf, NOUVEAU_BO_VRAM);
    } else
-      return -1;
+      return FALSE;
 
    buf->domain = domain;
 
diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c
index d41ee29..b2b4fd6 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nvc0/nvc0_context.c
@@ -104,7 +104,7 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
 }
 
 struct resident {
-   struct nouveau_bo *bo;
+   struct nvc0_resource *res;
    uint32_t flags;
 };
 
@@ -112,12 +112,14 @@ void
 nvc0_bufctx_add_resident(struct nvc0_context *nvc0, int ctx,
                          struct nvc0_resource *resource, uint32_t flags)
 {
-   struct resident rsd = { NULL, flags };
+   struct resident rsd = { resource, flags };
 
    if (!resource->bo)
       return;
-   nouveau_bo_ref(resource->bo, &rsd.bo);
 
+   /* We don't need to reference the resource here, it will be referenced
+    * in the context/state, and bufctx will be reset when state changes.
+    */
    util_dynarray_append(&nvc0->residents[ctx], struct resident, rsd);
 }
 
@@ -125,35 +127,24 @@ void
 nvc0_bufctx_del_resident(struct nvc0_context *nvc0, int ctx,
                          struct nvc0_resource *resource)
 {
-   struct resident *rsd, rem;
+   struct resident *rsd, *top;
    unsigned i;
 
    for (i = 0; i < nvc0->residents[ctx].size / sizeof(struct resident); ++i) {
       rsd = util_dynarray_element(&nvc0->residents[ctx], struct resident, i);
 
-      if (rsd->bo == resource->bo) {
-         rem = util_dynarray_pop(&nvc0->residents[ctx], struct resident);
-         nouveau_bo_ref(NULL, &rem.bo);
+      if (rsd->res == resource) {
+         top = util_dynarray_pop_ptr(&nvc0->residents[ctx], struct resident);
+         if (rsd != top)
+            *rsd = *top;
          break;
       }
    }
 }
 
 void
-nvc0_bufctx_reset(struct nvc0_context *nvc0, int ctx)
-{
-   unsigned i;
-
-   for (i = 0; i < nvc0->residents[ctx].size / sizeof(struct resident); ++i)
-      nouveau_bo_ref(NULL, &util_dynarray_element(&nvc0->residents[ctx],
-                                                  struct resident, i)->bo);
-   util_dynarray_resize(&nvc0->residents[ctx], 0);
-}
-
-void
 nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0)
 {
-   struct nouveau_channel *chan = nvc0->screen->base.channel;
    struct resident *rsd;
    struct util_dynarray *array;
    unsigned ctx, i;
@@ -164,11 +155,9 @@ nvc0_bufctx_emit_relocs(struct nvc0_context *nvc0)
       for (i = 0; i < array->size / sizeof(struct resident); ++i) {
          rsd = util_dynarray_element(array, struct resident, i);
 
-         nouveau_bo_validate(chan, rsd->bo, rsd->flags);
+         nvc0_resource_validate(rsd->res, rsd->flags);
       }
    }
 
-   nouveau_bo_validate(chan, nvc0->screen->text, NOUVEAU_BO_RD);
-   nouveau_bo_validate(chan, nvc0->screen->uniforms, NOUVEAU_BO_RD);
-   nouveau_bo_validate(chan, nvc0->screen->txc, NOUVEAU_BO_RD);
+   nvc0_screen_make_buffers_resident(nvc0->screen);
 }
diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h
index 962a2c0..83aff0a 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nvc0/nvc0_context.h
@@ -147,11 +147,15 @@ nvc0_surface(struct pipe_surface *ps)
 struct pipe_context *nvc0_create(struct pipe_screen *, void *);
 
 void nvc0_bufctx_emit_relocs(struct nvc0_context *);
-void nvc0_bufctx_reset(struct nvc0_context *, int ctx);
 void nvc0_bufctx_add_resident(struct nvc0_context *, int ctx,
                               struct nvc0_resource *, uint32_t flags);
 void nvc0_bufctx_del_resident(struct nvc0_context *, int ctx,
                               struct nvc0_resource *);
+static INLINE void
+nvc0_bufctx_reset(struct nvc0_context *nvc0, int ctx)
+{
+   util_dynarray_resize(&nvc0->residents[ctx], 0);
+}
 
 /* nvc0_draw.c */
 extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
diff --git a/src/gallium/drivers/nvc0/nvc0_fence.c b/src/gallium/drivers/nvc0/nvc0_fence.c
index dc2abe4..0387c59 100644
--- a/src/gallium/drivers/nvc0/nvc0_fence.c
+++ b/src/gallium/drivers/nvc0/nvc0_fence.c
@@ -139,6 +139,17 @@ nvc0_screen_fence_update(struct nvc0_screen *screen)
 #define NVC0_FENCE_MAX_SPINS (1 << 17)
 
 boolean
+nvc0_fence_signalled(struct nvc0_fence *fence)
+{
+   struct nvc0_screen *screen = fence->screen;
+
+   if (fence->state == NVC0_FENCE_STATE_EMITTED)
+      nvc0_screen_fence_update(screen);
+
+   return fence->state == NVC0_FENCE_STATE_SIGNALLED;
+}
+
+boolean
 nvc0_fence_wait(struct nvc0_fence *fence)
 {
    struct nvc0_screen *screen = fence->screen;
diff --git a/src/gallium/drivers/nvc0/nvc0_fence.h b/src/gallium/drivers/nvc0/nvc0_fence.h
index 7b31f28..e63c164 100644
--- a/src/gallium/drivers/nvc0/nvc0_fence.h
+++ b/src/gallium/drivers/nvc0/nvc0_fence.h
@@ -24,6 +24,7 @@ void nvc0_fence_emit(struct nvc0_fence *);
 void nvc0_fence_del(struct nvc0_fence *);
 
 boolean nvc0_fence_wait(struct nvc0_fence *);
+boolean nvc0_fence_signalled(struct nvc0_fence *);
 
 static INLINE void
 nvc0_fence_reference(struct nvc0_fence **ref, struct nvc0_fence *fence)
diff --git a/src/gallium/drivers/nvc0/nvc0_push.c b/src/gallium/drivers/nvc0/nvc0_push.c
index 4bf259c..779a477 100644
--- a/src/gallium/drivers/nvc0/nvc0_push.c
+++ b/src/gallium/drivers/nvc0/nvc0_push.c
@@ -215,7 +215,8 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
       struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i];
       struct nvc0_resource *res = nvc0_resource(vb->buffer);
 
-      data = nvc0_resource_map_offset(res, vb->buffer_offset, NOUVEAU_BO_RD);
+      data = nvc0_resource_map_offset(nvc0, res,
+                                      vb->buffer_offset, NOUVEAU_BO_RD);
       if (info->indexed)
          data += info->index_bias * vb->stride;
 
@@ -223,12 +224,11 @@ nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
    }
 
    if (info->indexed) {
-      ctx.idxbuf = pipe_buffer_map(&nvc0->pipe, nvc0->idxbuf.buffer,
-                                   PIPE_TRANSFER_READ, &transfer);
+      ctx.idxbuf = nvc0_resource_map_offset(nvc0,
+                                            nvc0_resource(nvc0->idxbuf.buffer),
+                                            nvc0->idxbuf.offset, NOUVEAU_BO_RD);
       if (!ctx.idxbuf)
          return;
-      ctx.idxbuf = (uint8_t *)ctx.idxbuf + nvc0->idxbuf.offset;
-
       index_size = nvc0->idxbuf.index_size;
       ctx.primitive_restart = info->primitive_restart;
       ctx.restart_index = info->restart_index;
diff --git a/src/gallium/drivers/nvc0/nvc0_resource.h b/src/gallium/drivers/nvc0/nvc0_resource.h
index 9384f19..0ffb9e8 100644
--- a/src/gallium/drivers/nvc0/nvc0_resource.h
+++ b/src/gallium/drivers/nvc0/nvc0_resource.h
@@ -12,6 +12,14 @@
 
 struct pipe_resource;
 struct nouveau_bo;
+struct nvc0_context;
+
+#define NVC0_BUFFER_SCORE_MIN -25000
+#define NVC0_BUFFER_SCORE_MAX  25000
+#define NVC0_BUFFER_SCORE_VRAM_THRESHOLD 20000
+
+#define NVC0_BUFFER_STATUS_DIRTY       (1 << 0)
+#define NVC0_BUFFER_STATUS_USER_MEMORY (1 << 7)
 
 /* Resources, if mapped into the GPU's address space, are guaranteed to
  * have constant virtual addresses.
@@ -21,7 +29,6 @@ struct nouveau_bo;
 struct nvc0_resource {
    struct pipe_resource base;
    const struct u_resource_vtbl *vtbl;
-   uint64_t address;
 
    uint8_t *data;
    struct nouveau_bo *bo;
@@ -38,22 +45,55 @@ struct nvc0_resource {
    struct nvc0_mm_allocation *mm;
 };
 
+boolean
+nvc0_buffer_download(struct nvc0_context *, struct nvc0_resource *,
+                     unsigned start, unsigned size);
+
+boolean
+nvc0_buffer_migrate(struct nvc0_context *,
+                    struct nvc0_resource *, unsigned domain);
+
+static INLINE void
+nvc0_buffer_adjust_score(struct nvc0_context *nvc0, struct nvc0_resource *res,
+                         int16_t score)
+{
+   if (score < 0) {
+      if (res->score > NVC0_BUFFER_SCORE_MIN)
+         res->score += score;
+   } else
+   if (score > 0){
+      if (res->score < NVC0_BUFFER_SCORE_MAX)
+         res->score += score;
+      if (res->domain == NOUVEAU_BO_GART &&
+          res->score > NVC0_BUFFER_SCORE_VRAM_THRESHOLD)
+         nvc0_buffer_migrate(nvc0, res, NOUVEAU_BO_VRAM);
+   }
+}
+
 /* XXX: wait for fence (atm only using this for vertex push) */
 static INLINE void *
-nvc0_resource_map_offset(struct nvc0_resource *res, uint32_t offset,
+nvc0_resource_map_offset(struct nvc0_context *nvc0,
+                         struct nvc0_resource *res, uint32_t offset,
                          uint32_t flags)
 {
    void *map;
 
-   if (res->domain == 0)
+   nvc0_buffer_adjust_score(nvc0, res, -250);
+
+   if ((res->domain == NOUVEAU_BO_VRAM) &&
+       (res->status & NVC0_BUFFER_STATUS_DIRTY))
+      nvc0_buffer_download(nvc0, res, 0, res->base.width0);
+
+   if (res->domain != NOUVEAU_BO_GART)
       return res->data + offset;
 
+   if (res->mm)
+      flags |= NOUVEAU_BO_NOSYNC;
+
    if (nouveau_bo_map_range(res->bo, res->offset + offset,
-                            res->base.width0, flags | NOUVEAU_BO_NOSYNC))
+                            res->base.width0, flags))
       return NULL;
 
-   /* With suballocation, the same bo can be mapped several times, so unmap
-    * immediately. Maps are guaranteed to persist. */
    map = res->bo->map;
    nouveau_bo_unmap(res->bo);
    return map;
@@ -149,12 +189,6 @@ nvc0_miptree_surface_new(struct pipe_context *,
 void
 nvc0_miptree_surface_del(struct pipe_context *, struct pipe_surface *);
 
-struct nvc0_context;
-
-boolean
-nvc0_buffer_migrate(struct nvc0_context *,
-                    struct nvc0_resource *, unsigned domain);
-
 boolean
 nvc0_migrate_vertices(struct nvc0_resource *buf, unsigned base, unsigned size);
 
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
index 4ec73b0..0e80e28 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -248,7 +248,7 @@ nvc0_screen_fence_signalled(struct pipe_screen *pscreen,
                             struct pipe_fence_handle *fence,
                             unsigned flags)
 {
-   return !(((struct nvc0_fence *)fence)->state == NVC0_FENCE_STATE_SIGNALLED);
+   return !(nvc0_fence_signalled(nvc0_fence(fence)));
 }
 
 static int
@@ -622,6 +622,7 @@ nvc0_screen_make_buffers_resident(struct nvc0_screen *screen)
    const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
 
    nouveau_bo_validate(chan, screen->text, flags);
+   nouveau_bo_validate(chan, screen->uniforms, flags);
    nouveau_bo_validate(chan, screen->txc, flags);
    nouveau_bo_validate(chan, screen->tls, flags);
    nouveau_bo_validate(chan, screen->mp_stack_bo, flags);
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h
index 5b1b623..efa5ff6 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nvc0/nvc0_screen.h
@@ -101,18 +101,26 @@ int nvc0_screen_tic_alloc(struct nvc0_screen *, void *);
 int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
 
 static INLINE void
-nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags)
+nvc0_resource_fence(struct nvc0_resource *res, uint32_t flags)
 {
    struct nvc0_screen *screen = nvc0_screen(res->base.screen);
 
-   assert(res->mm);
+   if (res->mm) {
+      nvc0_fence_reference(&res->fence, screen->fence.current);
 
-   nvc0_fence_reference(&res->fence, screen->fence.current);
+      if (flags & NOUVEAU_BO_WR)
+         nvc0_fence_reference(&res->fence_wr, screen->fence.current);
+   }
+}
 
-   if (flags & NOUVEAU_BO_WR)
-      nvc0_fence_reference(&res->fence_wr, screen->fence.current);
+static INLINE void
+nvc0_resource_validate(struct nvc0_resource *res, uint32_t flags)
+{
+   struct nvc0_screen *screen = nvc0_screen(res->base.screen);
 
    nouveau_bo_validate(screen->base.channel, res->bo, flags);
+
+   nvc0_resource_fence(res, flags);
 }
 
 
diff --git a/src/gallium/drivers/nvc0/nvc0_state.c b/src/gallium/drivers/nvc0/nvc0_state.c
index 62abaa7..e77e956 100644
--- a/src/gallium/drivers/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nvc0/nvc0_state.c
@@ -539,6 +539,8 @@ nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s,
 
    nvc0->num_textures[s] = nr;
 
+   nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_TEXTURES);
+
    nvc0->dirty |= NVC0_NEW_TEXTURES;
 }
 
@@ -773,6 +775,8 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe,
     memcpy(nvc0->vtxbuf, vb, sizeof(*vb) * count);
     nvc0->num_vtxbufs = count;
 
+    nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_VERTEX);
+
     nvc0->dirty |= NVC0_NEW_ARRAYS;
 }
 
diff --git a/src/gallium/drivers/nvc0/nvc0_tex.c b/src/gallium/drivers/nvc0/nvc0_tex.c
index c9f929b..b219f82 100644
--- a/src/gallium/drivers/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nvc0/nvc0_tex.c
@@ -218,8 +218,6 @@ void nvc0_validate_textures(struct nvc0_context *nvc0)
 {
    boolean need_flush;
 
-   nvc0_bufctx_reset(nvc0, NVC0_BUFCTX_TEXTURES);
-
    need_flush  = nvc0_validate_tic(nvc0, 0);
    need_flush |= nvc0_validate_tic(nvc0, 4);
 
diff --git a/src/gallium/drivers/nvc0/nvc0_winsys.h b/src/gallium/drivers/nvc0/nvc0_winsys.h
index af77110..1544fb7 100644
--- a/src/gallium/drivers/nvc0/nvc0_winsys.h
+++ b/src/gallium/drivers/nvc0/nvc0_winsys.h
@@ -94,6 +94,8 @@ static INLINE int
 OUT_RESRCl(struct nouveau_channel *chan, struct nvc0_resource *res,
            unsigned delta, unsigned flags)
 {
+   if (flags & NOUVEAU_BO_WR)
+      res->status |= NVC0_BUFFER_STATUS_DIRTY;
    return OUT_RELOCl(chan, res->bo, res->offset + delta, res->domain | flags);
 }
 




More information about the mesa-commit mailing list