Mesa (master): nvc0: use NOUVEAU_GETPARAM_GRAPH_UNITS to get MP count

Christoph Bumiller chrisbmr at kemper.freedesktop.org
Fri Mar 29 00:11:59 UTC 2013


Module: Mesa
Branch: master
Commit: 25722e3454fb179933514f3a1b76e0f4662875bd
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=25722e3454fb179933514f3a1b76e0f4662875bd

Author: Christoph Bumiller <e0425955 at student.tuwien.ac.at>
Date:   Wed Mar 27 23:38:29 2013 +0100

nvc0: use NOUVEAU_GETPARAM_GRAPH_UNITS to get MP count

---

 src/gallium/drivers/nvc0/nvc0_screen.c      |   63 +++++++++++++++++++++------
 src/gallium/drivers/nvc0/nvc0_screen.h      |    5 ++-
 src/gallium/drivers/nvc0/nve4_compute.c     |   12 +++---
 src/gallium/drivers/nvc0/nve4_compute.xml.h |   22 ++++++----
 4 files changed, 73 insertions(+), 29 deletions(-)

diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
index b6cf2ca..b5b4ef1 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -32,6 +32,10 @@
 
 #include "nvc0_graph_macros.h"
 
+#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS
+# define NOUVEAU_GETPARAM_GRAPH_UNITS 13
+#endif
+
 static boolean
 nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
                                 enum pipe_format format,
@@ -494,6 +498,35 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
    }
 }
 
+boolean
+nvc0_screen_resize_tls_area(struct nvc0_screen *screen,
+                            uint32_t lpos, uint32_t lneg, uint32_t cstack)
+{
+   struct nouveau_bo *bo = NULL;
+   int ret;
+   uint64_t size = (lpos + lneg) * 32 + cstack;
+
+   if (size >= (1 << 20)) {
+      NOUVEAU_ERR("requested TLS size too large: 0x%"PRIx64"\n", size);
+      return FALSE;
+   }
+
+   size *= (screen->base.device->chipset >= 0xe0) ? 64 : 48; /* max warps */
+   size *= screen->mp_count;
+
+   size = align(size, 1 << 17);
+
+   ret = nouveau_bo_new(screen->base.device, NOUVEAU_BO_VRAM, 1 << 17, size,
+                        NULL, &bo);
+   if (ret) {
+      NOUVEAU_ERR("failed to allocate TLS area, size: 0x%"PRIx64"\n", size);
+      return FALSE;
+   }
+   nouveau_bo_ref(NULL, &screen->tls);
+   screen->tls = bo;
+   return TRUE;
+}
+
 #define FAIL_SCREEN_INIT(str, err)                    \
    do {                                               \
       NOUVEAU_ERR(str, err);                          \
@@ -508,6 +541,7 @@ nvc0_screen_create(struct nouveau_device *dev)
    struct pipe_screen *pscreen;
    struct nouveau_object *chan;
    struct nouveau_pushbuf *push;
+   uint64_t value;
    uint32_t obj_class;
    int ret;
    unsigned i;
@@ -733,18 +767,21 @@ nvc0_screen_create(struct nouveau_device *dev)
    PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
    PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
 
-   /* max MPs * max warps per MP (TODO: ask kernel) */
-   if (screen->eng3d->oclass >= NVE4_3D_CLASS)
-      screen->tls_size = 8 * 64 * 32;
-   else
-      screen->tls_size = 16 * 48 * 32;
-   screen->tls_size *= NVC0_CAP_MAX_PROGRAM_TEMPS * 16;
-   screen->tls_size = align(screen->tls_size, 1 << 17);
+   if (dev->drm_version >= 0x01000101) {
+      ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
+      if (ret) {
+         NOUVEAU_ERR("NOUVEAU_GETPARAM_GRAPH_UNITS failed.\n");
+         goto fail;
+      }
+   } else {
+      if (dev->chipset >= 0xe0 && dev->chipset < 0xf0)
+         value = (8 << 8) | 4;
+      else
+         value = (16 << 8) | 4;
+   }
+   screen->mp_count = value >> 8;
 
-   ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17,
-                        screen->tls_size, NULL, &screen->tls);
-   if (ret)
-      goto fail;
+   nvc0_screen_resize_tls_area(screen, 128 * 16, 0, 0x200);
 
    BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
    PUSH_DATAh(push, screen->text->offset);
@@ -752,8 +789,8 @@ nvc0_screen_create(struct nouveau_device *dev)
    BEGIN_NVC0(push, NVC0_3D(TEMP_ADDRESS_HIGH), 4);
    PUSH_DATAh(push, screen->tls->offset);
    PUSH_DATA (push, screen->tls->offset);
-   PUSH_DATA (push, screen->tls_size >> 32);
-   PUSH_DATA (push, screen->tls_size);
+   PUSH_DATA (push, screen->tls->size >> 32);
+   PUSH_DATA (push, screen->tls->size);
    BEGIN_NVC0(push, NVC0_3D(WARP_TEMP_ALLOC), 1);
    PUSH_DATA (push, 0);
    BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1);
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h
index 16f0feb..13dc83e 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nvc0/nvc0_screen.h
@@ -38,7 +38,7 @@ struct nvc0_screen {
    struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
    struct nouveau_bo *poly_cache;
 
-   uint64_t tls_size;
+   uint16_t mp_count;
 
    struct nouveau_heap *text_heap;
    struct nouveau_heap *lib_code; /* allocated from text_heap */
@@ -86,6 +86,9 @@ int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
 
 int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
 
+boolean nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos,
+                                    uint32_t lneg, uint32_t cstack);
+
 static INLINE void
 nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
 {
diff --git a/src/gallium/drivers/nvc0/nve4_compute.c b/src/gallium/drivers/nvc0/nve4_compute.c
index c61d90c..943ae78 100644
--- a/src/gallium/drivers/nvc0/nve4_compute.c
+++ b/src/gallium/drivers/nvc0/nve4_compute.c
@@ -74,13 +74,13 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
     * Actually this might be per-MP TEMP size and looks like I'm only using
     * 2 MPs instead of all 8.
     */
-   BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_SIZE_HIGH(0)), 3);
-   PUSH_DATAh(push, screen->tls_size / 2);
-   PUSH_DATA (push, screen->tls_size / 2);
+   BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(0)), 3);
+   PUSH_DATAh(push, screen->tls->size / screen->mp_count);
+   PUSH_DATA (push, screen->tls->size / screen->mp_count);
    PUSH_DATA (push, 0xff);
-   BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_SIZE_HIGH(1)), 3);
-   PUSH_DATAh(push, screen->tls_size / 2);
-   PUSH_DATA (push, screen->tls_size / 2);
+   BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(1)), 3);
+   PUSH_DATAh(push, screen->tls->size / screen->mp_count);
+   PUSH_DATA (push, screen->tls->size / screen->mp_count);
    PUSH_DATA (push, 0xff);
 
    /* Unified address space ? Who needs that ? Certainly not OpenCL.
diff --git a/src/gallium/drivers/nvc0/nve4_compute.xml.h b/src/gallium/drivers/nvc0/nve4_compute.xml.h
index e513ae7..2f110f5 100644
--- a/src/gallium/drivers/nvc0/nve4_compute.xml.h
+++ b/src/gallium/drivers/nvc0/nve4_compute.xml.h
@@ -8,10 +8,10 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
 git clone git://0x04.net/rules-ng-ng
 
 The rules-ng-ng source files this header was generated from are:
-- nve4_compute.xml (   6352 bytes, from 2013-03-10 14:59:45)
+- nve4_compute.xml (  11117 bytes, from 2013-03-27 19:22:20)
 - copyright.xml    (   6452 bytes, from 2011-08-11 18:25:12)
-- nvchipsets.xml   (   3870 bytes, from 2013-03-08 12:41:50)
-- nv_object.xml    (  13238 bytes, from 2013-02-07 16:35:34)
+- nvchipsets.xml   (   3954 bytes, from 2013-03-26 01:26:43)
+- nv_object.xml    (  13792 bytes, from 2013-03-26 01:26:43)
 - nv_defs.xml      (   4437 bytes, from 2011-08-11 18:25:12)
 - nv50_defs.xml    (   7783 bytes, from 2013-03-08 12:42:29)
 
@@ -110,15 +110,15 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define NVE4_COMPUTE_LAUNCH					0x000002bc
 
-#define NVE4_COMPUTE_TEMP_SIZE(i0)			       (0x000002e4 + 0xc*(i0))
-#define NVE4_COMPUTE_TEMP_SIZE__ESIZE				0x0000000c
-#define NVE4_COMPUTE_TEMP_SIZE__LEN				0x00000002
+#define NVE4_COMPUTE_MP_TEMP_SIZE(i0)			       (0x000002e4 + 0xc*(i0))
+#define NVE4_COMPUTE_MP_TEMP_SIZE__ESIZE			0x0000000c
+#define NVE4_COMPUTE_MP_TEMP_SIZE__LEN				0x00000002
 
-#define NVE4_COMPUTE_TEMP_SIZE_HIGH(i0)			       (0x000002e4 + 0xc*(i0))
+#define NVE4_COMPUTE_MP_TEMP_SIZE_HIGH(i0)		       (0x000002e4 + 0xc*(i0))
 
-#define NVE4_COMPUTE_TEMP_SIZE_LOW(i0)			       (0x000002e8 + 0xc*(i0))
+#define NVE4_COMPUTE_MP_TEMP_SIZE_LOW(i0)		       (0x000002e8 + 0xc*(i0))
 
-#define NVE4_COMPUTE_TEMP_SIZE_MASK(i0)			       (0x000002ec + 0xc*(i0))
+#define NVE4_COMPUTE_MP_TEMP_SIZE_MASK(i0)		       (0x000002ec + 0xc*(i0))
 
 #define NVE4_COMPUTE_UNK0310					0x00000310
 
@@ -200,6 +200,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define NVE4_COMPUTE_UNK260c					0x0000260c
 
 #define NVE4_COMPUTE_LAUNCH_DESC__SIZE				0x00000100
+#define NVE4_COMPUTE_LAUNCH_DESC_6				0x00000018
+#define NVE4_COMPUTE_LAUNCH_DESC_6_NOTIFY__MASK			0x00000c00
+#define NVE4_COMPUTE_LAUNCH_DESC_6_NOTIFY__SHIFT		10
+
 #define NVE4_COMPUTE_LAUNCH_DESC_PROG_START			0x00000020
 
 #define NVE4_COMPUTE_LAUNCH_DESC_12				0x00000030




More information about the mesa-commit mailing list