<p dir="ltr">As I recall, we already upload MS offsets for FS. Perhaps reuse the same spot for these CP ones? What are they used for, anyway? I can't think of anything offhand, but perhaps OpenCL needs something funky?</p>
<div class="gmail_quote">On Mar 31, 2016 12:09 PM, "Samuel Pitoiset" &lt;<a href="mailto:samuel.pitoiset@gmail.com">samuel.pitoiset@gmail.com</a>&gt; wrote:<br type="attribution"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Instead of using the screen->parm buffer object which will be removed,<br>
upload auxiliary constants to uniform_bo to be consistent regarding<br>
what we already do for Fermi.<br>
<br>
This breaks surfaces support (for compute only) but this will be<br>
properly re-introduced later for ARB_shader_image_load_store.<br>
<br>
Signed-off-by: Samuel Pitoiset &lt;<a href="mailto:samuel.pitoiset@gmail.com">samuel.pitoiset@gmail.com</a>&gt;<br>
---<br>
src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 6 ++++<br>
src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 11 ++++---<br>
src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 40 ++++++++++++++++---------<br>
src/gallium/drivers/nouveau/nvc0/nve4_compute.h | 25 ----------------<br>
4 files changed, 37 insertions(+), 45 deletions(-)<br>
<br>
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h<br>
index 31e1272..34dff6a 100644<br>
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h<br>
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h<br>
@@ -113,6 +113,12 @@<br>
/* 8 user clip planes, at 4 32-bits floats each */<br>
#define NVC0_CB_AUX_UCP_INFO 0x100<br>
#define NVC0_CB_AUX_UCP_SIZE (PIPE_MAX_CLIP_PLANES * 4 * 4)<br>
+/* 8 sets of 32-bits pairs MS offsets */<br>
+#define NVC0_CB_AUX_MS_INFO 0x100 /* CP */<br>
+#define NVC0_CB_AUX_MS_SIZE (8 * 2 * 4)<br>
+/* block/grid size, at 3 32-bits integers each and gridid */<br>
+#define NVC0_CB_AUX_GRID_INFO 0x140 /* CP */<br>
+#define NVC0_CB_AUX_GRID_SIZE (7 * 4)<br>
/* 8 sets of 32-bits integer pairs sample offsets */<br>
#define NVC0_CB_AUX_SAMPLE_INFO 0x180 /* FP */<br>
#define NVC0_CB_AUX_SAMPLE_SIZE (8 * 4 * 2)<br>
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c<br>
index a3433f4..d76b48f 100644<br>
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c<br>
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c<br>
@@ -540,17 +540,16 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,<br>
<br>
if (prog->type == PIPE_SHADER_COMPUTE) {<br>
if (chipset >= NVISA_GK104_CHIPSET) {<br>
- info->io.auxCBSlot = 0;<br>
- info->io.texBindBase = NVE4_CP_INPUT_TEX(0);<br>
- info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);<br>
- info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0);<br>
+ info->io.auxCBSlot = 7;<br>
+ info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);<br>
+ info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO;<br>
info->io.bufInfoBase = 0; /* TODO */<br>
} else {<br>
info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);<br>
- info->io.suInfoBase = 0; /* TODO */<br>
}<br>
info->io.msInfoCBSlot = 0;<br>
- info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;<br>
+ info->io.msInfoBase = NVC0_CB_AUX_MS_INFO;<br>
+ info->io.suInfoBase = 0; /* TODO */<br>
} else {<br>
if (chipset >= NVISA_GK104_CHIPSET) {<br>
info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);<br>
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c<br>
index b3d8414..cae4838 100644<br>
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c<br>
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c<br>
@@ -41,6 +41,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,<br>
int i;<br>
int ret;<br>
uint32_t obj_class;<br>
+ uint64_t address;<br>
<br>
switch (dev->chipset & ~0xf) {<br>
case 0x100:<br>
@@ -65,7 +66,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,<br>
return ret;<br>
}<br>
<br>
- ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, NVE4_CP_PARAM_SIZE, NULL,<br>
+ ret = nouveau_bo_new(dev, NV_VRAM_DOMAIN(&screen->base), 0, 1 << 12, NULL,<br>
&screen->parm);<br>
if (ret)<br>
return ret;<br>
@@ -128,15 +129,17 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,<br>
}<br>
<br>
BEGIN_NVC0(push, NVE4_CP(TEX_CB_INDEX), 1);<br>
- PUSH_DATA (push, 0); /* does not interefere with 3D */<br>
+ PUSH_DATA (push, 7); /* does not interfere with 3D */<br>
<br>
if (obj_class == NVF0_COMPUTE_CLASS)<br>
IMMED_NVC0(push, SUBC_CP(0x02c4), 1);<br>
<br>
+ address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);<br>
+<br>
/* MS sample coordinate offsets: these do not work with _ALT modes ! */<br>
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);<br>
- PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);<br>
- PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);<br>
+ PUSH_DATAh(push, address + NVC0_CB_AUX_MS_INFO);<br>
+ PUSH_DATA (push, address + NVC0_CB_AUX_MS_INFO);<br>
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);<br>
PUSH_DATA (push, 64);<br>
PUSH_DATA (push, 1);<br>
@@ -159,7 +162,7 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,<br>
PUSH_DATA (push, 3); /* 7 */<br>
PUSH_DATA (push, 1);<br>
<br>
-#ifdef DEBUG<br>
+#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER<br>
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);<br>
PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);<br>
PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);<br>
@@ -194,6 +197,9 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)<br>
uint32_t mask;<br>
unsigned i;<br>
const unsigned t = 1;<br>
+ uint64_t address;<br>
+<br>
+ address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);<br>
<br>
mask = nvc0->surfaces_dirty[t];<br>
while (mask) {<br>
@@ -205,8 +211,8 @@ nve4_compute_validate_surfaces(struct nvc0_context *nvc0)<br>
* directly instead of via binding points, so we have to supply them.<br>
*/<br>
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);<br>
- PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));<br>
- PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));<br>
+ PUSH_DATAh(push, address + NVC0_CB_AUX_BUF_INFO(i));<br>
+ PUSH_DATA (push, address + NVC0_CB_AUX_BUF_INFO(i));<br>
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);<br>
PUSH_DATA (push, 64);<br>
PUSH_DATA (push, 1);<br>
@@ -271,6 +277,7 @@ static void<br>
nve4_compute_set_tex_handles(struct nvc0_context *nvc0)<br>
{<br>
struct nouveau_pushbuf *push = nvc0->base.pushbuf;<br>
+ struct nvc0_screen *screen = nvc0->screen;<br>
uint64_t address;<br>
const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE);<br>
unsigned i, n;<br>
@@ -282,11 +289,11 @@ nve4_compute_set_tex_handles(struct nvc0_context *nvc0)<br>
n = util_logbase2(dirty) + 1 - i;<br>
assert(n);<br>
<br>
- address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i);<br>
+ address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s);<br>
<br>
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);<br>
- PUSH_DATAh(push, address);<br>
- PUSH_DATA (push, address);<br>
+ PUSH_DATAh(push, address + NVC0_CB_AUX_TEX_INFO(i));<br>
+ PUSH_DATA (push, address + NVC0_CB_AUX_TEX_INFO(i));<br>
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);<br>
PUSH_DATA (push, n * 4);<br>
PUSH_DATA (push, 0x1);<br>
@@ -334,6 +341,9 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,<br>
struct nvc0_screen *screen = nvc0->screen;<br>
struct nouveau_pushbuf *push = nvc0->base.pushbuf;<br>
struct nvc0_program *cp = nvc0->compprog;<br>
+ uint64_t address;<br>
+<br>
+ address = screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5);<br>
<br>
if (cp->parm_size) {<br>
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);<br>
@@ -347,8 +357,8 @@ nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,<br>
PUSH_DATAp(push, input, cp->parm_size / 4);<br>
}<br>
BEGIN_NVC0(push, NVE4_CP(UPLOAD_DST_ADDRESS_HIGH), 2);<br>
- PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));<br>
- PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));<br>
+ PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO);<br>
+ PUSH_DATA (push, address + NVC0_CB_AUX_GRID_INFO);<br>
BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);<br>
PUSH_DATA (push, 7 * 4);<br>
PUSH_DATA (push, 0x1);<br>
@@ -408,7 +418,9 @@ nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,<br>
if (nvc0->constbuf[s][i].u.buf)<br>
nve4_cp_launch_desc_set_ctx_cb(desc, i + 1, &nvc0->constbuf[s][i]);<br>
}<br>
- nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, NVE4_CP_INPUT_SIZE);<br>
+ nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, 1 << 12);<br>
+ nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,<br>
+ NVC0_CB_AUX_INFO(5), 1 << 10);<br>
}<br>
<br>
static inline struct nve4_cp_launch_desc *<br>
@@ -495,7 +507,7 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0)<br>
struct nouveau_pushbuf *push = nvc0->base.pushbuf;<br>
const unsigned s = 5;<br>
unsigned i;<br>
- uint32_t commands[2][NVE4_CP_INPUT_TEX_MAX];<br>
+ uint32_t commands[2][32];<br>
unsigned n[2] = { 0, 0 };<br>
<br>
for (i = 0; i < nvc0->num_textures[s]; ++i) {<br>
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h<br>
index 84f8593..dcafbed 100644<br>
--- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h<br>
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h<br>
@@ -4,31 +4,6 @@<br>
<br>
#include "nvc0/nve4_compute.xml.h"<br>
<br>
-/* Input space is implemented as c0[], to which we bind the screen->parm bo.<br>
- */<br>
-#define NVE4_CP_INPUT_USER 0x0000<br>
-#define NVE4_CP_INPUT_USER_LIMIT 0x1000<br>
-#define NVE4_CP_INPUT_GRID_INFO(i) (0x1000 + (i) * 4)<br>
-#define NVE4_CP_INPUT_NTID(i) (0x1000 + (i) * 4)<br>
-#define NVE4_CP_INPUT_NCTAID(i) (0x100c + (i) * 4)<br>
-#define NVE4_CP_INPUT_GRIDID 0x1018<br>
-#define NVE4_CP_INPUT_TEX(i) (0x1040 + (i) * 4)<br>
-#define NVE4_CP_INPUT_TEX_STRIDE 4<br>
-#define NVE4_CP_INPUT_TEX_MAX 32<br>
-#define NVE4_CP_INPUT_MS_OFFSETS 0x10c0<br>
-#define NVE4_CP_INPUT_SUF_STRIDE 64<br>
-#define NVE4_CP_INPUT_SUF(i) (0x1100 + (i) * NVE4_CP_INPUT_SUF_STRIDE)<br>
-#define NVE4_CP_INPUT_SUF_MAX 32<br>
-#define NVE4_CP_INPUT_TRAP_INFO_PTR 0x1900<br>
-#define NVE4_CP_INPUT_TEMP_PTR 0x1908<br>
-#define NVE4_CP_INPUT_MP_TEMP_SIZE 0x1910<br>
-#define NVE4_CP_INPUT_WARP_TEMP_SIZE 0x1914<br>
-#define NVE4_CP_INPUT_CSTACK_SIZE 0x1918<br>
-#define NVE4_CP_INPUT_SIZE 0x1a00<br>
-#define NVE4_CP_PARAM_TRAP_INFO 0x2000<br>
-#define NVE4_CP_PARAM_TRAP_INFO_SZ (1 << 16)<br>
-#define NVE4_CP_PARAM_SIZE (NVE4_CP_PARAM_TRAP_INFO + (1 << 16))<br>
-<br>
struct nve4_cp_launch_desc<br>
{<br>
u32 unk0[8];<br>
--<br>
2.7.4<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div>