[Mesa-dev] [PATCH 07/30] i965/vec4: Allow for dispatch_grf_start_reg to vary.
Paul Berry
stereotype441 at gmail.com
Tue Aug 20 11:30:22 PDT 2013
Both 3DSTATE_VS and 3DSTATE_GS have a dispatch_grf_start_reg control,
which determines the register where the hardware delivers data sourced
from the URB (push constants followed by per-vertex input data).
For vertex shaders, we always set dispatch_grf_start_reg to 1, since
R1 is always the first register available for push constants in vertex
shaders.
For geometry shaders, we'll need the flexibility to set
dispatch_grf_start_reg to different values depending on the behaviour
of the geometry shader; if it accesses gl_PrimitiveIDIn, we'll need to
set it to 2 to allow the primitive ID to be delivered to the thread in
R1.
This patch eliminates the assumption that dispatch_grf_start_reg is
always 1. In vec4_visitor, we record the regnum that was passed to
vec4_visitor::setup_uniforms() in prog_data for later use. In
vec4_generator, we consult this value when converting an abstract
UNIFORM register to a concrete hardware register. And in the code
that emits 3DSTATE_VS, we set dispatch_grf_start_reg based on the
value recorded in prog_data.
This will allow us to set dispatch_grf_start_reg to the appropriate
value when compiling geometry shaders. Vertex shaders will continue
to always use a dispatch_grf_start_reg of 1.
---
src/mesa/drivers/dri/i965/brw_context.h | 6 ++++++
src/mesa/drivers/dri/i965/brw_vec4.cpp | 4 +++-
src/mesa/drivers/dri/i965/brw_vec4.h | 2 +-
src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 7 ++++---
src/mesa/drivers/dri/i965/brw_vs_state.c | 3 ++-
src/mesa/drivers/dri/i965/gen6_vs_state.c | 3 ++-
src/mesa/drivers/dri/i965/gen7_vs_state.c | 3 ++-
7 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index dae3219..0c4aab6 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -480,6 +480,12 @@ struct brw_gs_prog_data {
struct brw_vec4_prog_data {
struct brw_vue_map vue_map;
+ /**
+ * Register where the thread expects to find input data from the URB
+ * (typically uniforms, followed by per-vertex inputs).
+ */
+ GLuint dispatch_grf_start_reg;
+
GLuint curb_read_length;
GLuint urb_read_length;
GLuint total_grf;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 36527cd..bfef8e0 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1260,6 +1260,8 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
int
vec4_visitor::setup_uniforms(int reg)
{
+ prog_data->dispatch_grf_start_reg = reg;
+
/* The pre-gen6 VS requires that some push constants get loaded no
* matter what, or the GPU would hang.
*/
@@ -1280,7 +1282,7 @@ vec4_visitor::setup_uniforms(int reg)
prog_data->nr_params = this->uniforms * 4;
- prog_data->curb_read_length = reg - 1;
+ prog_data->curb_read_length = reg - prog_data->dispatch_grf_start_reg;
return reg;
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 512b6b3..587cb45 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -206,7 +206,7 @@ public:
src_reg src2 = src_reg());
struct brw_reg get_dst(void);
- struct brw_reg get_src(int i);
+ struct brw_reg get_src(const struct brw_vec4_prog_data *prog_data, int i);
dst_reg dst;
src_reg src[3];
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index ce9bcd0..53b4bf2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -66,7 +66,7 @@ vec4_instruction::get_dst(void)
}
struct brw_reg
-vec4_instruction::get_src(int i)
+vec4_instruction::get_src(const struct brw_vec4_prog_data *prog_data, int i)
{
struct brw_reg brw_reg;
@@ -100,7 +100,8 @@ vec4_instruction::get_src(int i)
break;
case UNIFORM:
- brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2,
+ brw_reg = stride(brw_vec4_grf(prog_data->dispatch_grf_start_reg +
+ (src[i].reg + src[i].reg_offset) / 2,
((src[i].reg + src[i].reg_offset) % 2) * 4),
0, 4, 1);
brw_reg = retype(brw_reg, src[i].type);
@@ -946,7 +947,7 @@ vec4_generator::generate_code(exec_list *instructions)
}
for (unsigned int i = 0; i < 3; i++) {
- src[i] = inst->get_src(i);
+ src[i] = inst->get_src(this->prog_data, i);
}
dst = inst->get_dst();
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index a8729df..e5421f1 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -92,7 +92,8 @@ brw_upload_vs_unit(struct brw_context *brw)
vs->thread3.urb_entry_read_length = brw->vs.prog_data->base.urb_read_length;
vs->thread3.const_urb_entry_read_length
= brw->vs.prog_data->base.curb_read_length;
- vs->thread3.dispatch_grf_start_reg = 1;
+ vs->thread3.dispatch_grf_start_reg =
+ brw->vs.prog_data->base.dispatch_grf_start_reg;
vs->thread3.urb_entry_read_offset = 0;
/* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM, BRW_NEW_VERTEX_PROGRAM */
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index 4af7cda..c5f2fd0 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -159,7 +159,8 @@ upload_vs_state(struct brw_context *brw)
OUT_BATCH(0);
}
- OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
+ OUT_BATCH((brw->vs.prog_data->base.dispatch_grf_start_reg <<
+ GEN6_VS_DISPATCH_START_GRF_SHIFT) |
(brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index 7a6ba59..b2493cf 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -99,7 +99,8 @@ upload_vs_state(struct brw_context *brw)
OUT_BATCH(0);
}
- OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
+ OUT_BATCH((brw->vs.prog_data->base.dispatch_grf_start_reg <<
+ GEN6_VS_DISPATCH_START_GRF_SHIFT) |
(brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
--
1.8.3.4
More information about the mesa-dev
mailing list