[Mesa-dev] [PATCH 07/23] i965/vs: Unify URB entry size/read length calculations between backends.
Kenneth Graunke
kenneth at whitecape.org
Wed Sep 30 00:58:11 PDT 2015
Both the vec4 and scalar VS backends had virtually identical URB entry
size and read length calculations. We can move those up a level to
backend-agnostic code and reuse it for both.
Unfortunately, the backends need to know nr_attributes to compute
first_non_payload_grf, so I had to store that in prog_data. We could
derive it from urb_read_length, but that is nr_attributes / 2 rounded
up, so 2 * urb_read_length is nr_attributes rounded up to a multiple of
two, and using it would waste one attribute's worth of registers
whenever nr_attributes is odd.
There's more code to be removed in the vec4 backend, but that will
come in a follow-on patch.
Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
src/mesa/drivers/dri/i965/brw_context.h | 2 ++
src/mesa/drivers/dri/i965/brw_fs.cpp | 15 +++------------
src/mesa/drivers/dri/i965/brw_vec4.cpp | 19 +------------------
src/mesa/drivers/dri/i965/brw_vs.c | 32 ++++++++++++++++++++++++++++++++
4 files changed, 38 insertions(+), 30 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 8b790fe..118d664 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -720,6 +720,8 @@ struct brw_vs_prog_data {
GLbitfield64 inputs_read;
+ unsigned nr_attributes;
+
bool uses_vertexid;
bool uses_instanceid;
};
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b269ade..e23cb18 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1522,21 +1522,12 @@ void
fs_visitor::assign_vs_urb_setup()
{
brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
- int grf, count, slot, channel, attr;
+ int grf, slot, channel, attr;
assert(stage == MESA_SHADER_VERTEX);
- count = _mesa_bitcount_64(vs_prog_data->inputs_read);
- if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid)
- count++;
/* Each attribute is 4 regs. */
- this->first_non_payload_grf += count * 4;
-
- unsigned vue_entries =
- MAX2(count, vs_prog_data->base.vue_map.num_slots);
-
- vs_prog_data->base.urb_entry_size = ALIGN(vue_entries, 4) / 4;
- vs_prog_data->base.urb_read_length = (count + 1) / 2;
+ this->first_non_payload_grf += 4 * vs_prog_data->nr_attributes;
assert(vs_prog_data->base.urb_read_length <= 15);
@@ -1546,7 +1537,7 @@ fs_visitor::assign_vs_urb_setup()
if (inst->src[i].file == ATTR) {
if (inst->src[i].reg == VERT_ATTRIB_MAX) {
- slot = count - 1;
+ slot = vs_prog_data->nr_attributes - 1;
} else {
/* Attributes come in in a contiguous block, ordered by their
* gl_vert_attrib value. That means we can compute the slot
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index c61b385..b9b1f2e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1609,28 +1609,11 @@ vec4_vs_visitor::setup_attributes(int payload_reg)
*/
if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid) {
attribute_map[VERT_ATTRIB_MAX] = payload_reg + nr_attributes;
- nr_attributes++;
}
lower_attributes_to_hw_regs(attribute_map, false /* interleaved */);
- /* The BSpec says we always have to read at least one thing from
- * the VF, and it appears that the hardware wedges otherwise.
- */
- if (nr_attributes == 0)
- nr_attributes = 1;
-
- prog_data->urb_read_length = (nr_attributes + 1) / 2;
-
- unsigned vue_entries =
- MAX2(nr_attributes, prog_data->vue_map.num_slots);
-
- if (devinfo->gen == 6)
- prog_data->urb_entry_size = ALIGN(vue_entries, 8) / 8;
- else
- prog_data->urb_entry_size = ALIGN(vue_entries, 4) / 4;
-
- return payload_reg + nr_attributes;
+ return payload_reg + vs_prog_data->nr_attributes;
}
int
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 0c60bde..16d7c41 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -183,6 +183,38 @@ brw_codegen_vs_prog(struct brw_context *brw,
&prog_data.base.vue_map, outputs_written,
prog ? prog->SeparateShader : false);
+ unsigned nr_attributes = _mesa_bitcount_64(prog_data.inputs_read);
+
+ /* gl_VertexID and gl_InstanceID are system values, but arrive via an
+ * incoming vertex attribute. So, add an extra slot.
+ */
+ if (vp->program.Base.SystemValuesRead &
+ (BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
+ BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) {
+ nr_attributes++;
+ }
+
+ /* The BSpec says we always have to read at least one thing from the VF,
+ * and it appears that the hardware wedges otherwise.
+ */
+ if (nr_attributes == 0 && !brw->intelScreen->compiler->scalar_vs)
+ nr_attributes = 1;
+
+ prog_data.nr_attributes = nr_attributes;
+ prog_data.base.urb_read_length = DIV_ROUND_UP(nr_attributes, 2);
+
+ /* Since vertex shaders reuse the same VUE entry for inputs and outputs
+ * (overwriting the original contents), we need to make sure the size is
+ * the larger of the two.
+ */
+ const unsigned vue_entries =
+ MAX2(nr_attributes, prog_data.base.vue_map.num_slots);
+
+ if (brw->gen == 6)
+ prog_data.base.urb_entry_size = DIV_ROUND_UP(vue_entries, 8);
+ else
+ prog_data.base.urb_entry_size = DIV_ROUND_UP(vue_entries, 4);
+
if (0) {
_mesa_fprint_program_opt(stderr, &vp->program.Base, PROG_PRINT_DEBUG,
true);
--
2.5.3
More information about the mesa-dev
mailing list