[Mesa-dev] [PATCH v2 07/19] i965/vs: split brw_vs_prog_data into generic and VS-specific parts.
Paul Berry
stereotype441 at gmail.com
Tue Apr 9 15:11:10 PDT 2013
This will allow the generic parts to be re-used for geometry shaders.
Reviewed-by: Jordan Justen <jordan.l.justen at intel.com>
v2: Put urb_read_length and urb_entry_size in the generic struct.
---
src/mesa/drivers/dri/i965/brw_context.h | 29 +++++++---
src/mesa/drivers/dri/i965/brw_curbe.c | 6 +-
src/mesa/drivers/dri/i965/brw_gs.c | 4 +-
src/mesa/drivers/dri/i965/brw_urb.c | 2 +-
src/mesa/drivers/dri/i965/brw_vec4.cpp | 34 +++++------
.../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 10 ++--
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 35 +++++------
src/mesa/drivers/dri/i965/brw_vec4_vp.cpp | 12 ++--
src/mesa/drivers/dri/i965/brw_vs.c | 67 ++++++++++++++++------
src/mesa/drivers/dri/i965/brw_vs.h | 3 +
src/mesa/drivers/dri/i965/brw_vs_state.c | 14 +++--
src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 18 +++---
src/mesa/drivers/dri/i965/gen6_urb.c | 2 +-
src/mesa/drivers/dri/i965/gen6_vs_state.c | 16 +++---
src/mesa/drivers/dri/i965/gen7_urb.c | 2 +-
src/mesa/drivers/dri/i965/gen7_vs_state.c | 6 +-
16 files changed, 155 insertions(+), 105 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 559f7e8..93bcf55 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -435,10 +435,11 @@ struct brw_gs_prog_data {
unsigned svbi_postincrement_value;
};
-/* Note: brw_vs_prog_data_compare() must be updated when adding fields to this
- * struct!
+
+/* Note: brw_vec4_prog_data_compare() must be updated when adding fields to
+ * this struct!
*/
-struct brw_vs_prog_data {
+struct brw_vec4_prog_data {
struct brw_vue_map vue_map;
GLuint curb_read_length;
@@ -448,21 +449,31 @@ struct brw_vs_prog_data {
GLuint nr_pull_params; /**< number of dwords referenced by pull_param[] */
GLuint total_scratch;
- GLbitfield64 inputs_read;
-
- /* Used for calculating urb partitions:
+ /* Used for calculating urb partitions. In the VS, this is the size of the
+ * URB entry used for both input and output to the thread. In the GS, this
+ * is the size of the URB entry used for output.
*/
GLuint urb_entry_size;
- bool uses_vertexid;
-
int num_surfaces;
- /* These pointers must appear last. See brw_vs_prog_data_compare(). */
+ /* These pointers must appear last. See brw_vec4_prog_data_compare(). */
const float **param;
const float **pull_param;
};
+
+/* Note: brw_vs_prog_data_compare() must be updated when adding fields to this
+ * struct!
+ */
+struct brw_vs_prog_data {
+ struct brw_vec4_prog_data base;
+
+ GLbitfield64 inputs_read;
+
+ bool uses_vertexid;
+};
+
/** Number of texture sampler units */
#define BRW_MAX_TEX_UNIT 16
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index b332f19..3abd22b 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -60,7 +60,7 @@ static void calculate_curbe_offsets( struct brw_context *brw )
const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
/* BRW_NEW_VERTEX_PROGRAM */
- const GLuint nr_vp_regs = (brw->vs.prog_data->nr_params + 15) / 16;
+ const GLuint nr_vp_regs = (brw->vs.prog_data->base.nr_params + 15) / 16;
GLuint nr_clip_regs = 0;
GLuint total_regs;
@@ -240,8 +240,8 @@ brw_upload_constant_buffer(struct brw_context *brw)
if (brw->curbe.vs_size) {
GLuint offset = brw->curbe.vs_start * 16;
- for (i = 0; i < brw->vs.prog_data->nr_params; i++) {
- buf[offset + i] = *brw->vs.prog_data->param[i];
+ for (i = 0; i < brw->vs.prog_data->base.nr_params; i++) {
+ buf[offset + i] = *brw->vs.prog_data->base.param[i];
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index 00a2a5d..caa3b3e 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -57,7 +57,7 @@ static void compile_gs_prog( struct brw_context *brw,
memset(&c, 0, sizeof(c));
c.key = *key;
- c.vue_map = brw->vs.prog_data->vue_map;
+ c.vue_map = brw->vs.prog_data->base.vue_map;
c.nr_regs = (c.vue_map.num_slots + 1)/2;
mem_ctx = ralloc_context(NULL);
@@ -167,7 +167,7 @@ static void populate_key( struct brw_context *brw,
memset(key, 0, sizeof(*key));
/* CACHE_NEW_VS_PROG (part of VUE map) */
- key->attrs = brw->vs.prog_data->vue_map.slots_valid;
+ key->attrs = brw->vs.prog_data->base.vue_map.slots_valid;
/* BRW_NEW_PRIMITIVE */
key->primitive = brw->primitive;
diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c
index b1126b5..3f42ba8 100644
--- a/src/mesa/drivers/dri/i965/brw_urb.c
+++ b/src/mesa/drivers/dri/i965/brw_urb.c
@@ -116,7 +116,7 @@ static void recalculate_urb_fence( struct brw_context *brw )
{
struct intel_context *intel = &brw->intel;
GLuint csize = brw->curbe.total_size;
- GLuint vsize = brw->vs.prog_data->urb_entry_size;
+ GLuint vsize = brw->vs.prog_data->base.urb_entry_size;
GLuint sfsize = brw->sf.prog_data->urb_entry_size;
if (csize < limits[CS].min_entry_size)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index b924c70..8a52910 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -410,8 +410,8 @@ vec4_visitor::pack_uniform_registers()
/* Move the references to the data */
for (int j = 0; j < size; j++) {
- prog_data->param[dst * 4 + new_chan[src] + j] =
- prog_data->param[src * 4 + j];
+ prog_data->base.param[dst * 4 + new_chan[src] + j] =
+ prog_data->base.param[src * 4 + j];
}
this->uniform_vector_size[dst] += size;
@@ -562,16 +562,16 @@ vec4_visitor::move_push_constants_to_pull_constants()
pull_constant_loc[i / 4] = -1;
if (i >= max_uniform_components) {
- const float **values = &prog_data->param[i];
+ const float **values = &prog_data->base.param[i];
/* Try to find an existing copy of this uniform in the pull
* constants if it was part of an array access already.
*/
- for (unsigned int j = 0; j < prog_data->nr_pull_params; j += 4) {
+ for (unsigned int j = 0; j < prog_data->base.nr_pull_params; j += 4) {
int matches;
for (matches = 0; matches < 4; matches++) {
- if (prog_data->pull_param[j + matches] != values[matches])
+ if (prog_data->base.pull_param[j + matches] != values[matches])
break;
}
@@ -582,11 +582,11 @@ vec4_visitor::move_push_constants_to_pull_constants()
}
if (pull_constant_loc[i / 4] == -1) {
- assert(prog_data->nr_pull_params % 4 == 0);
- pull_constant_loc[i / 4] = prog_data->nr_pull_params / 4;
+ assert(prog_data->base.nr_pull_params % 4 == 0);
+ pull_constant_loc[i / 4] = prog_data->base.nr_pull_params / 4;
for (int j = 0; j < 4; j++) {
- prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
+ prog_data->base.pull_param[prog_data->base.nr_pull_params++] = values[j];
}
}
}
@@ -646,7 +646,8 @@ vec4_visitor::opt_set_dependency_control()
cfg_t cfg(this);
- assert(prog_data->total_grf || !"Must be called after register allocation");
+ assert(prog_data->base.total_grf ||
+ !"Must be called after register allocation");
for (int i = 0; i < cfg.num_blocks; i++) {
bblock_t *bblock = cfg.blocks[i];
@@ -1233,14 +1234,15 @@ vec4_visitor::setup_attributes(int payload_reg)
if (nr_attributes == 0)
nr_attributes = 1;
- prog_data->urb_read_length = (nr_attributes + 1) / 2;
+ prog_data->base.urb_read_length = (nr_attributes + 1) / 2;
- unsigned vue_entries = MAX2(nr_attributes, prog_data->vue_map.num_slots);
+ unsigned vue_entries =
+ MAX2(nr_attributes, prog_data->base.vue_map.num_slots);
if (intel->gen == 6)
- prog_data->urb_entry_size = ALIGN(vue_entries, 8) / 8;
+ prog_data->base.urb_entry_size = ALIGN(vue_entries, 8) / 8;
else
- prog_data->urb_entry_size = ALIGN(vue_entries, 4) / 4;
+ prog_data->base.urb_entry_size = ALIGN(vue_entries, 4) / 4;
return payload_reg + nr_attributes;
}
@@ -1257,7 +1259,7 @@ vec4_visitor::setup_uniforms(int reg)
for (unsigned int i = 0; i < 4; i++) {
unsigned int slot = this->uniforms * 4 + i;
static float zero = 0.0;
- prog_data->param[slot] = &zero;
+ prog_data->base.param[slot] = &zero;
}
this->uniforms++;
@@ -1266,9 +1268,9 @@ vec4_visitor::setup_uniforms(int reg)
reg += ALIGN(uniforms, 2) / 2;
}
- prog_data->nr_params = this->uniforms * 4;
+ prog_data->base.nr_params = this->uniforms * 4;
- prog_data->curb_read_length = reg - 1;
+ prog_data->base.curb_read_length = reg - 1;
return reg;
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index 0853c0a..f9ebc3c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -76,7 +76,7 @@ vec4_visitor::reg_allocate_trivial()
next += this->virtual_grf_sizes[i];
}
}
- prog_data->total_grf = next;
+ prog_data->base.total_grf = next;
foreach_iter(exec_list_iterator, iter, this->instructions) {
vec4_instruction *inst = (vec4_instruction *)iter.get();
@@ -87,9 +87,9 @@ vec4_visitor::reg_allocate_trivial()
assign(hw_reg_mapping, &inst->src[2]);
}
- if (prog_data->total_grf > max_grf) {
+ if (prog_data->base.total_grf > max_grf) {
fail("Ran out of regs on trivial allocator (%d/%d)\n",
- prog_data->total_grf, max_grf);
+ prog_data->base.total_grf, max_grf);
return false;
}
@@ -221,12 +221,12 @@ vec4_visitor::reg_allocate()
* regs in the register classes back down to real hardware reg
* numbers.
*/
- prog_data->total_grf = first_assigned_grf;
+ prog_data->base.total_grf = first_assigned_grf;
for (int i = 0; i < virtual_grf_count; i++) {
int reg = ra_get_node_reg(g, i);
hw_reg_mapping[i] = first_assigned_grf + brw->vs.ra_reg_to_grf[reg];
- prog_data->total_grf = MAX2(prog_data->total_grf,
+ prog_data->base.total_grf = MAX2(prog_data->base.total_grf,
hw_reg_mapping[i] + virtual_grf_sizes[i]);
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 8769e9f..6bc9769 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -605,12 +605,12 @@ vec4_visitor::setup_uniform_values(ir_variable *ir)
int i;
for (i = 0; i < uniform_vector_size[uniforms]; i++) {
- prog_data->param[uniforms * 4 + i] = &components->f;
+ prog_data->base.param[uniforms * 4 + i] = &components->f;
components++;
}
for (; i < 4; i++) {
static float zero = 0;
- prog_data->param[uniforms * 4 + i] = &zero;
+ prog_data->base.param[uniforms * 4 + i] = &zero;
}
uniforms++;
@@ -639,7 +639,7 @@ vec4_visitor::setup_uniform_clipplane_values()
this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
for (int j = 0; j < 4; ++j) {
- prog_data->param[this->uniforms * 4 + j] = &clip_planes[i][j];
+ prog_data->base.param[this->uniforms * 4 + j] = &clip_planes[i][j];
}
++compacted_clipplane_index;
++this->uniforms;
@@ -653,7 +653,7 @@ vec4_visitor::setup_uniform_clipplane_values()
this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
this->userplane[i].type = BRW_REGISTER_TYPE_F;
for (int j = 0; j < 4; ++j) {
- prog_data->param[this->uniforms * 4 + j] = &clip_planes[i][j];
+ prog_data->base.param[this->uniforms * 4 + j] = &clip_planes[i][j];
}
++this->uniforms;
}
@@ -689,7 +689,7 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
int swiz = GET_SWZ(slots[i].swizzle, j);
last_swiz = swiz;
- prog_data->param[this->uniforms * 4 + j] = &values[swiz];
+ prog_data->base.param[this->uniforms * 4 + j] = &values[swiz];
if (swiz <= last_swiz)
this->uniform_vector_size[this->uniforms]++;
}
@@ -2408,7 +2408,7 @@ void
vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
{
if (intel->gen < 6 &&
- ((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
+ ((prog_data->base.vue_map.slots_valid & VARYING_BIT_PSIZ) ||
c->key.base.userclip_active || brw->has_negative_rhw_bug)) {
dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
dst_reg header1_w = header1;
@@ -2417,7 +2417,7 @@ vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
emit(MOV(header1, 0u));
- if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
+ if (prog_data->base.vue_map.slots_valid & VARYING_BIT_PSIZ) {
src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
current_annotation = "Point size";
@@ -2462,7 +2462,7 @@ vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
emit(MOV(retype(reg, BRW_REGISTER_TYPE_UD), 0u));
} else {
emit(MOV(retype(reg, BRW_REGISTER_TYPE_D), src_reg(0)));
- if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
+ if (prog_data->base.vue_map.slots_valid & VARYING_BIT_PSIZ) {
emit(MOV(brw_writemask(reg, WRITEMASK_W),
src_reg(output_reg[VARYING_SLOT_PSIZ])));
}
@@ -2493,7 +2493,7 @@ vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
* if the user wrote to it; otherwise we use gl_Position.
*/
gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
- if (!(prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
+ if (!(prog_data->base.vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX)) {
clip_vertex = VARYING_SLOT_POS;
}
@@ -2632,8 +2632,8 @@ vec4_visitor::emit_urb_writes()
/* Set up the VUE data for the first URB write */
int slot;
- for (slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
- emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
+ for (slot = 0; slot < prog_data->base.vue_map.num_slots; ++slot) {
+ emit_urb_slot(mrf++, prog_data->base.vue_map.slot_to_varying[slot]);
/* If this was max_usable_mrf, we can't fit anything more into this URB
* WRITE.
@@ -2644,7 +2644,7 @@ vec4_visitor::emit_urb_writes()
}
}
- bool eot = slot >= prog_data->vue_map.num_slots;
+ bool eot = slot >= prog_data->base.vue_map.num_slots;
if (eot) {
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
emit_shader_time_end();
@@ -2659,10 +2659,10 @@ vec4_visitor::emit_urb_writes()
if (!inst->eot) {
mrf = base_mrf + 1;
- for (; slot < prog_data->vue_map.num_slots; ++slot) {
+ for (; slot < prog_data->base.vue_map.num_slots; ++slot) {
assert(mrf < max_usable_mrf);
- emit_urb_slot(mrf++, prog_data->vue_map.slot_to_varying[slot]);
+ emit_urb_slot(mrf++, prog_data->base.vue_map.slot_to_varying[slot]);
}
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
@@ -2928,12 +2928,13 @@ vec4_visitor::move_uniform_array_access_to_pull_constants()
* add it.
*/
if (pull_constant_loc[uniform] == -1) {
- const float **values = &prog_data->param[uniform * 4];
+ const float **values = &prog_data->base.param[uniform * 4];
- pull_constant_loc[uniform] = prog_data->nr_pull_params / 4;
+ pull_constant_loc[uniform] = prog_data->base.nr_pull_params / 4;
for (int j = 0; j < uniform_size[uniform] * 4; j++) {
- prog_data->pull_param[prog_data->nr_pull_params++] = values[j];
+ prog_data->base.pull_param[prog_data->base.nr_pull_params++]
+ = values[j];
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
index bf6d03c..13156dd 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
@@ -413,9 +413,9 @@ vec4_visitor::emit_vertex_program_code()
const struct gl_program_parameter_list *params = c->vp->program.Base.Parameters;
unsigned i;
for (i = 0; i < params->NumParameters * 4; i++) {
- prog_data->pull_param[i] = &params->ParameterValues[i / 4][i % 4].f;
+ prog_data->base.pull_param[i] = &params->ParameterValues[i / 4][i % 4].f;
}
- prog_data->nr_pull_params = i;
+ prog_data->base.nr_pull_params = i;
}
}
@@ -442,15 +442,15 @@ vec4_visitor::setup_vp_regs()
this->uniform_size[this->uniforms] = 1; /* 1 vec4 */
this->uniform_vector_size[this->uniforms] = components;
for (unsigned i = 0; i < 4; i++) {
- prog_data->param[this->uniforms * 4 + i] = i >= components ? 0 :
- &plist->ParameterValues[p][i].f;
+ prog_data->base.param[this->uniforms * 4 + i] = i >= components
+ ? 0 : &plist->ParameterValues[p][i].f;
}
this->uniforms++; /* counted in vec4 units */
}
/* PROGRAM_OUTPUT */
- for (int slot = 0; slot < prog_data->vue_map.num_slots; slot++) {
- int varying = prog_data->vue_map.slot_to_varying[slot];
+ for (int slot = 0; slot < prog_data->base.vue_map.num_slots; slot++) {
+ int varying = prog_data->base.vue_map.slot_to_varying[slot];
if (varying == VARYING_SLOT_PSIZ)
output_reg[varying] = dst_reg(this, glsl_type::float_type);
else
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 2d0849a..998edb0 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -197,15 +197,13 @@ gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx)
}
}
+
bool
-brw_vs_prog_data_compare(const void *in_a, const void *in_b,
- int aux_size, const void *in_key)
+brw_vec4_prog_data_compare(const struct brw_vec4_prog_data *a,
+ const struct brw_vec4_prog_data *b)
{
- const struct brw_vs_prog_data *a = in_a;
- const struct brw_vs_prog_data *b = in_b;
-
/* Compare all the struct up to the pointers. */
- if (memcmp(a, b, offsetof(struct brw_vs_prog_data, param)))
+ if (memcmp(a, b, offsetof(struct brw_vec4_prog_data, param)))
return false;
if (memcmp(a->param, b->param, a->nr_params * sizeof(void *)))
@@ -217,6 +215,28 @@ brw_vs_prog_data_compare(const void *in_a, const void *in_b,
return true;
}
+
+/* Returns true when two brw_vs_prog_data blobs (passed as void *) match:
+ * the base vec4 part and the VS-only fields. aux_size/in_key are unused.
+ */
+bool
+brw_vs_prog_data_compare(const void *in_a, const void *in_b,
+                         int aux_size, const void *in_key)
+{
+   const struct brw_vs_prog_data *a = in_a;
+   const struct brw_vs_prog_data *b = in_b;
+
+   /* Compare the base vec4 structure. */
+   if (!brw_vec4_prog_data_compare(&a->base, &b->base))
+      return false;
+
+   /* Compare the VS-specific tail.  Offset from a and b themselves: using
+    * &a/&b would compare the stack around the local pointers instead. */
+   const unsigned offset = sizeof(struct brw_vec4_prog_data);
+   return memcmp(((const char *) a) + offset, ((const char *) b) + offset,
+                 sizeof(struct brw_vs_prog_data) - offset) == 0;
+}
+
static bool
do_vs_prog(struct brw_context *brw,
struct gl_shader_program *prog,
@@ -261,8 +281,8 @@ do_vs_prog(struct brw_context *brw,
/* We also upload clip plane data as uniforms */
param_count += MAX_CLIP_PLANES * 4;
- prog_data.param = rzalloc_array(NULL, const float *, param_count);
- prog_data.pull_param = rzalloc_array(NULL, const float *, param_count);
+ prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
+ prog_data.base.pull_param = rzalloc_array(NULL, const float *, param_count);
GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
prog_data.inputs_read = vp->program.Base.InputsRead;
@@ -285,7 +305,7 @@ do_vs_prog(struct brw_context *brw,
}
}
- brw_compute_vue_map(brw, &prog_data.vue_map, outputs_written,
+ brw_compute_vue_map(brw, &prog_data.base.vue_map, outputs_written,
c.key.base.userclip_active);
if (0) {
@@ -301,13 +321,13 @@ do_vs_prog(struct brw_context *brw,
return false;
}
- if (prog_data.nr_pull_params)
- prog_data.num_surfaces = 1;
+ if (prog_data.base.nr_pull_params)
+ prog_data.base.num_surfaces = 1;
if (c.vp->program.Base.SamplersUsed)
- prog_data.num_surfaces = SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT);
+ prog_data.base.num_surfaces = SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT);
if (prog &&
prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks) {
- prog_data.num_surfaces =
+ prog_data.base.num_surfaces =
SURF_INDEX_VS_UBO(prog->_LinkedShaders[MESA_SHADER_VERTEX]->NumUniformBlocks);
}
@@ -317,10 +337,11 @@ do_vs_prog(struct brw_context *brw,
"Try reducing the number of live vec4 values to "
"improve performance.\n");
- prog_data.total_scratch = brw_get_scratch_size(c.base.last_scratch*REG_SIZE);
+ prog_data.base.total_scratch
+ = brw_get_scratch_size(c.base.last_scratch*REG_SIZE);
brw_get_scratch_bo(intel, &brw->vs.scratch_bo,
- prog_data.total_scratch * brw->max_vs_threads);
+ prog_data.base.total_scratch * brw->max_vs_threads);
}
brw_upload_cache(&brw->cache, BRW_VS_PROG,
@@ -503,9 +524,9 @@ static void brw_upload_vs_prog(struct brw_context *brw)
assert(success);
}
- if (memcmp(&brw->vs.prog_data->vue_map, &brw->vue_map_geom_out,
+ if (memcmp(&brw->vs.prog_data->base.vue_map, &brw->vue_map_geom_out,
sizeof(brw->vue_map_geom_out)) != 0) {
- brw->vue_map_geom_out = brw->vs.prog_data->vue_map;
+ brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map;
brw->state.dirty.brw |= BRW_NEW_VUE_MAP_GEOM_OUT;
}
}
@@ -564,11 +585,19 @@ brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
return success;
}
+
+void
+brw_vec4_prog_data_free(const struct brw_vec4_prog_data *prog_data)
+{
+ ralloc_free((void *)prog_data->param);
+ ralloc_free((void *)prog_data->pull_param);
+}
+
+
void
brw_vs_prog_data_free(const void *in_prog_data)
{
const struct brw_vs_prog_data *prog_data = in_prog_data;
- ralloc_free((void *)prog_data->param);
- ralloc_free((void *)prog_data->pull_param);
+ brw_vec4_prog_data_free(&prog_data->base);
}
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index e1c6ed0..ba83f6d 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -130,8 +130,11 @@ bool brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *prog);
void brw_vs_debug_recompile(struct brw_context *brw,
struct gl_shader_program *prog,
const struct brw_vs_prog_key *key);
+bool brw_vec4_prog_data_compare(const struct brw_vec4_prog_data *a,
+ const struct brw_vec4_prog_data *b);
bool brw_vs_prog_data_compare(const void *a, const void *b,
int aux_size, const void *key);
+void brw_vec4_prog_data_free(const struct brw_vec4_prog_data *prog_data);
void brw_vs_prog_data_free(const void *in_prog_data);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index a72a283..bb42bd0 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -47,7 +47,8 @@ brw_upload_vs_unit(struct brw_context *brw)
memset(vs, 0, sizeof(*vs));
/* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_VS_PROG */
- vs->thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1;
+ vs->thread0.grf_reg_count =
+ ALIGN(brw->vs.prog_data->base.total_grf, 16) / 16 - 1;
vs->thread0.kernel_start_pointer =
brw_program_reloc(brw,
brw->vs.state_offset +
@@ -72,18 +73,19 @@ brw_upload_vs_unit(struct brw_context *brw)
vs->thread1.binding_table_entry_count = 0;
- if (brw->vs.prog_data->total_scratch != 0) {
+ if (brw->vs.prog_data->base.total_scratch != 0) {
vs->thread2.scratch_space_base_pointer =
brw->vs.scratch_bo->offset >> 10; /* reloc */
vs->thread2.per_thread_scratch_space =
- ffs(brw->vs.prog_data->total_scratch) - 11;
+ ffs(brw->vs.prog_data->base.total_scratch) - 11;
} else {
vs->thread2.scratch_space_base_pointer = 0;
vs->thread2.per_thread_scratch_space = 0;
}
- vs->thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length;
- vs->thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length;
+ vs->thread3.urb_entry_read_length = brw->vs.prog_data->base.urb_read_length;
+ vs->thread3.const_urb_entry_read_length
+ = brw->vs.prog_data->base.curb_read_length;
vs->thread3.dispatch_grf_start_reg = 1;
vs->thread3.urb_entry_read_offset = 0;
@@ -144,7 +146,7 @@ brw_upload_vs_unit(struct brw_context *brw)
vs->vs6.vs_enable = 1;
/* Emit scratch space relocation */
- if (brw->vs.prog_data->total_scratch != 0) {
+ if (brw->vs.prog_data->base.total_scratch != 0) {
drm_intel_bo_emit_reloc(intel->batch.bo,
brw->vs.state_offset +
offsetof(struct brw_vs_unit_state, thread2),
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index 675a84c..968cc03 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -56,7 +56,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw)
_mesa_load_state_parameters(&brw->intel.ctx, vp->program.Base.Parameters);
/* CACHE_NEW_VS_PROG */
- if (!brw->vs.prog_data->nr_pull_params) {
+ if (!brw->vs.prog_data->base.nr_pull_params) {
if (brw->vs.const_bo) {
drm_intel_bo_unreference(brw->vs.const_bo);
brw->vs.const_bo = NULL;
@@ -68,19 +68,20 @@ brw_upload_vs_pull_constants(struct brw_context *brw)
/* _NEW_PROGRAM_CONSTANTS */
drm_intel_bo_unreference(brw->vs.const_bo);
- uint32_t size = brw->vs.prog_data->nr_pull_params * 4;
+ uint32_t size = brw->vs.prog_data->base.nr_pull_params * 4;
brw->vs.const_bo = drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer",
size, 64);
drm_intel_gem_bo_map_gtt(brw->vs.const_bo);
- for (i = 0; i < brw->vs.prog_data->nr_pull_params; i++) {
+ for (i = 0; i < brw->vs.prog_data->base.nr_pull_params; i++) {
memcpy(brw->vs.const_bo->virtual + i * 4,
- brw->vs.prog_data->pull_param[i],
+ brw->vs.prog_data->base.pull_param[i],
4);
}
if (0) {
- for (i = 0; i < ALIGN(brw->vs.prog_data->nr_pull_params, 4) / 4; i++) {
+ for (i = 0; i < ALIGN(brw->vs.prog_data->base.nr_pull_params, 4) / 4;
+ i++) {
float *row = (float *)brw->vs.const_bo->virtual + i * 4;
printf("vs const surface %3d: %4.3f %4.3f %4.3f %4.3f\n",
i, row[0], row[1], row[2], row[3]);
@@ -142,14 +143,15 @@ brw_vs_upload_binding_table(struct brw_context *brw)
if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
gen7_create_shader_time_surface(brw, &brw->vs.surf_offset[SURF_INDEX_VS_SHADER_TIME]);
- assert(brw->vs.prog_data->num_surfaces <= SURF_INDEX_VS_SHADER_TIME);
- brw->vs.prog_data->num_surfaces = SURF_INDEX_VS_SHADER_TIME;
+ assert(brw->vs.prog_data->base.num_surfaces
+ <= SURF_INDEX_VS_SHADER_TIME);
+ brw->vs.prog_data->base.num_surfaces = SURF_INDEX_VS_SHADER_TIME;
}
/* CACHE_NEW_VS_PROG: Skip making a binding table if we don't use textures or
* pull constants.
*/
- if (brw->vs.prog_data->num_surfaces == 0) {
+ if (brw->vs.prog_data->base.num_surfaces == 0) {
if (brw->vs.bind_bo_offset != 0) {
brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
brw->vs.bind_bo_offset = 0;
diff --git a/src/mesa/drivers/dri/i965/gen6_urb.c b/src/mesa/drivers/dri/i965/gen6_urb.c
index aa985de..d47bf9e 100644
--- a/src/mesa/drivers/dri/i965/gen6_urb.c
+++ b/src/mesa/drivers/dri/i965/gen6_urb.c
@@ -54,7 +54,7 @@ gen6_upload_urb( struct brw_context *brw )
int total_urb_size = brw->urb.size * 1024; /* in bytes */
/* CACHE_NEW_VS_PROG */
- unsigned vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1);
+ unsigned vs_size = MAX2(brw->vs.prog_data->base.urb_entry_size, 1);
/* We use the same VUE layout for VS outputs and GS outputs (as it's what
* the SF and Clipper expect), so we can simply make the GS URB entry size
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index fb05354..ae1a841 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -49,7 +49,7 @@ gen6_upload_vs_push_constants(struct brw_context *brw)
_mesa_load_state_parameters(ctx, vp->program.Base.Parameters);
/* CACHE_NEW_VS_PROG */
- if (brw->vs.prog_data->nr_params == 0) {
+ if (brw->vs.prog_data->base.nr_params == 0) {
brw->vs.push_const_size = 0;
} else {
int params_uploaded;
@@ -57,7 +57,7 @@ gen6_upload_vs_push_constants(struct brw_context *brw)
int i;
param = brw_state_batch(brw, AUB_TRACE_VS_CONSTANTS,
- brw->vs.prog_data->nr_params * sizeof(float),
+ brw->vs.prog_data->base.nr_params * sizeof(float),
32, &brw->vs.push_const_offset);
/* _NEW_PROGRAM_CONSTANTS
@@ -66,10 +66,10 @@ gen6_upload_vs_push_constants(struct brw_context *brw)
* side effect of dereferencing uniforms, so _NEW_PROGRAM_CONSTANTS
* wouldn't be set for them.
*/
- for (i = 0; i < brw->vs.prog_data->nr_params; i++) {
- param[i] = *brw->vs.prog_data->param[i];
+ for (i = 0; i < brw->vs.prog_data->base.nr_params; i++) {
+ param[i] = *brw->vs.prog_data->base.param[i];
}
- params_uploaded = brw->vs.prog_data->nr_params / 4;
+ params_uploaded = brw->vs.prog_data->base.nr_params / 4;
if (0) {
printf("VS constant buffer:\n");
@@ -148,16 +148,16 @@ upload_vs_state(struct brw_context *brw)
OUT_BATCH(floating_point_mode |
((ALIGN(brw->sampler.count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT));
- if (brw->vs.prog_data->total_scratch) {
+ if (brw->vs.prog_data->base.total_scratch) {
OUT_RELOC(brw->vs.scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- ffs(brw->vs.prog_data->total_scratch) - 11);
+ ffs(brw->vs.prog_data->base.total_scratch) - 11);
} else {
OUT_BATCH(0);
}
OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
- (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
+ (brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
OUT_BATCH(((brw->max_vs_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT) |
diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c
index 5ac3885..bdcf164 100644
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -83,7 +83,7 @@ gen7_upload_urb(struct brw_context *brw)
int handle_region_size = (brw->urb.size - push_size_kB) * 1024; /* bytes */
/* CACHE_NEW_VS_PROG */
- unsigned vs_size = MAX2(brw->vs.prog_data->urb_entry_size, 1);
+ unsigned vs_size = MAX2(brw->vs.prog_data->base.urb_entry_size, 1);
int nr_vs_entries = handle_region_size / (vs_size * 64);
if (nr_vs_entries > brw->urb.max_vs_entries)
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index e8be4f2..1b97e8c 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -89,16 +89,16 @@ upload_vs_state(struct brw_context *brw)
OUT_BATCH(floating_point_mode |
((ALIGN(brw->sampler.count, 4)/4) << GEN6_VS_SAMPLER_COUNT_SHIFT));
- if (brw->vs.prog_data->total_scratch) {
+ if (brw->vs.prog_data->base.total_scratch) {
OUT_RELOC(brw->vs.scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- ffs(brw->vs.prog_data->total_scratch) - 11);
+ ffs(brw->vs.prog_data->base.total_scratch) - 11);
} else {
OUT_BATCH(0);
}
OUT_BATCH((1 << GEN6_VS_DISPATCH_START_GRF_SHIFT) |
- (brw->vs.prog_data->urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
+ (brw->vs.prog_data->base.urb_read_length << GEN6_VS_URB_READ_LENGTH_SHIFT) |
(0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
OUT_BATCH(((brw->max_vs_threads - 1) << max_threads_shift) |
--
1.8.2.1
More information about the mesa-dev
mailing list