[Mesa-dev] [RFC PATCH] i965/gs: add snb support
Ilia Mirkin
imirkin at alum.mit.edu
Mon Feb 24 00:36:25 PST 2014
Before you read any further, this is nowhere close to working. However it's in
a state where I think most of the structure is there, albeit with a lot of XXX
comments. And I haven't actually implemented the new opcodes I've added.
I was hoping one or two Intel people could take a look at this and let me know
of any pitfalls I'm likely to run into. I've already gotten a lot of help and
advice from Ken, but wanted to put something out publicly.
Any and all feedback much appreciated!
Not-Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
src/mesa/drivers/dri/i965/Makefile.sources | 1 +
src/mesa/drivers/dri/i965/brw_defines.h | 13 +
src/mesa/drivers/dri/i965/brw_vec4.h | 1 +
src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 36 ++-
src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h | 5 +-
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 14 ++
src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 285 ++++++++++++++++++++++
src/mesa/drivers/dri/i965/gen6_gs_visitor.h | 63 +++++
8 files changed, 405 insertions(+), 13 deletions(-)
create mode 100644 src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
create mode 100644 src/mesa/drivers/dri/i965/gen6_gs_visitor.h
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index dc30eb3..96b637f 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -119,6 +119,7 @@ i965_FILES = \
gen6_clip_state.c \
gen6_depthstencil.c \
gen6_gs_state.c \
+ gen6_gs_visitor.cpp \
gen6_multisample_state.c \
gen6_queryobj.c \
gen6_sampler_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 01d3cb6..a24919c 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -909,6 +909,19 @@ enum opcode {
* - dst is the GRF for gl_InvocationID.
*/
GS_OPCODE_GET_INSTANCE_ID,
+
+ /**
+ * Sets DWORD 2 of dst to the value in src DWORD 0. Used by geometry
+ * shaders to initialize DWORD 2 of the message header, which contains
+ * primitive start/end flags.
+ */
+ GS_OPCODE_SET_DWORD_2,
+
+ /**
+ * Emits a FF_SYNC, which on Gen6 returns a VUE handle, which is needed to
+ * emit state in GS.
+ */
+ GS_OPCODE_FF_SYNC,
};
enum brw_urb_write_flags {
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 6bd8b80..14d67b7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -130,6 +130,7 @@ public:
bool is_one() const;
src_reg(class vec4_visitor *v, const struct glsl_type *type);
+ src_reg(class vec4_visitor *v, const struct glsl_type *type, int size);
explicit src_reg(dst_reg reg);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index 0a2d8ff..ede9002 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -28,6 +28,7 @@
*/
#include "brw_vec4_gs_visitor.h"
+#include "gen6_gs_visitor.h"
const unsigned MAX_GS_INPUT_VERTICES = 6;
@@ -587,6 +588,8 @@ brw_gs_emit(struct brw_context *brw,
{
struct brw_shader *shader =
(brw_shader *) prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
+ vec4_gs_visitor *gs;
+ const unsigned *ret = NULL;
if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
printf("GLSL IR for native geometry shader %d:\n", prog->Name);
@@ -602,12 +605,17 @@ brw_gs_emit(struct brw_context *brw,
likely(!(INTEL_DEBUG & DEBUG_NO_DUAL_OBJECT_GS))) {
c->prog_data.dual_instanced_dispatch = false;
- vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, true /* no_spills */);
- if (v.run()) {
- return generate_assembly(brw, prog, &c->gp->program.Base,
- &c->prog_data.base, mem_ctx, &v.instructions,
- final_assembly_size);
+ if (brw->gen >= 7)
+ gs = new vec4_gs_visitor(brw, c, prog, shader, mem_ctx, true /* no_spills */);
+ else
+ gs = new gen6_gs_visitor(brw, c, prog, shader, mem_ctx, true /* no_spills */);
+ if (gs->run()) {
+ ret = generate_assembly(brw, prog, &c->gp->program.Base,
+ &c->prog_data.base, mem_ctx, &gs->instructions,
+ final_assembly_size);
+ goto done;
}
+ delete gs;
}
/* Either we failed to compile in DUAL_OBJECT mode (probably because it
@@ -622,15 +630,21 @@ brw_gs_emit(struct brw_context *brw,
*/
c->prog_data.dual_instanced_dispatch = true;
- vec4_gs_visitor v(brw, c, prog, shader, mem_ctx, false /* no_spills */);
- if (!v.run()) {
+ if (brw->gen >= 7)
+ gs = new vec4_gs_visitor(brw, c, prog, shader, mem_ctx, false /* no_spills */);
+ else
+ gs = new gen6_gs_visitor(brw, c, prog, shader, mem_ctx, false /* no_spills */);
+ if (!gs->run()) {
prog->LinkStatus = false;
- ralloc_strcat(&prog->InfoLog, v.fail_msg);
- return NULL;
+ ralloc_strcat(&prog->InfoLog, gs->fail_msg);
+ goto done;
}
- return generate_assembly(brw, prog, &c->gp->program.Base, &c->prog_data.base,
- mem_ctx, &v.instructions, final_assembly_size);
+ ret = generate_assembly(brw, prog, &c->gp->program.Base, &c->prog_data.base,
+ mem_ctx, &gs->instructions, final_assembly_size);
+done:
+ delete gs;
+ return ret;
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
index 68756f7..7a4a262 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h
@@ -96,14 +96,15 @@ protected:
virtual void visit(ir_emit_vertex *);
virtual void visit(ir_end_primitive *);
+ src_reg vertex_count;
+ const struct brw_gs_compile * const c;
+
private:
int setup_varying_inputs(int payload_reg, int *attribute_map,
int attributes_per_reg);
void emit_control_data_bits();
- src_reg vertex_count;
src_reg control_data_bits;
- const struct brw_gs_compile * const c;
};
} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 601b364..0ebf118 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -616,6 +616,20 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
this->type = brw_type_for_base_type(type);
}
+/**
+ * Construct a source register backed by a newly allocated virtual GRF that
+ * has room for \p size values of \p type.
+ *
+ * \param v     visitor whose virtual_grf_alloc() provides the storage.
+ * \param type  GLSL type of one element; also selects the register type.
+ * \param size  number of elements to reserve; must be greater than zero.
+ */
+src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size)
+{
+   assert(size > 0);
+
+   init();
+
+   /* Total allocation: per-element size times the element count. */
+   const int alloc_size = type_size(type) * size;
+
+   this->file = GRF;
+   this->type = brw_type_for_base_type(type);
+   this->swizzle = BRW_SWIZZLE_NOOP;
+   this->reg = v->virtual_grf_alloc(alloc_size);
+}
+
dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
{
init();
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
new file mode 100644
index 0000000..7be04d0
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
@@ -0,0 +1,285 @@
+/*
+ * Copyright © 2014 Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file gen6_gs_visitor.cpp
+ *
+ * Gen6 geometry-shader-specific code, derived from the Gen7+ vec4_gs_visitor.
+ */
+
+#include "gen6_gs_visitor.h"
+
+namespace brw {
+
+/**
+ * Emit the shader prolog: first the common vec4_gs_visitor prolog, then the
+ * allocation and initialization of the registers this visitor uses to buffer
+ * emitted vertices (vertex_output, vertex_output_offset, first_vertex).
+ */
+void
+gen6_gs_visitor::emit_prolog()
+{
+   vec4_gs_visitor::emit_prolog();
+
+   /* Layout of vertex_output:
+    *
+    * The array holds everything emitted while the program runs. Each
+    * emitted vertex occupies vue_map.num_slots data entries followed by one
+    * extra entry holding its flags, and the next vertex starts right after
+    * that. To match the URB_WRITE message header, bit 0 of the flags is
+    * PrimEnd and bit 1 is PrimStart.
+    */
+   const int entries_per_vertex = prog_data->vue_map.num_slots + 1;
+   this->vertex_output =
+      src_reg(this, glsl_type::uint_type,
+              entries_per_vertex * c->gp->program.VerticesOut);
+
+   /* Write cursor into vertex_output; starts at the first entry. */
+   this->vertex_output_offset = src_reg(this, glsl_type::uint_type);
+   emit(MOV(dst_reg(this->vertex_output_offset), src_reg(0u)));
+
+   /* 2 == PrimStart: the first vertex emitted starts a primitive. */
+   this->first_vertex = src_reg(this, glsl_type::uint_type);
+   emit(MOV(dst_reg(this->first_vertex), src_reg(2u)));
+}
+
+/**
+ * Handle EmitVertex(): instead of writing to the URB right away, append the
+ * current outputs plus a flags entry to vertex_output, then bump
+ * vertex_count. Nothing is recorded once vertex_count has reached the
+ * program's VerticesOut.
+ */
+void
+gen6_gs_visitor::visit(ir_emit_vertex *)
+{
+   const unsigned max_vertices = c->gp->program.VerticesOut;
+   const int num_slots = prog_data->vue_map.num_slots;
+
+   emit(CMP(dst_null_d(), this->vertex_count,
+            src_reg(max_vertices), BRW_CONDITIONAL_L));
+   emit(IF(BRW_PREDICATE_NORMAL));
+   {
+      /* Copy each output slot into the temporary area, advancing the write
+       * cursor by one entry per slot.
+       */
+      for (int s = 0; s < num_slots; s++) {
+         dst_reg slot_dst(this->vertex_output);
+         slot_dst.reladdr = ralloc(mem_ctx, src_reg);
+         memcpy(slot_dst.reladdr, &this->vertex_output_offset,
+                sizeof(*slot_dst.reladdr));
+         /* XXX generalize emit_urb_slot and use that, otherwise gl_Position
+          * & co don't work
+          */
+         emit_generic_urb_slot(slot_dst,
+                               prog_data->vue_map.slot_to_varying[s]);
+         emit(ADD(dst_reg(this->vertex_output_offset),
+                  this->vertex_output_offset, src_reg(1u)));
+      }
+
+      /* XXX write the flags in a format that will make implementation of
+       * the "write flags" opcode easy.
+       */
+      dst_reg flags_dst(this->vertex_output);
+      flags_dst.reladdr = ralloc(mem_ctx, src_reg);
+      memcpy(flags_dst.reladdr, &this->vertex_output_offset,
+             sizeof(*flags_dst.reladdr));
+      if (c->gp->program.OutputType != GL_POINTS) {
+         /* first_vertex holds 2 for the first vertex of a primitive and 0
+          * otherwise, so it can be stored as the flags directly. Clear it
+          * afterwards so the following vertices are not marked as starting
+          * a primitive.
+          */
+         emit(MOV(flags_dst, this->first_vertex));
+         emit(MOV(dst_reg(this->first_vertex), src_reg(0u)));
+      } else {
+         /* Each point both starts and ends its primitive. */
+         emit(MOV(flags_dst, src_reg(3u)));
+      }
+
+      /* Step past the flags entry and count the vertex. */
+      emit(ADD(dst_reg(this->vertex_output_offset),
+               this->vertex_output_offset, src_reg(1u)));
+
+      emit(ADD(dst_reg(this->vertex_count),
+               this->vertex_count, src_reg(1u)));
+   }
+   emit(BRW_OPCODE_ENDIF);
+}
+
+/**
+ * Handle EndPrimitive(): set the PrimEnd bit in the flags entry of the most
+ * recently recorded vertex and arrange for the next vertex to be flagged as
+ * starting a new primitive. Does nothing for GL_POINTS output.
+ */
+void
+gen6_gs_visitor::visit(ir_end_primitive *)
+{
+   /* This has no effect for GL_POINTS */
+   if (c->gp->program.OutputType == GL_POINTS)
+      return;
+
+   /* The flags being modified belong to the *previous* vertex, so the only
+    * requirement is that at least one vertex has been recorded. Guarding on
+    * vertex_count < VerticesOut instead would (a) let the offset below wrap
+    * to ~0 when nothing has been emitted yet, and (b) skip marking the last
+    * vertex when the maximum number of vertices has been emitted.
+    */
+   emit(CMP(dst_null_d(), this->vertex_count, src_reg(0u),
+            BRW_CONDITIONAL_G));
+   emit(IF(BRW_PREDICATE_NORMAL));
+   {
+      /* vertex_output_offset is already pointing at the first entry of the
+       * next vertex. So subtract 1 to modify the flags for the previous
+       * vertex.
+       */
+      src_reg offset(this, glsl_type::uint_type);
+      emit(MOV(dst_reg(offset), this->vertex_output_offset));
+      emit(ADD(dst_reg(offset), offset, src_reg(~0u)));
+
+      src_reg flags(this->vertex_output);
+      flags.reladdr = ralloc(mem_ctx, src_reg);
+      memcpy(flags.reladdr, &offset, sizeof(src_reg));
+
+      /* Set the 0 bit to indicate that the primitive is ending */
+      emit(OR(dst_reg(flags), flags, src_reg(1u)));
+
+      /* Set the first vertex flag to indicate that the next vertex will
+       * start a primitive
+       */
+      emit(MOV(dst_reg(this->first_vertex), src_reg(2u)));
+   }
+   emit(BRW_OPCODE_ENDIF);
+}
+
+/**
+ * Emit the end-of-thread code: replay every vertex buffered in vertex_output
+ * as URB writes.
+ *
+ * The emitted code loops over the recorded vertices. For each one it builds
+ * a URB write header, merges the vertex's stored PrimStart/PrimEnd flags
+ * with the output primitive type into DWORD 2 of the header, and copies the
+ * vertex's slots into MRFs, splitting into several URB writes when they do
+ * not all fit below the MRFs reserved for spilling.
+ */
+void
+gen6_gs_visitor::emit_thread_end()
+{
+   /* Now we take all of the vertex data previously written into
+    * vertex_output, and emit it for real. This involves looping through all
+    * the vertices, loading the relevant data back into the MRFs, and emitting
+    * write opcodes.
+    *
+    * Note that this has to be done before move_grf_array_access_to_scratch
+    * runs and does its magic, otherwise we have to manually deal with scratch
+    * space.
+    */
+
+   /* Index of the vertex currently being written out. */
+   src_reg i(this, glsl_type::uint_type);
+   emit(MOV(dst_reg(i), src_reg(0u)));
+
+   /* Read cursor into vertex_output. */
+   src_reg offset(this, glsl_type::uint_type);
+   emit(MOV(dst_reg(offset), src_reg(0u)));
+
+   src_reg vue_handle(this, glsl_type::uint_type);
+   src_reg prim_flags(this, glsl_type::uint_type);
+
+   /* XXX check if FF_SYNC really needs the # of primitives generated, if it
+    * does, figure out exactly what it wants a counter of, and count it.
+    */
+   /* XXX look into SO and whether it interacts with this GS logic in any way.
+    */
+   emit(GS_OPCODE_FF_SYNC, dst_reg(vue_handle));
+
+   /* XXX double-check loop logic -- can this be done using a regular
+    * condition instead of a nested if inside an infinite do/while?
+    */
+   emit(BRW_OPCODE_DO);
+   {
+      /* Stop once i has reached vertex_count. i starts at 0, so valid
+       * indices are 0 .. vertex_count - 1; breaking only on
+       * vertex_count < i would write one vertex too many.
+       */
+      emit(CMP(dst_null_d(), this->vertex_count, i, BRW_CONDITIONAL_LE));
+      emit(IF(BRW_PREDICATE_NORMAL));
+      {
+         emit(BRW_OPCODE_BREAK);
+      }
+      emit(BRW_OPCODE_ENDIF);
+
+      /* Writes out a single vertex's worth data. This is basically the same
+       * as vec4_visitor::emit_vertex, except that it's using vertex_output as
+       * its source rather than the varying slots
+       */
+
+      int base_mrf = 1;
+      int mrf = base_mrf;
+      /* MRF 14/15 are used for spill handling */
+      int max_usable_mrf = 13;
+
+      dst_reg base_mrf_reg = dst_reg(MRF, base_mrf);
+      base_mrf_reg.type = BRW_REGISTER_TYPE_UD;
+
+      emit_urb_write_header(mrf++);
+
+      /* XXX double-check that these really are the right primitive ids */
+      unsigned prim_type;
+      switch (c->gp->program.OutputType) {
+      case GL_POINTS:
+         prim_type = _3DPRIM_POINTLIST << 2;
+         break;
+      case GL_LINE_STRIP:
+         prim_type = _3DPRIM_LINESTRIP << 2;
+         break;
+      case GL_TRIANGLE_STRIP:
+         prim_type = _3DPRIM_TRISTRIP << 2;
+         break;
+      default:
+         assert(!"Unexpected output type");
+         prim_type = 0;
+         break;
+      }
+
+      /* Location of the primitive flags for the current vertex */
+      src_reg flag_offset(this, glsl_type::uint_type);
+      emit(ADD(dst_reg(flag_offset),
+               offset, src_reg(prog_data->vue_map.num_slots)));
+
+      /* Pointer to the primitive flag data for the current vertex.
+       *
+       * The copy must target the ralloc'ed src_reg itself; copying to
+       * &prim_data.reladdr would overwrite the pointer (and whatever
+       * follows it inside prim_data) with the contents of flag_offset.
+       */
+      src_reg prim_data(this->vertex_output);
+      prim_data.reladdr = ralloc(mem_ctx, src_reg);
+      memcpy(prim_data.reladdr, &flag_offset, sizeof(src_reg));
+
+      /* Combine the current vertex prim start/end flags with the primitive
+       * type
+       */
+      emit(OR(dst_reg(prim_flags), prim_data, src_reg(prim_type)));
+
+      emit(GS_OPCODE_SET_DWORD_2, base_mrf_reg, prim_flags);
+
+      src_reg data(this->vertex_output);
+      data.reladdr = ralloc(mem_ctx, src_reg);
+      memcpy(data.reladdr, &offset, sizeof(src_reg));
+
+      int slot = 0;
+      bool complete = false;
+      do {
+         /* URB offset is in URB row increments, and each of our MRFs is half
+          * of one of those, since we're doing interleaved writes.
+          */
+         int urb_offset = slot / 2;
+
+         mrf = base_mrf + 1;
+         for (; slot < prog_data->vue_map.num_slots; ++slot) {
+            dst_reg reg = dst_reg(MRF, mrf++);
+            /* vertex_output holds the raw bits of each slot, so the MRF
+             * must have the same type as the source; a differing type
+             * (e.g. F) would make the MOV convert the value instead of
+             * copying it.
+             */
+            reg.type = data.type;
+
+            emit(MOV(reg, data));
+
+            /* XXX double-check that adding to offset will also alter the
+             * reladdr above, or if we need to create a fresh offset for each
+             * slot. */
+            emit(ADD(dst_reg(offset), offset, src_reg(1u)));
+
+            /* If this was max_usable_mrf, we can't fit anything more into
+             * this URB WRITE.
+             */
+            if (mrf > max_usable_mrf) {
+               slot++;
+               break;
+            }
+         }
+
+         complete = slot >= prog_data->vue_map.num_slots;
+         current_annotation = "URB write";
+         vec4_instruction *inst = emit_urb_write_opcode(complete);
+         inst->base_mrf = base_mrf;
+         inst->mlen = mrf - base_mrf;
+         if ((inst->mlen % 2) != 1)
+            inst->mlen++;
+         inst->offset += urb_offset;
+      } while (!complete);
+
+      /* Each vertex occupies num_slots data entries plus one flags entry
+       * in vertex_output. The slot copies above advanced offset past the
+       * data only, so step over the flags entry to reach the next vertex.
+       */
+      emit(ADD(dst_reg(offset), offset, src_reg(1u)));
+
+      emit(ADD(dst_reg(i), i, src_reg(1u)));
+   }
+   emit(BRW_OPCODE_WHILE);
+
+   /* XXX more thread ending logic here, perhaps similar to what
+    * vec4_gs_visitor::emit_thread_end does. Need to check the docs.
+    */
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
new file mode 100644
index 0000000..b519438
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright © 2014 Ilia Mirkin
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file gen6_gs_visitor.h
+ *
+ * Gen6 geometry-shader-specific code, derived from the Gen7+ vec4_gs_visitor.
+ */
+
+#ifndef GEN6_GS_VISITOR_H
+#define GEN6_GS_VISITOR_H
+
+#include "brw_vec4.h"
+#include "brw_vec4_gs_visitor.h"
+
+#ifdef __cplusplus
+namespace brw {
+
+/**
+ * Geometry shader visitor for Gen6, built on top of vec4_gs_visitor.
+ *
+ * It overrides the prolog, thread-end and EmitVertex()/EndPrimitive()
+ * handling so that emitted vertices are first buffered in a register array
+ * and only written out when the thread ends.
+ */
+class gen6_gs_visitor : public vec4_gs_visitor {
+ public:
+ /** Forwards all arguments unchanged to the vec4_gs_visitor constructor. */
+ gen6_gs_visitor(struct brw_context *brw,
+ struct brw_gs_compile *c,
+ struct gl_shader_program *prog,
+ struct brw_shader *shader,
+ void *mem_ctx,
+ bool no_spills) :
+ vec4_gs_visitor(brw, c, prog, shader, mem_ctx, no_spills) {}
+ protected:
+ virtual void emit_prolog();
+ virtual void emit_thread_end();
+ virtual void visit(ir_emit_vertex *);
+ virtual void visit(ir_end_primitive *);
+
+ private:
+ /** Array buffering the slots and flags of every emitted vertex. */
+ src_reg vertex_output;
+ /** Index of the next entry to be written in vertex_output. */
+ src_reg vertex_output_offset;
+ /** 2 if the next emitted vertex starts a primitive, 0 otherwise. */
+ src_reg first_vertex;
+};
+
+} /* namespace brw */
+#endif /* __cplusplus */
+
+#endif /* GEN6_GS_VISITOR_H */
--
1.8.3.2
More information about the mesa-dev
mailing list