[Mesa-dev] [PATCH v2 11/13] i965: Add tessellation evaluation shaders
Kenneth Graunke
kenneth at whitecape.org
Fri Dec 11 13:24:00 PST 2015
The TES is essentially a post-tessellator VS, which has access to the
entire TCS output patch, and a special gl_TessCoord input. Otherwise,
they're very straightforward.
This patch implements SIMD8 tessellation evaluation shaders for Gen8+.
The tessellator can generate a lot of geometry, so operating in SIMD8
mode (8 vertices per thread) is more efficient than SIMD4x2 mode (only
2 vertices per thread). I have another patch which implements SIMD4x2
mode for older hardware (or via an environment variable override).
We currently handle all inputs via the pull model.
Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
src/mesa/drivers/dri/i965/Makefile.sources | 1 +
src/mesa/drivers/dri/i965/brw_compiler.h | 24 +++
src/mesa/drivers/dri/i965/brw_context.h | 6 +
src/mesa/drivers/dri/i965/brw_fs.cpp | 48 +++++
src/mesa/drivers/dri/i965/brw_fs.h | 10 +-
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 121 +++++++++++
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 12 +-
src/mesa/drivers/dri/i965/brw_link.cpp | 4 +
src/mesa/drivers/dri/i965/brw_program.h | 2 +
src/mesa/drivers/dri/i965/brw_shader.cpp | 94 +++++++++
src/mesa/drivers/dri/i965/brw_shader.h | 3 +
src/mesa/drivers/dri/i965/brw_state_upload.c | 3 +
src/mesa/drivers/dri/i965/brw_tes.c | 300 +++++++++++++++++++++++++++
13 files changed, 625 insertions(+), 3 deletions(-)
create mode 100644 src/mesa/drivers/dri/i965/brw_tes.c
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index d147a73..7354aaf 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -151,6 +151,7 @@ i965_FILES = \
brw_state_upload.c \
brw_structs.h \
brw_tcs_surface_state.c \
+ brw_tes.c \
brw_tes_surface_state.c \
brw_tex.c \
brw_tex_layout.c \
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
index c9e0317..64d831d 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -191,6 +191,14 @@ struct brw_vs_prog_key {
struct brw_sampler_prog_key_data tex;
};
+/** The program key for Tessellation Evaluation Shaders. */
+struct brw_tes_prog_key
+{
+ unsigned program_string_id;
+
+ struct brw_sampler_prog_key_data tex;
+};
+
/** The program key for Geometry Shaders. */
struct brw_gs_prog_key
{
@@ -669,6 +677,22 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
char **error_str);
/**
+ * Compile a tessellation evaluation shader.
+ *
+ * Returns the final assembly and the program's size.
+ */
+const unsigned *
+brw_compile_tes(const struct brw_compiler *compiler, void *log_data,
+ void *mem_ctx,
+ const struct brw_tes_prog_key *key,
+ struct brw_tes_prog_data *prog_data,
+ const struct nir_shader *shader,
+ struct gl_shader_program *shader_prog,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str);
+
+/**
* Compile a vertex shader.
*
* Returns the final assembly and the program's size.
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 69bc04c..5e840d1 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1704,6 +1704,12 @@ brw_vertex_program_const(const struct gl_vertex_program *p)
return (const struct brw_vertex_program *) p;
}
+static inline struct brw_tess_eval_program *
+brw_tess_eval_program(struct gl_tess_eval_program *p)
+{
+ return (struct brw_tess_eval_program *) p;
+}
+
static inline struct brw_geometry_program *
brw_geometry_program(struct gl_geometry_program *p)
{
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index c833ef0..de584e4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1686,6 +1686,21 @@ fs_visitor::assign_vs_urb_setup()
}
void
+fs_visitor::assign_tes_urb_setup()
+{
+ assert(stage == MESA_SHADER_TESS_EVAL);
+
+ brw_vue_prog_data *vue_prog_data = (brw_vue_prog_data *) prog_data;
+
+ first_non_payload_grf += 8 * vue_prog_data->urb_read_length;
+
+ /* Rewrite all ATTR file references to HW_REGs. */
+ foreach_block_and_inst(block, fs_inst, inst, cfg) {
+ convert_attr_sources_to_hw_regs(inst);
+ }
+}
+
+void
fs_visitor::assign_gs_urb_setup()
{
assert(stage == MESA_SHADER_GEOMETRY);
@@ -5232,6 +5247,39 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes)
}
bool
+fs_visitor::run_tes()
+{
+ assert(stage == MESA_SHADER_TESS_EVAL);
+
+ payload.num_regs = 5;
+
+ if (shader_time_index >= 0)
+ emit_shader_time_begin();
+
+ emit_nir_code();
+
+ if (failed)
+ return false;
+
+ emit_urb_writes();
+
+ if (shader_time_index >= 0)
+ emit_shader_time_end();
+
+ calculate_cfg();
+
+ optimize();
+
+ assign_curb_setup();
+ assign_tes_urb_setup();
+
+ fixup_3src_null_dest();
+ allocate_registers();
+
+ return !failed;
+}
+
+bool
fs_visitor::run_gs()
{
assert(stage == MESA_SHADER_GEOMETRY);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index f2e3841..372f760 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -81,7 +81,8 @@ public:
struct gl_program *prog,
const nir_shader *shader,
unsigned dispatch_width,
- int shader_time_index);
+ int shader_time_index,
+ const struct brw_vue_map *input_vue_map = NULL);
fs_visitor(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
struct brw_gs_compile *gs_compile,
@@ -109,6 +110,7 @@ public:
bool run_fs(bool do_rep_send);
bool run_vs(gl_clip_plane *clip_planes);
+ bool run_tes();
bool run_gs();
bool run_cs();
void optimize();
@@ -124,6 +126,7 @@ public:
void assign_urb_setup();
void convert_attr_sources_to_hw_regs(fs_inst *inst);
void assign_vs_urb_setup();
+ void assign_tes_urb_setup();
void assign_gs_urb_setup();
bool assign_regs(bool allow_spilling);
void assign_regs_trivial();
@@ -249,6 +252,8 @@ public:
nir_intrinsic_instr *instr);
void nir_emit_intrinsic(const brw::fs_builder &bld,
nir_intrinsic_instr *instr);
+ void nir_emit_tes_intrinsic(const brw::fs_builder &bld,
+ nir_intrinsic_instr *instr);
void nir_emit_ssbo_atomic(const brw::fs_builder &bld,
int op, nir_intrinsic_instr *instr);
void nir_emit_shared_atomic(const brw::fs_builder &bld,
@@ -260,6 +265,7 @@ public:
fs_reg get_nir_src(nir_src src);
fs_reg get_nir_dest(nir_dest dest);
fs_reg get_nir_image_deref(const nir_deref_var *deref);
+ fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
unsigned wr_mask);
@@ -313,6 +319,8 @@ public:
struct brw_stage_prog_data *prog_data;
struct gl_program *prog;
+ const struct brw_vue_map *input_vue_map;
+
int *param_size;
int *virtual_grf_start;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index db38f61..fe87561 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -123,6 +123,7 @@ fs_visitor::nir_setup_outputs()
switch (stage) {
case MESA_SHADER_VERTEX:
+ case MESA_SHADER_TESS_EVAL:
case MESA_SHADER_GEOMETRY: {
unsigned location = var->data.location;
nir_setup_single_output_varying(®, var->type, &location);
@@ -443,6 +444,9 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
case MESA_SHADER_VERTEX:
nir_emit_vs_intrinsic(abld, nir_instr_as_intrinsic(instr));
break;
+ case MESA_SHADER_TESS_EVAL:
+ nir_emit_tes_intrinsic(abld, nir_instr_as_intrinsic(instr));
+ break;
case MESA_SHADER_GEOMETRY:
nir_emit_gs_intrinsic(abld, nir_instr_as_intrinsic(instr));
break;
@@ -1715,6 +1719,24 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
}
}
+fs_reg
+fs_visitor::get_indirect_offset(nir_intrinsic_instr *instr)
+{
+ nir_src *offset_src = nir_get_io_offset_src(instr);
+ nir_const_value *const_value = nir_src_as_const_value(*offset_src);
+
+ if (const_value) {
+ /* The only constant offset we should find is 0. brw_nir.c's
+ * add_const_offset_to_base() will fold other constant offsets
+ * into instr->const_index[0].
+ */
+ assert(const_value->u[0] == 0);
+ return fs_reg();
+ }
+
+ return get_nir_src(*offset_src);
+}
+
void
fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
nir_intrinsic_instr *instr)
@@ -1747,6 +1769,105 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
}
void
+fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld,
+ nir_intrinsic_instr *instr)
+{
+ assert(stage == MESA_SHADER_TESS_EVAL);
+ struct brw_tes_prog_data *tes_prog_data = (struct brw_tes_prog_data *) prog_data;
+
+ fs_reg dest;
+ if (nir_intrinsic_infos[instr->intrinsic].has_dest)
+ dest = get_nir_dest(instr->dest);
+
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_primitive_id:
+ bld.MOV(dest, fs_reg(brw_vec1_grf(0, 1)));
+ break;
+ case nir_intrinsic_load_tess_coord:
+ /* gl_TessCoord is part of the payload in g1-3 */
+ for (unsigned i = 0; i < 3; i++) {
+ bld.MOV(offset(dest, bld, i), fs_reg(brw_vec8_grf(1 + i, 0)));
+ }
+ break;
+
+ case nir_intrinsic_load_tess_level_outer:
+ /* When the TES reads gl_TessLevelOuter, we ensure that the patch header
+ * appears as a push-model input. So, we can simply use the ATTR file
+ * rather than issuing URB read messages. Again, the data is stored
+ * in the high DWords in reverse order.
+ */
+ switch (tes_prog_data->domain) {
+ case BRW_TESS_DOMAIN_QUAD:
+ for (unsigned i = 0; i < 4; i++)
+ bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 - i));
+ break;
+ case BRW_TESS_DOMAIN_TRI:
+ for (unsigned i = 0; i < 3; i++)
+ bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 - i));
+ break;
+ case BRW_TESS_DOMAIN_ISOLINE:
+ for (unsigned i = 0; i < 2; i++)
+ bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 - i));
+ break;
+ }
+ break;
+
+ case nir_intrinsic_load_tess_level_inner:
+ /* When the TES reads gl_TessLevelInner, we ensure that the patch header
+ * appears as a push-model input. So, we can simply use the ATTR file
+ * rather than issuing URB read messages.
+ */
+ switch (tes_prog_data->domain) {
+ case BRW_TESS_DOMAIN_QUAD:
+ bld.MOV(dest, component(fs_reg(ATTR, 0), 3));
+ bld.MOV(offset(dest, bld, 1), component(fs_reg(ATTR, 0), 2));
+ break;
+ case BRW_TESS_DOMAIN_TRI:
+ bld.MOV(dest, component(fs_reg(ATTR, 0), 4));
+ break;
+ case BRW_TESS_DOMAIN_ISOLINE:
+ /* ignore - value is undefined */
+ break;
+ }
+ break;
+
+ case nir_intrinsic_load_input:
+ case nir_intrinsic_load_per_vertex_input: {
+ fs_reg indirect_offset = get_indirect_offset(instr);
+ unsigned imm_offset = instr->const_index[0];
+
+ fs_inst *inst;
+ if (indirect_offset.file == BAD_FILE) {
+ /* Replicate the patch handle to all enabled channels */
+ fs_reg patch_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
+ bld.MOV(patch_handle, retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD));
+
+ inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dest, patch_handle);
+ inst->mlen = 1;
+ } else {
+ /* Indirect indexing - use per-slot offsets as well. */
+ const fs_reg srcs[] = {
+ retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD),
+ indirect_offset
+ };
+ fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+ bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
+
+ inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dest, payload);
+ inst->mlen = 2;
+ }
+ inst->offset = imm_offset;
+ inst->base_mrf = -1;
+ inst->regs_written = instr->num_components;
+ break;
+ }
+ default:
+ nir_emit_intrinsic(bld, instr);
+ break;
+ }
+}
+
+void
fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld,
nir_intrinsic_instr *instr)
{
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 0582e78..b6405cd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -700,7 +700,10 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
fs_reg sources[8];
fs_reg urb_handle;
- urb_handle = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
+ if (stage == MESA_SHADER_TESS_EVAL)
+ urb_handle = fs_reg(retype(brw_vec8_grf(4, 0), BRW_REGISTER_TYPE_UD));
+ else
+ urb_handle = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
/* If we don't have any valid slots to write, just do a minimal urb write
* send to terminate the shader. This includes 1 slot of undefined data,
@@ -934,9 +937,11 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
struct gl_program *prog,
const nir_shader *shader,
unsigned dispatch_width,
- int shader_time_index)
+ int shader_time_index,
+ const struct brw_vue_map *input_vue_map)
: backend_shader(compiler, log_data, mem_ctx, shader, prog_data),
key(key), gs_compile(NULL), prog_data(prog_data), prog(prog),
+ input_vue_map(input_vue_map),
dispatch_width(dispatch_width),
shader_time_index(shader_time_index),
bld(fs_builder(this, dispatch_width).at_end())
@@ -972,6 +977,9 @@ fs_visitor::init()
case MESA_SHADER_VERTEX:
key_tex = &((const brw_vs_prog_key *) key)->tex;
break;
+ case MESA_SHADER_TESS_EVAL:
+ key_tex = &((const brw_tes_prog_key *) key)->tex;
+ break;
case MESA_SHADER_GEOMETRY:
key_tex = &((const brw_gs_prog_key *) key)->tex;
break;
diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp
index 31d29ec..f5a7d20 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -42,6 +42,7 @@ brw_shader_precompile(struct gl_context *ctx,
struct gl_shader_program *sh_prog)
{
struct gl_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
+ struct gl_shader *tes = sh_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
struct gl_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
struct gl_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
struct gl_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE];
@@ -52,6 +53,9 @@ brw_shader_precompile(struct gl_context *ctx,
if (gs && !brw_gs_precompile(ctx, sh_prog, gs->Program))
return false;
+ if (tes && !brw_tes_precompile(ctx, sh_prog, tes->Program))
+ return false;
+
if (vs && !brw_vs_precompile(ctx, sh_prog, vs->Program))
return false;
diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h
index 339b8e1..1cdab97 100644
--- a/src/mesa/drivers/dri/i965/brw_program.h
+++ b/src/mesa/drivers/dri/i965/brw_program.h
@@ -56,6 +56,8 @@ void
brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog,
struct gl_shader *shader, struct gl_program *prog);
+void brw_upload_tes_prog(struct brw_context *brw);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 7a6751b..d954568 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -24,6 +24,7 @@
#include "brw_context.h"
#include "brw_cfg.h"
#include "brw_eu.h"
+#include "brw_fs.h"
#include "brw_nir.h"
#include "glsl/glsl_parser_extras.h"
#include "main/shaderobj.h"
@@ -84,6 +85,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
compiler->scalar_stage[MESA_SHADER_VERTEX] =
devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS);
+ compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = true;
compiler->scalar_stage[MESA_SHADER_GEOMETRY] =
devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false);
compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true;
@@ -135,6 +137,8 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true;
}
+ compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectInput = false;
+
if (compiler->scalar_stage[MESA_SHADER_GEOMETRY])
compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false;
@@ -1289,3 +1293,93 @@ gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx)
}
}
+extern "C" const unsigned *
+brw_compile_tes(const struct brw_compiler *compiler,
+ void *log_data,
+ void *mem_ctx,
+ const struct brw_tes_prog_key *key,
+ struct brw_tes_prog_data *prog_data,
+ const nir_shader *src_shader,
+ struct gl_shader_program *shader_prog,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str)
+{
+ const struct brw_device_info *devinfo = compiler->devinfo;
+ struct gl_shader *shader =
+ shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
+ const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL];
+
+ nir_shader *nir = nir_shader_clone(mem_ctx, src_shader);
+ nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
+ nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
+
+ brw_compute_vue_map(devinfo, &prog_data->base.vue_map,
+ nir->info.outputs_written,
+ nir->info.separate_shader);
+
+ unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4;
+
+ assert(output_size_bytes >= 1);
+ if (output_size_bytes > GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES) {
+ if (error_str)
+ *error_str = ralloc_strdup(mem_ctx, "DS outputs exceed maximum size");
+ return NULL;
+ }
+
+ /* URB entry sizes are stored as a multiple of 64 bytes. */
+ prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
+
+ struct brw_vue_map input_vue_map;
+ brw_compute_tess_vue_map(&input_vue_map,
+ nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID,
+ nir->info.patch_inputs_read);
+
+ bool need_patch_header = nir->info.system_values_read &
+ (BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_OUTER) |
+ BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_INNER));
+
+ /* The TES will pull most inputs using URB read messages.
+ *
+ * However, we push the patch header for TessLevel factors when required,
+ * as it's a tiny amount of extra data.
+ */
+ prog_data->base.urb_read_length = need_patch_header ? 1 : 0;
+
+ if (unlikely(INTEL_DEBUG & DEBUG_TES)) {
+ fprintf(stderr, "TES Input ");
+ brw_print_vue_map(stderr, &input_vue_map);
+ fprintf(stderr, "TES Output ");
+ brw_print_vue_map(stderr, &prog_data->base.vue_map);
+ }
+
+ if (is_scalar) {
+ fs_visitor v(compiler, log_data, mem_ctx, (void *) key,
+ &prog_data->base.base, shader->Program, nir, 8,
+ shader_time_index, &input_vue_map);
+ if (!v.run_tes()) {
+ if (error_str)
+ *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
+ return NULL;
+ }
+
+ prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
+
+ fs_generator g(compiler, log_data, mem_ctx, (void *) key,
+ &prog_data->base.base, v.promoted_constants, false,
+ "TES");
+ if (unlikely(INTEL_DEBUG & DEBUG_TES)) {
+ g.enable_debug(ralloc_asprintf(mem_ctx,
+ "%s tessellation evaluation shader %s",
+ nir->info.label ? nir->info.label
+ : "unnamed",
+ nir->info.name));
+ }
+
+ g.generate_code(v.cfg, 8);
+
+ return g.get_assembly(final_assembly_size);
+ } else {
+ unreachable("XXX: vec4 tessellation evalation shaders not merged yet.");
+ }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 8c5778f..2e73f12 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -273,6 +273,9 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage,
bool brw_vs_precompile(struct gl_context *ctx,
struct gl_shader_program *shader_prog,
struct gl_program *prog);
+bool brw_tes_precompile(struct gl_context *ctx,
+ struct gl_shader_program *shader_prog,
+ struct gl_program *prog);
bool brw_gs_precompile(struct gl_context *ctx,
struct gl_shader_program *shader_prog,
struct gl_program *prog);
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index cf3cf97..c657b25 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -678,6 +678,7 @@ brw_upload_programs(struct brw_context *brw,
{
if (pipeline == BRW_RENDER_PIPELINE) {
brw_upload_vs_prog(brw);
+ brw_upload_tes_prog(brw);
if (brw->gen < 6)
brw_upload_ff_gs_prog(brw);
@@ -691,6 +692,8 @@ brw_upload_programs(struct brw_context *brw,
bool old_separate = brw->vue_map_geom_out.separate;
if (brw->geometry_program)
brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
+ else if (brw->tess_eval_program)
+ brw->vue_map_geom_out = brw->tes.prog_data->base.vue_map;
else
brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map;
diff --git a/src/mesa/drivers/dri/i965/brw_tes.c b/src/mesa/drivers/dri/i965/brw_tes.c
new file mode 100644
index 0000000..daa8f86
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_tes.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_tes.c
+ *
+ * Tessellation evaluation shader state upload code.
+ */
+
+#include "brw_context.h"
+#include "brw_nir.h"
+#include "brw_program.h"
+#include "brw_shader.h"
+#include "brw_state.h"
+#include "program/prog_parameter.h"
+
+static void
+brw_tes_debug_recompile(struct brw_context *brw,
+ struct gl_shader_program *shader_prog,
+ const struct brw_tes_prog_key *key)
+{
+ struct brw_cache_item *c = NULL;
+ const struct brw_tes_prog_key *old_key = NULL;
+ bool found = false;
+
+ perf_debug("Recompiling tessellation evaluation shader for program %d\n",
+ shader_prog->Name);
+
+ for (unsigned int i = 0; i < brw->cache.size; i++) {
+ for (c = brw->cache.items[i]; c; c = c->next) {
+ if (c->cache_id == BRW_CACHE_TES_PROG) {
+ old_key = c->key;
+
+ if (old_key->program_string_id == key->program_string_id)
+ break;
+ }
+ }
+ if (c)
+ break;
+ }
+
+ if (!c) {
+ perf_debug(" Didn't find previous compile in the shader cache for "
+ "debug\n");
+ return;
+ }
+
+ found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
+
+ if (!found) {
+ perf_debug(" Something else\n");
+ }
+}
+
+static bool
+brw_codegen_tes_prog(struct brw_context *brw,
+ struct gl_shader_program *shader_prog,
+ struct brw_tess_eval_program *tep,
+ struct brw_tes_prog_key *key)
+{
+ const struct brw_compiler *compiler = brw->intelScreen->compiler;
+ const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
+ struct brw_stage_state *stage_state = &brw->tes.base;
+ nir_shader *nir = tep->program.Base.nir;
+ struct brw_tes_prog_data prog_data;
+ bool start_busy = false;
+ double start_time = 0;
+
+ memset(&prog_data, 0, sizeof(prog_data));
+
+ brw_assign_common_binding_table_offsets(MESA_SHADER_TESS_EVAL, devinfo,
+ shader_prog, &tep->program.Base,
+ &prog_data.base.base, 0);
+
+ switch (tep->program.Spacing) {
+ case GL_EQUAL:
+ prog_data.partitioning = BRW_TESS_PARTITIONING_INTEGER;
+ break;
+ case GL_FRACTIONAL_ODD:
+ prog_data.partitioning = BRW_TESS_PARTITIONING_ODD_FRACTIONAL;
+ break;
+ case GL_FRACTIONAL_EVEN:
+ prog_data.partitioning = BRW_TESS_PARTITIONING_EVEN_FRACTIONAL;
+ break;
+ default:
+ unreachable("invalid domain shader spacing");
+ }
+
+ switch (tep->program.PrimitiveMode) {
+ case GL_QUADS:
+ prog_data.domain = BRW_TESS_DOMAIN_QUAD;
+ break;
+ case GL_TRIANGLES:
+ prog_data.domain = BRW_TESS_DOMAIN_TRI;
+ break;
+ case GL_ISOLINES:
+ prog_data.domain = BRW_TESS_DOMAIN_ISOLINE;
+ break;
+ default:
+ unreachable("invalid domain shader primitive mode");
+ }
+
+ if (tep->program.PointMode) {
+ prog_data.output_topology = BRW_TESS_OUTPUT_TOPOLOGY_POINT;
+ } else if (tep->program.PrimitiveMode == GL_ISOLINES) {
+ prog_data.output_topology = BRW_TESS_OUTPUT_TOPOLOGY_LINE;
+ } else {
+ /* Hardware winding order is backwards from OpenGL */
+ switch (tep->program.VertexOrder) {
+ case GL_CCW:
+ prog_data.output_topology = BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW;
+ break;
+ case GL_CW:
+ prog_data.output_topology = BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW;
+ break;
+ default:
+ unreachable("invalid domain shader vertex order");
+ }
+ }
+
+ /* Allocate the references to the uniforms that will end up in the
+ * prog_data associated with the compiled program, and which will be freed
+ * by the state cache.
+ *
+ * Note: param_count needs to be num_uniform_components * 4, since we add
+ * padding around uniform values below vec4 size, so the worst case is that
+ * every uniform is a float which gets padded to the size of a vec4.
+ */
+ struct gl_shader *tes = shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
+ int param_count = nir->num_uniforms;
+ if (!compiler->scalar_stage[MESA_SHADER_TESS_EVAL])
+ param_count *= 4;
+
+ prog_data.base.base.param =
+ rzalloc_array(NULL, const gl_constant_value *, param_count);
+ prog_data.base.base.pull_param =
+ rzalloc_array(NULL, const gl_constant_value *, param_count);
+ prog_data.base.base.image_param =
+ rzalloc_array(NULL, struct brw_image_param, tes->NumImages);
+ prog_data.base.base.nr_params = param_count;
+ prog_data.base.base.nr_image_params = tes->NumImages;
+
+ brw_nir_setup_glsl_uniforms(nir, shader_prog, &tep->program.Base,
+ &prog_data.base.base,
+ compiler->scalar_stage[MESA_SHADER_TESS_EVAL]);
+
+ if (unlikely(INTEL_DEBUG & DEBUG_TES))
+ brw_dump_ir("tessellation evaluation", shader_prog, tes, NULL);
+
+ int st_index = -1;
+ if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME))
+ st_index = brw_get_shader_time_index(brw, shader_prog, NULL, ST_TES);
+
+ if (unlikely(brw->perf_debug)) {
+ start_busy = brw->batch.last_bo && drm_intel_bo_busy(brw->batch.last_bo);
+ start_time = get_time();
+ }
+
+ void *mem_ctx = ralloc_context(NULL);
+ unsigned program_size;
+ char *error_str;
+ const unsigned *program =
+ brw_compile_tes(compiler, brw, mem_ctx, key, &prog_data, nir,
+ shader_prog, st_index, &program_size, &error_str);
+ if (program == NULL) {
+ if (shader_prog) {
+ shader_prog->LinkStatus = false;
+ ralloc_strcat(&shader_prog->InfoLog, error_str);
+ }
+
+ _mesa_problem(NULL, "Failed to compile tessellation evaluation shader: "
+ "%s\n", error_str);
+
+ ralloc_free(mem_ctx);
+ return false;
+ }
+
+ if (unlikely(brw->perf_debug)) {
+ struct brw_shader *btes = (struct brw_shader *) tes;
+ if (btes->compiled_once) {
+ brw_tes_debug_recompile(brw, shader_prog, key);
+ }
+ if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
+ perf_debug("TES compile took %.03f ms and stalled the GPU\n",
+ (get_time() - start_time) * 1000);
+ }
+ btes->compiled_once = true;
+ }
+
+ /* Scratch space is used for register spilling */
+ if (prog_data.base.base.total_scratch) {
+ brw_get_scratch_bo(brw, &stage_state->scratch_bo,
+ prog_data.base.base.total_scratch *
+ brw->max_ds_threads);
+ }
+
+ brw_upload_cache(&brw->cache, BRW_CACHE_TES_PROG,
+ key, sizeof(*key),
+ program, program_size,
+ &prog_data, sizeof(prog_data),
+ &stage_state->prog_offset, &brw->tes.prog_data);
+ ralloc_free(mem_ctx);
+
+ return true;
+}
+
+
+void
+brw_upload_tes_prog(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->ctx;
+ struct gl_shader_program **current = ctx->_Shader->CurrentProgram;
+ struct brw_stage_state *stage_state = &brw->tes.base;
+ struct brw_tes_prog_key key;
+ /* BRW_NEW_TESS_EVAL_PROGRAM */
+ struct brw_tess_eval_program *tep =
+ (struct brw_tess_eval_program *) brw->tess_eval_program;
+
+ if (!brw_state_dirty(brw,
+ _NEW_TEXTURE,
+ BRW_NEW_TESS_EVAL_PROGRAM))
+ return;
+
+ if (tep == NULL) {
+ /* Other state atoms had better not try to access prog_data, since
+ * there's no TES program.
+ */
+ brw->tes.prog_data = NULL;
+ brw->tes.base.prog_data = NULL;
+ return;
+ }
+
+ struct gl_program *prog = &tep->program.Base;
+
+ memset(&key, 0, sizeof(key));
+
+ key.program_string_id = tep->id;
+
+ /* _NEW_TEXTURE */
+ brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
+ &key.tex);
+
+ if (!brw_search_cache(&brw->cache, BRW_CACHE_TES_PROG,
+ &key, sizeof(key),
+ &stage_state->prog_offset, &brw->tes.prog_data)) {
+ bool success = brw_codegen_tes_prog(brw, current[MESA_SHADER_TESS_EVAL],
+ tep, &key);
+ assert(success);
+ (void)success;
+ }
+ brw->tes.base.prog_data = &brw->tes.prog_data->base.base;
+}
+
+
+bool
+brw_tes_precompile(struct gl_context *ctx,
+ struct gl_shader_program *shader_prog,
+ struct gl_program *prog)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_tes_prog_key key;
+ uint32_t old_prog_offset = brw->tes.base.prog_offset;
+ struct brw_tes_prog_data *old_prog_data = brw->tes.prog_data;
+ bool success;
+
+ struct gl_tess_eval_program *tep = (struct gl_tess_eval_program *)prog;
+ struct brw_tess_eval_program *btep = brw_tess_eval_program(tep);
+
+ memset(&key, 0, sizeof(key));
+
+ key.program_string_id = btep->id;
+ brw_setup_tex_for_precompile(brw, &key.tex, prog);
+
+ success = brw_codegen_tes_prog(brw, shader_prog, btep, &key);
+
+ brw->tes.base.prog_offset = old_prog_offset;
+ brw->tes.prog_data = old_prog_data;
+
+ return success;
+}
--
2.6.3
More information about the mesa-dev
mailing list