[Mesa-dev] [PATCH v2 11/13] i965: Add tessellation evaluation shaders
Jordan Justen
jordan.l.justen at intel.com
Mon Dec 14 01:24:37 PST 2015
Whew... I probably would have split this one into 5 or so. Then krh
would have grumbled at me. ;)
On 2015-12-11 13:24:00, Kenneth Graunke wrote:
> The TES is essentially a post-tessellator VS, which has access to the
> entire TCS output patch, and a special gl_TessCoord input. Otherwise,
> they're very straightforward.
>
> This patch implements SIMD8 tessellation evaluation shaders for Gen8+.
> The tessellator can generate a lot of geometry, so operating in SIMD8
> mode (8 vertices per thread) is more efficient than SIMD4x2 mode (only
> 2 vertices per thread). I have another patch which implements SIMD4x2
> mode for older hardware (or via an environment variable override).
>
> We currently handle all inputs via the pull model.
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
> src/mesa/drivers/dri/i965/Makefile.sources | 1 +
> src/mesa/drivers/dri/i965/brw_compiler.h | 24 +++
> src/mesa/drivers/dri/i965/brw_context.h | 6 +
> src/mesa/drivers/dri/i965/brw_fs.cpp | 48 +++++
> src/mesa/drivers/dri/i965/brw_fs.h | 10 +-
> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 121 +++++++++++
> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 12 +-
> src/mesa/drivers/dri/i965/brw_link.cpp | 4 +
> src/mesa/drivers/dri/i965/brw_program.h | 2 +
> src/mesa/drivers/dri/i965/brw_shader.cpp | 94 +++++++++
> src/mesa/drivers/dri/i965/brw_shader.h | 3 +
> src/mesa/drivers/dri/i965/brw_state_upload.c | 3 +
> src/mesa/drivers/dri/i965/brw_tes.c | 300 +++++++++++++++++++++++++++
> 13 files changed, 625 insertions(+), 3 deletions(-)
> create mode 100644 src/mesa/drivers/dri/i965/brw_tes.c
>
> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
> index d147a73..7354aaf 100644
> --- a/src/mesa/drivers/dri/i965/Makefile.sources
> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
> @@ -151,6 +151,7 @@ i965_FILES = \
> brw_state_upload.c \
> brw_structs.h \
> brw_tcs_surface_state.c \
> + brw_tes.c \
> brw_tes_surface_state.c \
> brw_tex.c \
> brw_tex_layout.c \
> diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h
> index c9e0317..64d831d 100644
> --- a/src/mesa/drivers/dri/i965/brw_compiler.h
> +++ b/src/mesa/drivers/dri/i965/brw_compiler.h
> @@ -191,6 +191,14 @@ struct brw_vs_prog_key {
> struct brw_sampler_prog_key_data tex;
> };
>
> +/** The program key for Tessellation Evaluation Shaders. */
> +struct brw_tes_prog_key
> +{
> + unsigned program_string_id;
> +
> + struct brw_sampler_prog_key_data tex;
> +};
> +
> /** The program key for Geometry Shaders. */
> struct brw_gs_prog_key
> {
> @@ -669,6 +677,22 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
> char **error_str);
>
> /**
> + * Compile a tessellation evaluation shader.
> + *
> + * Returns the final assembly and the program's size.
> + */
> +const unsigned *
> +brw_compile_tes(const struct brw_compiler *compiler, void *log_data,
> + void *mem_ctx,
> + const struct brw_tes_prog_key *key,
> + struct brw_tes_prog_data *prog_data,
> + const struct nir_shader *shader,
> + struct gl_shader_program *shader_prog,
> + int shader_time_index,
> + unsigned *final_assembly_size,
> + char **error_str);
> +
> +/**
> * Compile a vertex shader.
> *
> * Returns the final assembly and the program's size.
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 69bc04c..5e840d1 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1704,6 +1704,12 @@ brw_vertex_program_const(const struct gl_vertex_program *p)
> return (const struct brw_vertex_program *) p;
> }
>
> +static inline struct brw_tess_eval_program *
> +brw_tess_eval_program(struct gl_tess_eval_program *p)
> +{
> + return (struct brw_tess_eval_program *) p;
> +}
> +
> static inline struct brw_geometry_program *
> brw_geometry_program(struct gl_geometry_program *p)
> {
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index c833ef0..de584e4 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -1686,6 +1686,21 @@ fs_visitor::assign_vs_urb_setup()
> }
>
> void
> +fs_visitor::assign_tes_urb_setup()
> +{
> + assert(stage == MESA_SHADER_TESS_EVAL);
> +
> + brw_vue_prog_data *vue_prog_data = (brw_vue_prog_data *) prog_data;
> +
> + first_non_payload_grf += 8 * vue_prog_data->urb_read_length;
> +
> + /* Rewrite all ATTR file references to HW_REGs. */
> + foreach_block_and_inst(block, fs_inst, inst, cfg) {
> + convert_attr_sources_to_hw_regs(inst);
> + }
> +}
> +
> +void
> fs_visitor::assign_gs_urb_setup()
> {
> assert(stage == MESA_SHADER_GEOMETRY);
> @@ -5232,6 +5247,39 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes)
> }
>
> bool
> +fs_visitor::run_tes()
> +{
> + assert(stage == MESA_SHADER_TESS_EVAL);
> +
> + payload.num_regs = 5;
How about a comment like setup_vs_payload has?
Does TessLevel not being accessed affect this? (I'm a little confused
about what happens when TessLevel is or isn't accessed.)
> +
> + if (shader_time_index >= 0)
> + emit_shader_time_begin();
> +
> + emit_nir_code();
> +
> + if (failed)
> + return false;
> +
> + emit_urb_writes();
> +
> + if (shader_time_index >= 0)
> + emit_shader_time_end();
> +
> + calculate_cfg();
> +
> + optimize();
> +
> + assign_curb_setup();
> + assign_tes_urb_setup();
> +
> + fixup_3src_null_dest();
> + allocate_registers();
> +
> + return !failed;
> +}
> +
> +bool
> fs_visitor::run_gs()
> {
> assert(stage == MESA_SHADER_GEOMETRY);
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index f2e3841..372f760 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -81,7 +81,8 @@ public:
> struct gl_program *prog,
> const nir_shader *shader,
> unsigned dispatch_width,
> - int shader_time_index);
> + int shader_time_index,
> + const struct brw_vue_map *input_vue_map = NULL);
> fs_visitor(const struct brw_compiler *compiler, void *log_data,
> void *mem_ctx,
> struct brw_gs_compile *gs_compile,
> @@ -109,6 +110,7 @@ public:
>
> bool run_fs(bool do_rep_send);
> bool run_vs(gl_clip_plane *clip_planes);
> + bool run_tes();
> bool run_gs();
> bool run_cs();
> void optimize();
> @@ -124,6 +126,7 @@ public:
> void assign_urb_setup();
> void convert_attr_sources_to_hw_regs(fs_inst *inst);
> void assign_vs_urb_setup();
> + void assign_tes_urb_setup();
> void assign_gs_urb_setup();
> bool assign_regs(bool allow_spilling);
> void assign_regs_trivial();
> @@ -249,6 +252,8 @@ public:
> nir_intrinsic_instr *instr);
> void nir_emit_intrinsic(const brw::fs_builder &bld,
> nir_intrinsic_instr *instr);
> + void nir_emit_tes_intrinsic(const brw::fs_builder &bld,
> + nir_intrinsic_instr *instr);
> void nir_emit_ssbo_atomic(const brw::fs_builder &bld,
> int op, nir_intrinsic_instr *instr);
> void nir_emit_shared_atomic(const brw::fs_builder &bld,
> @@ -260,6 +265,7 @@ public:
> fs_reg get_nir_src(nir_src src);
> fs_reg get_nir_dest(nir_dest dest);
> fs_reg get_nir_image_deref(const nir_deref_var *deref);
> + fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
> void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
> unsigned wr_mask);
>
> @@ -313,6 +319,8 @@ public:
> struct brw_stage_prog_data *prog_data;
> struct gl_program *prog;
>
> + const struct brw_vue_map *input_vue_map;
> +
> int *param_size;
>
> int *virtual_grf_start;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index db38f61..fe87561 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -123,6 +123,7 @@ fs_visitor::nir_setup_outputs()
>
> switch (stage) {
> case MESA_SHADER_VERTEX:
> + case MESA_SHADER_TESS_EVAL:
> case MESA_SHADER_GEOMETRY: {
> unsigned location = var->data.location;
> nir_setup_single_output_varying(&reg, var->type, &location);
> @@ -443,6 +444,9 @@ fs_visitor::nir_emit_instr(nir_instr *instr)
> case MESA_SHADER_VERTEX:
> nir_emit_vs_intrinsic(abld, nir_instr_as_intrinsic(instr));
> break;
> + case MESA_SHADER_TESS_EVAL:
> + nir_emit_tes_intrinsic(abld, nir_instr_as_intrinsic(instr));
> + break;
> case MESA_SHADER_GEOMETRY:
> nir_emit_gs_intrinsic(abld, nir_instr_as_intrinsic(instr));
> break;
> @@ -1715,6 +1719,24 @@ fs_visitor::emit_gs_input_load(const fs_reg &dst,
> }
> }
>
> +fs_reg
> +fs_visitor::get_indirect_offset(nir_intrinsic_instr *instr)
> +{
> + nir_src *offset_src = nir_get_io_offset_src(instr);
> + nir_const_value *const_value = nir_src_as_const_value(*offset_src);
> +
> + if (const_value) {
> + /* The only constant offset we should find is 0. brw_nir.c's
> + * add_const_offset_to_base() will fold other constant offsets
> + * into instr->const_index[0].
> + */
> + assert(const_value->u[0] == 0);
> + return fs_reg();
> + }
> +
> + return get_nir_src(*offset_src);
> +}
> +
> void
> fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
> nir_intrinsic_instr *instr)
> @@ -1747,6 +1769,105 @@ fs_visitor::nir_emit_vs_intrinsic(const fs_builder &bld,
> }
>
> void
> +fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld,
> + nir_intrinsic_instr *instr)
> +{
> + assert(stage == MESA_SHADER_TESS_EVAL);
> + struct brw_tes_prog_data *tes_prog_data = (struct brw_tes_prog_data *) prog_data;
> +
> + fs_reg dest;
> + if (nir_intrinsic_infos[instr->intrinsic].has_dest)
> + dest = get_nir_dest(instr->dest);
> +
> + switch (instr->intrinsic) {
> + case nir_intrinsic_load_primitive_id:
> + bld.MOV(dest, fs_reg(brw_vec1_grf(0, 1)));
> + break;
> + case nir_intrinsic_load_tess_coord:
> + /* gl_TessCoord is part of the payload in g1-3 */
> + for (unsigned i = 0; i < 3; i++) {
> + bld.MOV(offset(dest, bld, i), fs_reg(brw_vec8_grf(1 + i, 0)));
> + }
> + break;
> +
> + case nir_intrinsic_load_tess_level_outer:
> + /* When the TES reads gl_TessLevelOuter, we ensure that the patch header
> + * appears as a push-model input. So, we can simply use the ATTR file
> + * rather than issuing URB read messages. Again, the data is stored
> + * in the high DWords in reverse order.
I'm not sure what the 'Again' part is referencing.
> + */
> + switch (tes_prog_data->domain) {
> + case BRW_TESS_DOMAIN_QUAD:
> + for (unsigned i = 0; i < 4; i++)
> + bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 - i));
> + break;
> + case BRW_TESS_DOMAIN_TRI:
> + for (unsigned i = 0; i < 3; i++)
> + bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 - i));
> + break;
> + case BRW_TESS_DOMAIN_ISOLINE:
> + for (unsigned i = 0; i < 2; i++)
> + bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 - i));
> + break;
> + }
> + break;
> +
> + case nir_intrinsic_load_tess_level_inner:
> + /* When the TES reads gl_TessLevelInner, we ensure that the patch header
> + * appears as a push-model input. So, we can simply use the ATTR file
> + * rather than issuing URB read messages.
> + */
> + switch (tes_prog_data->domain) {
> + case BRW_TESS_DOMAIN_QUAD:
> + bld.MOV(dest, component(fs_reg(ATTR, 0), 3));
> + bld.MOV(offset(dest, bld, 1), component(fs_reg(ATTR, 0), 2));
> + break;
> + case BRW_TESS_DOMAIN_TRI:
> + bld.MOV(dest, component(fs_reg(ATTR, 0), 4));
> + break;
> + case BRW_TESS_DOMAIN_ISOLINE:
> + /* ignore - value is undefined */
> + break;
> + }
> + break;
> +
> + case nir_intrinsic_load_input:
> + case nir_intrinsic_load_per_vertex_input: {
> + fs_reg indirect_offset = get_indirect_offset(instr);
> + unsigned imm_offset = instr->const_index[0];
> +
> + fs_inst *inst;
> + if (indirect_offset.file == BAD_FILE) {
> + /* Replicate the patch handle to all enabled channels */
> + fs_reg patch_handle = bld.vgrf(BRW_REGISTER_TYPE_UD, 1);
> + bld.MOV(patch_handle, retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD));
> +
> + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dest, patch_handle);
> + inst->mlen = 1;
> + } else {
> + /* Indirect indexing - use per-slot offsets as well. */
> + const fs_reg srcs[] = {
> + retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD),
> + indirect_offset
> + };
> + fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
> + bld.LOAD_PAYLOAD(payload, srcs, ARRAY_SIZE(srcs), 0);
> +
> + inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT, dest, payload);
> + inst->mlen = 2;
> + }
> + inst->offset = imm_offset;
> + inst->base_mrf = -1;
> + inst->regs_written = instr->num_components;
> + break;
> + }
> + default:
> + nir_emit_intrinsic(bld, instr);
> + break;
> + }
> +}
> +
> +void
> fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld,
> nir_intrinsic_instr *instr)
> {
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 0582e78..b6405cd 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -700,7 +700,10 @@ fs_visitor::emit_urb_writes(const fs_reg &gs_vertex_count)
> fs_reg sources[8];
> fs_reg urb_handle;
>
> - urb_handle = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
> + if (stage == MESA_SHADER_TESS_EVAL)
> + urb_handle = fs_reg(retype(brw_vec8_grf(4, 0), BRW_REGISTER_TYPE_UD));
> + else
> + urb_handle = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
>
> /* If we don't have any valid slots to write, just do a minimal urb write
> * send to terminate the shader. This includes 1 slot of undefined data,
> @@ -934,9 +937,11 @@ fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
> struct gl_program *prog,
> const nir_shader *shader,
> unsigned dispatch_width,
> - int shader_time_index)
> + int shader_time_index,
> + const struct brw_vue_map *input_vue_map)
> : backend_shader(compiler, log_data, mem_ctx, shader, prog_data),
> key(key), gs_compile(NULL), prog_data(prog_data), prog(prog),
> + input_vue_map(input_vue_map),
> dispatch_width(dispatch_width),
> shader_time_index(shader_time_index),
> bld(fs_builder(this, dispatch_width).at_end())
> @@ -972,6 +977,9 @@ fs_visitor::init()
> case MESA_SHADER_VERTEX:
> key_tex = &((const brw_vs_prog_key *) key)->tex;
> break;
> + case MESA_SHADER_TESS_EVAL:
> + key_tex = &((const brw_tes_prog_key *) key)->tex;
> + break;
> case MESA_SHADER_GEOMETRY:
> key_tex = &((const brw_gs_prog_key *) key)->tex;
> break;
> diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp
> index 31d29ec..f5a7d20 100644
> --- a/src/mesa/drivers/dri/i965/brw_link.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_link.cpp
> @@ -42,6 +42,7 @@ brw_shader_precompile(struct gl_context *ctx,
> struct gl_shader_program *sh_prog)
> {
> struct gl_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
> + struct gl_shader *tes = sh_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
> struct gl_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
> struct gl_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
> struct gl_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE];
> @@ -52,6 +53,9 @@ brw_shader_precompile(struct gl_context *ctx,
> if (gs && !brw_gs_precompile(ctx, sh_prog, gs->Program))
> return false;
>
> + if (tes && !brw_tes_precompile(ctx, sh_prog, tes->Program))
> + return false;
> +
> if (vs && !brw_vs_precompile(ctx, sh_prog, vs->Program))
> return false;
>
> diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h
> index 339b8e1..1cdab97 100644
> --- a/src/mesa/drivers/dri/i965/brw_program.h
> +++ b/src/mesa/drivers/dri/i965/brw_program.h
> @@ -56,6 +56,8 @@ void
> brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog,
> struct gl_shader *shader, struct gl_program *prog);
>
> +void brw_upload_tes_prog(struct brw_context *brw);
> +
> #ifdef __cplusplus
> } /* extern "C" */
> #endif
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index 7a6751b..d954568 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -24,6 +24,7 @@
> #include "brw_context.h"
> #include "brw_cfg.h"
> #include "brw_eu.h"
> +#include "brw_fs.h"
> #include "brw_nir.h"
> #include "glsl/glsl_parser_extras.h"
> #include "main/shaderobj.h"
> @@ -84,6 +85,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
>
> compiler->scalar_stage[MESA_SHADER_VERTEX] =
> devinfo->gen >= 8 && !(INTEL_DEBUG & DEBUG_VEC4VS);
> + compiler->scalar_stage[MESA_SHADER_TESS_EVAL] = true;
> compiler->scalar_stage[MESA_SHADER_GEOMETRY] =
> devinfo->gen >= 8 && env_var_as_boolean("INTEL_SCALAR_GS", false);
> compiler->scalar_stage[MESA_SHADER_FRAGMENT] = true;
> @@ -135,6 +137,8 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
> compiler->glsl_compiler_options[i].LowerBufferInterfaceBlocks = true;
> }
>
> + compiler->glsl_compiler_options[MESA_SHADER_TESS_EVAL].EmitNoIndirectInput = false;
> +
> if (compiler->scalar_stage[MESA_SHADER_GEOMETRY])
> compiler->glsl_compiler_options[MESA_SHADER_GEOMETRY].EmitNoIndirectInput = false;
>
> @@ -1289,3 +1293,93 @@ gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx)
> }
> }
>
> +extern "C" const unsigned *
> +brw_compile_tes(const struct brw_compiler *compiler,
> + void *log_data,
> + void *mem_ctx,
> + const struct brw_tes_prog_key *key,
> + struct brw_tes_prog_data *prog_data,
> + const nir_shader *src_shader,
> + struct gl_shader_program *shader_prog,
> + int shader_time_index,
> + unsigned *final_assembly_size,
> + char **error_str)
There was some discussion of making a new brw_compiler.c to cover the
brw_compiler.h internal 'API'. Of course, all the brw_compile_*
functions require C++, so maybe it would need to be brw_compiler.cpp.
Thoughts?
...
Reviewed-by: Jordan Justen <jordan.l.justen at intel.com>
> +{
> + const struct brw_device_info *devinfo = compiler->devinfo;
> + struct gl_shader *shader =
> + shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
> + const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL];
> +
> + nir_shader *nir = nir_shader_clone(mem_ctx, src_shader);
> + nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
> + nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
> +
> + brw_compute_vue_map(devinfo, &prog_data->base.vue_map,
> + nir->info.outputs_written,
> + nir->info.separate_shader);
> +
> + unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4;
> +
> + assert(output_size_bytes >= 1);
> + if (output_size_bytes > GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES) {
> + if (error_str)
> + *error_str = ralloc_strdup(mem_ctx, "DS outputs exceed maximum size");
> + return NULL;
> + }
> +
> + /* URB entry sizes are stored as a multiple of 64 bytes. */
> + prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
> +
> + struct brw_vue_map input_vue_map;
> + brw_compute_tess_vue_map(&input_vue_map,
> + nir->info.inputs_read & ~VARYING_BIT_PRIMITIVE_ID,
> + nir->info.patch_inputs_read);
> +
> + bool need_patch_header = nir->info.system_values_read &
> + (BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_OUTER) |
> + BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_INNER));
> +
> + /* The TES will pull most inputs using URB read messages.
> + *
> + * However, we push the patch header for TessLevel factors when required,
> + * as it's a tiny amount of extra data.
> + */
> + prog_data->base.urb_read_length = need_patch_header ? 1 : 0;
> +
> + if (unlikely(INTEL_DEBUG & DEBUG_TES)) {
> + fprintf(stderr, "TES Input ");
> + brw_print_vue_map(stderr, &input_vue_map);
> + fprintf(stderr, "TES Output ");
> + brw_print_vue_map(stderr, &prog_data->base.vue_map);
> + }
> +
> + if (is_scalar) {
> + fs_visitor v(compiler, log_data, mem_ctx, (void *) key,
> + &prog_data->base.base, shader->Program, nir, 8,
> + shader_time_index, &input_vue_map);
> + if (!v.run_tes()) {
> + if (error_str)
> + *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
> + return NULL;
> + }
> +
> + prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
> +
> + fs_generator g(compiler, log_data, mem_ctx, (void *) key,
> + &prog_data->base.base, v.promoted_constants, false,
> + "TES");
> + if (unlikely(INTEL_DEBUG & DEBUG_TES)) {
> + g.enable_debug(ralloc_asprintf(mem_ctx,
> + "%s tessellation evaluation shader %s",
> + nir->info.label ? nir->info.label
> + : "unnamed",
> + nir->info.name));
> + }
> +
> + g.generate_code(v.cfg, 8);
> +
> + return g.get_assembly(final_assembly_size);
> + } else {
> + unreachable("XXX: vec4 tessellation evalation shaders not merged yet.");
> + }
> +}
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
> index 8c5778f..2e73f12 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.h
> +++ b/src/mesa/drivers/dri/i965/brw_shader.h
> @@ -273,6 +273,9 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage,
> bool brw_vs_precompile(struct gl_context *ctx,
> struct gl_shader_program *shader_prog,
> struct gl_program *prog);
> +bool brw_tes_precompile(struct gl_context *ctx,
> + struct gl_shader_program *shader_prog,
> + struct gl_program *prog);
> bool brw_gs_precompile(struct gl_context *ctx,
> struct gl_shader_program *shader_prog,
> struct gl_program *prog);
> diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
> index cf3cf97..c657b25 100644
> --- a/src/mesa/drivers/dri/i965/brw_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
> @@ -678,6 +678,7 @@ brw_upload_programs(struct brw_context *brw,
> {
> if (pipeline == BRW_RENDER_PIPELINE) {
> brw_upload_vs_prog(brw);
> + brw_upload_tes_prog(brw);
>
> if (brw->gen < 6)
> brw_upload_ff_gs_prog(brw);
> @@ -691,6 +692,8 @@ brw_upload_programs(struct brw_context *brw,
> bool old_separate = brw->vue_map_geom_out.separate;
> if (brw->geometry_program)
> brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
> + else if (brw->tess_eval_program)
> + brw->vue_map_geom_out = brw->tes.prog_data->base.vue_map;
> else
> brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map;
>
> diff --git a/src/mesa/drivers/dri/i965/brw_tes.c b/src/mesa/drivers/dri/i965/brw_tes.c
> new file mode 100644
> index 0000000..daa8f86
> --- /dev/null
> +++ b/src/mesa/drivers/dri/i965/brw_tes.c
> @@ -0,0 +1,300 @@
> +/*
> + * Copyright © 2013 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + */
> +
> +/**
> + * \file brw_tes.c
> + *
> + * Tessellation evaluation shader state upload code.
> + */
> +
> +#include "brw_context.h"
> +#include "brw_nir.h"
> +#include "brw_program.h"
> +#include "brw_shader.h"
> +#include "brw_state.h"
> +#include "program/prog_parameter.h"
> +
> +static void
> +brw_tes_debug_recompile(struct brw_context *brw,
> + struct gl_shader_program *shader_prog,
> + const struct brw_tes_prog_key *key)
> +{
> + struct brw_cache_item *c = NULL;
> + const struct brw_tes_prog_key *old_key = NULL;
> + bool found = false;
> +
> + perf_debug("Recompiling tessellation evaluation shader for program %d\n",
> + shader_prog->Name);
> +
> + for (unsigned int i = 0; i < brw->cache.size; i++) {
> + for (c = brw->cache.items[i]; c; c = c->next) {
> + if (c->cache_id == BRW_CACHE_TES_PROG) {
> + old_key = c->key;
> +
> + if (old_key->program_string_id == key->program_string_id)
> + break;
> + }
> + }
> + if (c)
> + break;
> + }
> +
> + if (!c) {
> + perf_debug(" Didn't find previous compile in the shader cache for "
> + "debug\n");
> + return;
> + }
> +
> + found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
> +
> + if (!found) {
> + perf_debug(" Something else\n");
> + }
> +}
> +
> +static bool
> +brw_codegen_tes_prog(struct brw_context *brw,
> + struct gl_shader_program *shader_prog,
> + struct brw_tess_eval_program *tep,
> + struct brw_tes_prog_key *key)
> +{
> + const struct brw_compiler *compiler = brw->intelScreen->compiler;
> + const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
> + struct brw_stage_state *stage_state = &brw->tes.base;
> + nir_shader *nir = tep->program.Base.nir;
> + struct brw_tes_prog_data prog_data;
> + bool start_busy = false;
> + double start_time = 0;
> +
> + memset(&prog_data, 0, sizeof(prog_data));
> +
> + brw_assign_common_binding_table_offsets(MESA_SHADER_TESS_EVAL, devinfo,
> + shader_prog, &tep->program.Base,
> + &prog_data.base.base, 0);
> +
> + switch (tep->program.Spacing) {
> + case GL_EQUAL:
> + prog_data.partitioning = BRW_TESS_PARTITIONING_INTEGER;
> + break;
> + case GL_FRACTIONAL_ODD:
> + prog_data.partitioning = BRW_TESS_PARTITIONING_ODD_FRACTIONAL;
> + break;
> + case GL_FRACTIONAL_EVEN:
> + prog_data.partitioning = BRW_TESS_PARTITIONING_EVEN_FRACTIONAL;
> + break;
> + default:
> + unreachable("invalid domain shader spacing");
> + }
> +
> + switch (tep->program.PrimitiveMode) {
> + case GL_QUADS:
> + prog_data.domain = BRW_TESS_DOMAIN_QUAD;
> + break;
> + case GL_TRIANGLES:
> + prog_data.domain = BRW_TESS_DOMAIN_TRI;
> + break;
> + case GL_ISOLINES:
> + prog_data.domain = BRW_TESS_DOMAIN_ISOLINE;
> + break;
> + default:
> + unreachable("invalid domain shader primitive mode");
> + }
> +
> + if (tep->program.PointMode) {
> + prog_data.output_topology = BRW_TESS_OUTPUT_TOPOLOGY_POINT;
> + } else if (tep->program.PrimitiveMode == GL_ISOLINES) {
> + prog_data.output_topology = BRW_TESS_OUTPUT_TOPOLOGY_LINE;
> + } else {
> + /* Hardware winding order is backwards from OpenGL */
> + switch (tep->program.VertexOrder) {
> + case GL_CCW:
> + prog_data.output_topology = BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW;
> + break;
> + case GL_CW:
> + prog_data.output_topology = BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW;
> + break;
> + default:
> + unreachable("invalid domain shader vertex order");
> + }
> + }
> +
> + /* Allocate the references to the uniforms that will end up in the
> + * prog_data associated with the compiled program, and which will be freed
> + * by the state cache.
> + *
> + * Note: param_count needs to be num_uniform_components * 4, since we add
> + * padding around uniform values below vec4 size, so the worst case is that
> + * every uniform is a float which gets padded to the size of a vec4.
> + */
> + struct gl_shader *tes = shader_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
> + int param_count = nir->num_uniforms;
> + if (!compiler->scalar_stage[MESA_SHADER_TESS_EVAL])
> + param_count *= 4;
> +
> + prog_data.base.base.param =
> + rzalloc_array(NULL, const gl_constant_value *, param_count);
> + prog_data.base.base.pull_param =
> + rzalloc_array(NULL, const gl_constant_value *, param_count);
> + prog_data.base.base.image_param =
> + rzalloc_array(NULL, struct brw_image_param, tes->NumImages);
> + prog_data.base.base.nr_params = param_count;
> + prog_data.base.base.nr_image_params = tes->NumImages;
> +
> + brw_nir_setup_glsl_uniforms(nir, shader_prog, &tep->program.Base,
> + &prog_data.base.base,
> + compiler->scalar_stage[MESA_SHADER_TESS_EVAL]);
> +
> + if (unlikely(INTEL_DEBUG & DEBUG_TES))
> + brw_dump_ir("tessellation evaluation", shader_prog, tes, NULL);
> +
> + int st_index = -1;
> + if (unlikely(INTEL_DEBUG & DEBUG_SHADER_TIME))
> + st_index = brw_get_shader_time_index(brw, shader_prog, NULL, ST_TES);
> +
> + if (unlikely(brw->perf_debug)) {
> + start_busy = brw->batch.last_bo && drm_intel_bo_busy(brw->batch.last_bo);
> + start_time = get_time();
> + }
> +
> + void *mem_ctx = ralloc_context(NULL);
> + unsigned program_size;
> + char *error_str;
> + const unsigned *program =
> + brw_compile_tes(compiler, brw, mem_ctx, key, &prog_data, nir,
> + shader_prog, st_index, &program_size, &error_str);
> + if (program == NULL) {
> + if (shader_prog) {
> + shader_prog->LinkStatus = false;
> + ralloc_strcat(&shader_prog->InfoLog, error_str);
> + }
> +
> + _mesa_problem(NULL, "Failed to compile tessellation evaluation shader: "
> + "%s\n", error_str);
> +
> + ralloc_free(mem_ctx);
> + return false;
> + }
> +
> + if (unlikely(brw->perf_debug)) {
> + struct brw_shader *btes = (struct brw_shader *) tes;
> + if (btes->compiled_once) {
> + brw_tes_debug_recompile(brw, shader_prog, key);
> + }
> + if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
> + perf_debug("TES compile took %.03f ms and stalled the GPU\n",
> + (get_time() - start_time) * 1000);
> + }
> + btes->compiled_once = true;
> + }
> +
> + /* Scratch space is used for register spilling */
> + if (prog_data.base.base.total_scratch) {
> + brw_get_scratch_bo(brw, &stage_state->scratch_bo,
> + prog_data.base.base.total_scratch *
> + brw->max_ds_threads);
> + }
> +
> + brw_upload_cache(&brw->cache, BRW_CACHE_TES_PROG,
> + key, sizeof(*key),
> + program, program_size,
> + &prog_data, sizeof(prog_data),
> + &stage_state->prog_offset, &brw->tes.prog_data);
> + ralloc_free(mem_ctx);
> +
> + return true;
> +}
> +
> +
> +void
> +brw_upload_tes_prog(struct brw_context *brw)
> +{
> + struct gl_context *ctx = &brw->ctx;
> + struct gl_shader_program **current = ctx->_Shader->CurrentProgram;
> + struct brw_stage_state *stage_state = &brw->tes.base;
> + struct brw_tes_prog_key key;
> + /* BRW_NEW_TESS_EVAL_PROGRAM */
> + struct brw_tess_eval_program *tep =
> + (struct brw_tess_eval_program *) brw->tess_eval_program;
> +
> + if (!brw_state_dirty(brw,
> + _NEW_TEXTURE,
> + BRW_NEW_TESS_EVAL_PROGRAM))
> + return;
> +
> + if (tep == NULL) {
> + /* Other state atoms had better not try to access prog_data, since
> + * there's no TES program.
> + */
> + brw->tes.prog_data = NULL;
> + brw->tes.base.prog_data = NULL;
> + return;
> + }
> +
> + struct gl_program *prog = &tep->program.Base;
> +
> + memset(&key, 0, sizeof(key));
> +
> + key.program_string_id = tep->id;
> +
> + /* _NEW_TEXTURE */
> + brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
> + &key.tex);
> +
> + if (!brw_search_cache(&brw->cache, BRW_CACHE_TES_PROG,
> + &key, sizeof(key),
> + &stage_state->prog_offset, &brw->tes.prog_data)) {
> + bool success = brw_codegen_tes_prog(brw, current[MESA_SHADER_TESS_EVAL],
> + tep, &key);
> + assert(success);
> + (void)success;
> + }
> + brw->tes.base.prog_data = &brw->tes.prog_data->base.base;
> +}
> +
> +
> +bool
> +brw_tes_precompile(struct gl_context *ctx,
> + struct gl_shader_program *shader_prog,
> + struct gl_program *prog)
> +{
> + struct brw_context *brw = brw_context(ctx);
> + struct brw_tes_prog_key key;
> + uint32_t old_prog_offset = brw->tes.base.prog_offset;
> + struct brw_tes_prog_data *old_prog_data = brw->tes.prog_data;
> + bool success;
> +
> + struct gl_tess_eval_program *tep = (struct gl_tess_eval_program *)prog;
> + struct brw_tess_eval_program *btep = brw_tess_eval_program(tep);
> +
> + memset(&key, 0, sizeof(key));
> +
> + key.program_string_id = btep->id;
> + brw_setup_tex_for_precompile(brw, &key.tex, prog);
> +
> + success = brw_codegen_tes_prog(brw, shader_prog, btep, &key);
> +
> + brw->tes.base.prog_offset = old_prog_offset;
> + brw->tes.prog_data = old_prog_data;
> +
> + return success;
> +}
> --
> 2.6.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list