[Mesa-dev] [PATCH 8/8] i965: Rework gl_TessLevel*[] handling to use NIR compact arrays.
Jason Ekstrand
jason at jlekstrand.net
Wed Jan 4 21:35:55 UTC 2017
On Wed, Jan 4, 2017 at 3:07 AM, Kenneth Graunke <kenneth at whitecape.org>
wrote:
> Treating everything as scalar arrays allows us to drop a bunch of
> special case input/output munging all throughout the backend.
> Instead, we just need to remap the TessLevel components to the
> appropriate patch URB header locations in remap_patch_urb_offsets().
>
> We also switch to treating the TES input versions of these as ordinary
> shader inputs rather than system values, as remap_patch_urb_offsets()
> just makes everything work out without special handling.
>
> This regresses one Piglit test:
> arb_tessellation_shader-large-uniforms/GL_TESS_CONTROL_SHADER-array-at-limit
>
> The compiler starts promoting the constant arrays assigned to gl_TessLevel*
> to uniform arrays. Since the shader also has a uniform array that uses
> the maximum number of uniform components, this puts it over the uniform
> component limit enforced by the linker. This is arguably a bug in the
> constant array promotion code (it should avoid pushing us over limits),
> but is unlikely to penalize any real application.
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
> src/mesa/drivers/dri/i965/brw_context.c | 2 +-
> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 164
> +--------------------
> src/mesa/drivers/dri/i965/brw_nir.c | 74 +++++++++-
> src/mesa/drivers/dri/i965/brw_nir.h | 3 +-
> .../drivers/dri/i965/brw_nir_tcs_workarounds.c | 8 +-
> src/mesa/drivers/dri/i965/brw_shader.cpp | 61 +-------
> src/mesa/drivers/dri/i965/brw_shader.h | 5 -
> src/mesa/drivers/dri/i965/brw_tcs.c | 5 +-
> src/mesa/drivers/dri/i965/brw_tes.c | 17 +--
> src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 117 +--------------
> 10 files changed, 92 insertions(+), 364 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c
> b/src/mesa/drivers/dri/i965/brw_context.c
> index 45490a0f5cf..22f872fe782 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -672,7 +672,7 @@ brw_initialize_context_constants(struct brw_context
> *brw)
> if (brw->gen >= 5 || brw->is_g4x)
> ctx->Const.MaxClipPlanes = 8;
>
> - ctx->Const.LowerTessLevel = true;
> + ctx->Const.GLSLTessLevelsAsInputs = true;
> ctx->Const.LowerTCSPatchVerticesIn = brw->gen >= 8;
> ctx->Const.LowerTESPatchVerticesIn = true;
> ctx->Const.PrimitiveRestartForPatches = true;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index 2ed843bd03d..8f745dff440 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -2520,78 +2520,7 @@ fs_visitor::nir_emit_tcs_intrinsic(const
> fs_builder &bld,
> bld.MOV(patch_handle,
> retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UD));
>
> - if (imm_offset == 0) {
> - /* This is a read of gl_TessLevelInner[], which lives in the
> - * Patch URB header. The layout depends on the domain.
> - */
> - dst.type = BRW_REGISTER_TYPE_F;
> - switch (tcs_key->tes_primitive_mode) {
> - case GL_QUADS: {
> - /* DWords 3-2 (reversed) */
> - fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
> -
> - inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp,
> patch_handle);
> - inst->offset = 0;
> - inst->mlen = 1;
> - inst->size_written = 4 * REG_SIZE;
> -
> - /* dst.xy = tmp.wz */
> - bld.MOV(dst, offset(tmp, bld, 3));
> - bld.MOV(offset(dst, bld, 1), offset(tmp, bld, 2));
> - break;
> - }
> - case GL_TRIANGLES:
> - /* DWord 4; hardcode offset = 1 and size_written =
> REG_SIZE */
> - inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst,
> patch_handle);
> - inst->offset = 1;
> - inst->mlen = 1;
> - inst->size_written = REG_SIZE;
> - break;
> - case GL_ISOLINES:
> - /* All channels are undefined. */
> - break;
> - default:
> - unreachable("Bogus tessellation domain");
> - }
> - } else if (imm_offset == 1) {
> - /* This is a read of gl_TessLevelOuter[], which lives in the
> - * Patch URB header. The layout depends on the domain.
> - */
> - dst.type = BRW_REGISTER_TYPE_F;
> -
> - fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
> - inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, tmp,
> patch_handle);
> - inst->offset = 1;
> - inst->mlen = 1;
> - inst->size_written = 4 * REG_SIZE;
> -
> - /* Reswizzle: WZYX */
> - fs_reg srcs[4] = {
> - offset(tmp, bld, 3),
> - offset(tmp, bld, 2),
> - offset(tmp, bld, 1),
> - offset(tmp, bld, 0),
> - };
> -
> - unsigned num_components;
> - switch (tcs_key->tes_primitive_mode) {
> - case GL_QUADS:
> - num_components = 4;
> - break;
> - case GL_TRIANGLES:
> - num_components = 3;
> - break;
> - case GL_ISOLINES:
> - /* Isolines are not reversed; swizzle .zw -> .xy */
> - srcs[0] = offset(tmp, bld, 2);
> - srcs[1] = offset(tmp, bld, 3);
> - num_components = 2;
> - break;
> - default:
> - unreachable("Bogus tessellation domain");
> - }
> - bld.LOAD_PAYLOAD(dst, srcs, num_components, 0);
> - } else {
> + {
> if (first_component != 0) {
> unsigned read_components =
> instr->num_components + first_component;
> @@ -2656,55 +2585,6 @@ fs_visitor::nir_emit_tcs_intrinsic(const
> fs_builder &bld,
>
> if (indirect_offset.file != BAD_FILE) {
> srcs[header_regs++] = indirect_offset;
> - } else if (!is_passthrough_shader) {
> - if (imm_offset == 0) {
> - value.type = BRW_REGISTER_TYPE_F;
> -
> - mask &= (1 << tesslevel_inner_components(tcs_key->tes_primitive_mode))
> - 1;
> -
> - /* This is a write to gl_TessLevelInner[], which lives in the
> - * Patch URB header. The layout depends on the domain.
> - */
> - switch (tcs_key->tes_primitive_mode) {
> - case GL_QUADS:
> - /* gl_TessLevelInner[].xy lives at DWords 3-2 (reversed).
> - * We use an XXYX swizzle to reverse put .xy in the .wz
> - * channels, and use a .zw writemask.
> - */
> - mask = writemask_for_backwards_vector(mask);
> - swiz = BRW_SWIZZLE4(0, 0, 1, 0);
> - break;
> - case GL_TRIANGLES:
> - /* gl_TessLevelInner[].x lives at DWord 4, so we set the
> - * writemask to X and bump the URB offset by 1.
> - */
> - imm_offset = 1;
> - break;
> - case GL_ISOLINES:
> - /* Skip; gl_TessLevelInner[] doesn't exist for isolines. */
> - return;
> - default:
> - unreachable("Bogus tessellation domain");
> - }
> - } else if (imm_offset == 1) {
> - /* This is a write to gl_TessLevelOuter[] which lives in the
> - * Patch URB Header at DWords 4-7. However, it's reversed, so
> - * instead of .xyzw we have .wzyx.
> - */
> - value.type = BRW_REGISTER_TYPE_F;
> -
> - mask &= (1 << tesslevel_outer_components(tcs_key->tes_primitive_mode))
> - 1;
> -
> - if (tcs_key->tes_primitive_mode == GL_ISOLINES) {
> - /* Isolines .xy should be stored in .zw, in order. */
> - swiz = BRW_SWIZZLE4(0, 0, 0, 1);
> - mask <<= 2;
> - } else {
> - /* Other domains are reversed; store .wzyx instead of
> .xyzw */
> - swiz = BRW_SWIZZLE_WZYX;
> - mask = writemask_for_backwards_vector(mask);
> - }
> - }
> }
>
> if (mask == 0)
> @@ -2851,48 +2731,6 @@ fs_visitor::nir_emit_tes_intrinsic(const
> fs_builder &bld,
> }
> break;
>
> - case nir_intrinsic_load_tess_level_outer:
> - /* When the TES reads gl_TessLevelOuter, we ensure that the patch
> header
> - * appears as a push-model input. So, we can simply use the ATTR
> file
> - * rather than issuing URB read messages. The data is stored in the
> - * high DWords in reverse order - DWord 7 contains .x, DWord 6
> contains
> - * .y, and so on.
> - */
> - switch (tes_prog_data->domain) {
> - case BRW_TESS_DOMAIN_QUAD:
> - for (unsigned i = 0; i < 4; i++)
> - bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 -
> i));
> - break;
> - case BRW_TESS_DOMAIN_TRI:
> - for (unsigned i = 0; i < 3; i++)
> - bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 -
> i));
> - break;
> - case BRW_TESS_DOMAIN_ISOLINE:
> - for (unsigned i = 0; i < 2; i++)
> - bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 6 +
> i));
> - break;
> - }
> - break;
> -
> - case nir_intrinsic_load_tess_level_inner:
> - /* When the TES reads gl_TessLevelInner, we ensure that the patch
> header
> - * appears as a push-model input. So, we can simply use the ATTR
> file
> - * rather than issuing URB read messages.
> - */
> - switch (tes_prog_data->domain) {
> - case BRW_TESS_DOMAIN_QUAD:
> - bld.MOV(dest, component(fs_reg(ATTR, 0), 3));
> - bld.MOV(offset(dest, bld, 1), component(fs_reg(ATTR, 0), 2));
> - break;
> - case BRW_TESS_DOMAIN_TRI:
> - bld.MOV(dest, component(fs_reg(ATTR, 0), 4));
> - break;
> - case BRW_TESS_DOMAIN_ISOLINE:
> - /* ignore - value is undefined */
> - break;
> - }
> - break;
> -
> case nir_intrinsic_load_input:
> case nir_intrinsic_load_per_vertex_input: {
> fs_reg indirect_offset = get_indirect_offset(instr);
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c
> b/src/mesa/drivers/dri/i965/brw_nir.c
> index 6f37e97a86f..46eeb1723b4 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir.c
> +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> @@ -141,9 +141,68 @@ remap_inputs_with_vue_map(nir_block *block, const
> struct brw_vue_map *vue_map)
> }
>
> static bool
> +remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr,
> + GLenum primitive_mode)
> +{
> + const int location = nir_intrinsic_base(intr);
> + const unsigned component = nir_intrinsic_component(intr);
> + bool out_of_bounds;
> +
> + if (location == VARYING_SLOT_TESS_LEVEL_INNER) {
> + switch (primitive_mode) {
> + case GL_QUADS:
> + /* gl_TessLevelInner[0..1] lives at DWords 3-2 (reversed). */
> + nir_intrinsic_set_base(intr, 0);
> + nir_intrinsic_set_component(intr, 3 - component);
> + out_of_bounds = false;
>
What if component > 1? I guess that's not really a problem but it is
out-of-bounds...
> + break;
> + case GL_TRIANGLES:
> + /* gl_TessLevelInner[0] lives at DWord 4. */
> + nir_intrinsic_set_base(intr, 1);
> + out_of_bounds = component > 0;
> + break;
> + case GL_ISOLINES:
> + out_of_bounds = true;
> + break;
> + default:
> + unreachable("Bogus tessellation domain");
> + }
> + } else if (location == VARYING_SLOT_TESS_LEVEL_OUTER) {
> + if (primitive_mode == GL_ISOLINES) {
> + /* gl_TessLevelOuter[0..1] lives at DWords 6-7 (in order). */
> + nir_intrinsic_set_base(intr, 1);
> + nir_intrinsic_set_component(intr, 2 +
> nir_intrinsic_component(intr));
> + out_of_bounds = component > 1;
> + } else {
> + /* Triangles use DWords 7-5 (reversed); Quads use 7-4 (reversed)
> */
> + nir_intrinsic_set_base(intr, 1);
> + nir_intrinsic_set_component(intr, 3 -
> nir_intrinsic_component(intr));
> + out_of_bounds = component == 3 && primitive_mode == GL_TRIANGLES;
>
> + }
> + } else {
> + return false;
> + }
> +
> + if (out_of_bounds) {
> + if (nir_intrinsic_infos[intr->intrinsic].has_dest) {
> + b->cursor = nir_before_instr(&intr->instr);
> + nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
> + nir_ssa_def_rewrite_uses(&intr->dest.ssa,
> nir_src_for_ssa(undef));
> + }
> + nir_instr_remove(&intr->instr);
> + }
> +
> + return true;
> +}
> +
> +static bool
> remap_patch_urb_offsets(nir_block *block, nir_builder *b,
> - const struct brw_vue_map *vue_map)
> + const struct brw_vue_map *vue_map,
> + GLenum tes_primitive_mode)
> {
> + const bool is_passthrough_tcs = b->shader->info->name &&
> + strcmp(b->shader->info->name, "passthrough") == 0;
>
This is gross...
Also... Why? What's so special about the passthrough that it doesn't need
tess level remaps? I have a feeling there's some more general thing we
could be doing here.
> +
> nir_foreach_instr_safe(instr, block) {
> if (instr->type != nir_instr_type_intrinsic)
> continue;
> @@ -154,6 +213,11 @@ remap_patch_urb_offsets(nir_block *block,
> nir_builder *b,
>
> if ((stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) ||
> (stage == MESA_SHADER_TESS_EVAL && is_input(intrin))) {
> +
> + if (!is_passthrough_tcs &&
> + remap_tess_levels(b, intrin, tes_primitive_mode))
> + continue;
>
Let's make sure I've got this right...
We map everything from the varying identifiers to VUE slots. For the case
of tesslevel, they will be assigned VUE slots 0 and 1 so the code below
will never cause any other output to alias. Then we rework stuff in
remap_tess_levels so that they map to the right location in the 8-dword chunk at
the beginning of the VUE.
This function (remap_patch_urb_offsets) could really use a comment at the
top saying what it's doing.
> +
> int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]];
> assert(vue_slot != -1);
> intrin->const_index[0] = vue_slot;
> @@ -273,7 +337,8 @@ brw_nir_lower_tes_inputs(nir_shader *nir, const
> struct brw_vue_map *vue_map)
> nir_builder b;
> nir_builder_init(&b, function->impl);
> nir_foreach_block(block, function->impl) {
> - remap_patch_urb_offsets(block, &b, vue_map);
> + remap_patch_urb_offsets(block, &b, vue_map,
> + nir->info->tes.primitive_mode);
> }
> }
> }
> @@ -341,7 +406,8 @@ brw_nir_lower_vue_outputs(nir_shader *nir,
> }
>
> void
> -brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map
> *vue_map)
> +brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map
> *vue_map,
> + GLenum tes_primitive_mode)
> {
> nir_foreach_variable(var, &nir->outputs) {
> var->data.driver_location = var->data.location;
> @@ -359,7 +425,7 @@ brw_nir_lower_tcs_outputs(nir_shader *nir, const
> struct brw_vue_map *vue_map)
> nir_builder b;
> nir_builder_init(&b, function->impl);
> nir_foreach_block(block, function->impl) {
> - remap_patch_urb_offsets(block, &b, vue_map);
> + remap_patch_urb_offsets(block, &b, vue_map,
> tes_primitive_mode);
> }
> }
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.h
> b/src/mesa/drivers/dri/i965/brw_nir.h
> index 8cfb6c1be68..c6ef437d4f9 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir.h
> +++ b/src/mesa/drivers/dri/i965/brw_nir.h
> @@ -109,7 +109,8 @@ void brw_nir_lower_fs_inputs(nir_shader *nir, struct
> brw_vue_map *vue_map,
> const struct gen_device_info *devinfo,
> const struct brw_wm_prog_key *key);
> void brw_nir_lower_vue_outputs(nir_shader *nir, bool is_scalar);
> -void brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map
> *vue);
> +void brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map
> *vue,
> + GLenum tes_primitive_mode);
> void brw_nir_lower_fs_outputs(nir_shader *nir);
> void brw_nir_lower_cs_shared(nir_shader *nir);
>
> diff --git a/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c
> b/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c
> index caf5c393a5e..e130c8c9c1a 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c
> +++ b/src/mesa/drivers/dri/i965/brw_nir_tcs_workarounds.c
> @@ -73,7 +73,7 @@
> */
>
> static inline nir_ssa_def *
> -load_output(nir_builder *b, int num_components, int offset)
> +load_output(nir_builder *b, int num_components, int offset, int component)
> {
> nir_intrinsic_instr *load =
> nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_output);
> @@ -81,6 +81,7 @@ load_output(nir_builder *b, int num_components, int
> offset)
> load->num_components = num_components;
> load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
> nir_intrinsic_set_base(load, offset);
> + nir_intrinsic_set_component(load, component);
>
> nir_builder_instr_insert(b, &load->instr);
>
> @@ -92,8 +93,8 @@ emit_quads_workaround(nir_builder *b, nir_block *block)
> {
> b->cursor = nir_after_block_before_jump(block);
>
> - nir_ssa_def *inner = load_output(b, 2, 0);
> - nir_ssa_def *outer = load_output(b, 4, 1);
> + nir_ssa_def *inner = load_output(b, 2, 0, 2);
> + nir_ssa_def *outer = load_output(b, 4, 1, 0);
>
> nir_ssa_def *any_greater_than_1 =
> nir_ior(b, nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), outer)),
> @@ -113,6 +114,7 @@ emit_quads_workaround(nir_builder *b, nir_block
> *block)
> nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
> store->num_components = 2;
> nir_intrinsic_set_write_mask(store, WRITEMASK_XY);
> + nir_intrinsic_set_component(store, 2);
> store->src[0] = nir_src_for_ssa(inner);
> store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
> nir_builder_instr_insert(b, &store->instr);
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp
> b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index dfc7407ea5b..7dbe3a502ec 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -648,53 +648,6 @@ get_atomic_counter_op(nir_intrinsic_op op)
> }
> }
>
> -unsigned
> -tesslevel_outer_components(GLenum tes_primitive_mode)
> -{
> - switch (tes_primitive_mode) {
> - case GL_QUADS:
> - return 4;
> - case GL_TRIANGLES:
> - return 3;
> - case GL_ISOLINES:
> - return 2;
> - default:
> - unreachable("Bogus tessellation domain");
> - }
> - return 0;
> -}
> -
> -unsigned
> -tesslevel_inner_components(GLenum tes_primitive_mode)
> -{
> - switch (tes_primitive_mode) {
> - case GL_QUADS:
> - return 2;
> - case GL_TRIANGLES:
> - return 1;
> - case GL_ISOLINES:
> - return 0;
> - default:
> - unreachable("Bogus tessellation domain");
> - }
> - return 0;
> -}
> -
> -/**
> - * Given a normal .xyzw writemask, convert it to a writemask for a vector
> - * that's stored backwards, i.e. .wzyx.
> - */
> -unsigned
> -writemask_for_backwards_vector(unsigned mask)
> -{
> - unsigned new_mask = 0;
> -
> - for (int i = 0; i < 4; i++)
> - new_mask |= ((mask >> i) & 1) << (3 - i);
> -
> - return new_mask;
> -}
> -
> backend_shader::backend_shader(const struct brw_compiler *compiler,
> void *log_data,
> void *mem_ctx,
> @@ -712,8 +665,6 @@ backend_shader::backend_shader(const struct
> brw_compiler *compiler,
> debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_
> stage(stage);
> stage_name = _mesa_shader_stage_to_string(stage);
> stage_abbrev = _mesa_shader_stage_to_abbrev(stage);
> - is_passthrough_shader =
> - nir->info->name && strcmp(nir->info->name, "passthrough") == 0;
> }
>
> bool
> @@ -1399,17 +1350,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
>
> /* URB entry sizes are stored as a multiple of 64 bytes. */
> prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
> -
> - bool need_patch_header = nir->info->system_values_read &
> - (BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_OUTER) |
> - BITFIELD64_BIT(SYSTEM_VALUE_TESS_LEVEL_INNER));
> -
> - /* The TES will pull most inputs using URB read messages.
> - *
> - * However, we push the patch header for TessLevel factors when
> required,
> - * as it's a tiny amount of extra data.
> - */
> - prog_data->base.urb_read_length = need_patch_header ? 1 : 0;
> + prog_data->base.urb_read_length = 0;
>
> if (unlikely(INTEL_DEBUG & DEBUG_TES)) {
> fprintf(stderr, "TES Input ");
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.h
> b/src/mesa/drivers/dri/i965/brw_shader.h
> index 6b5ee3719a9..13f271db8c1 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.h
> +++ b/src/mesa/drivers/dri/i965/brw_shader.h
> @@ -217,7 +217,6 @@ public:
> bool debug_enabled;
> const char *stage_name;
> const char *stage_abbrev;
> - bool is_passthrough_shader;
>
> brw::simple_allocator alloc;
>
> @@ -301,10 +300,6 @@ bool brw_cs_precompile(struct gl_context *ctx,
> GLboolean brw_link_shader(struct gl_context *ctx, struct
> gl_shader_program *prog);
> struct gl_linked_shader *brw_new_shader(gl_shader_stage stage);
>
> -unsigned tesslevel_outer_components(GLenum tes_primitive_mode);
> -unsigned tesslevel_inner_components(GLenum tes_primitive_mode);
> -unsigned writemask_for_backwards_vector(unsigned mask);
> -
> unsigned get_atomic_counter_op(nir_intrinsic_op op);
>
> #ifdef __cplusplus
> diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c
> b/src/mesa/drivers/dri/i965/brw_tcs.c
> index f890ccf0296..567ae792dcd 100644
> --- a/src/mesa/drivers/dri/i965/brw_tcs.c
> +++ b/src/mesa/drivers/dri/i965/brw_tcs.c
> @@ -51,7 +51,8 @@ create_passthrough_tcs(void *mem_ctx, const struct
> brw_compiler *compiler,
> nir_ssa_def *invoc_id =
> nir_load_system_value(&b, nir_intrinsic_load_invocation_id, 0);
>
> - nir->info->inputs_read = key->outputs_written;
> + nir->info->inputs_read = key->outputs_written &
> + ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
> nir->info->outputs_written = key->outputs_written;
> nir->info->tcs.vertices_out = key->input_vertices;
> nir->info->name = ralloc_strdup(nir, "passthrough");
> @@ -81,7 +82,7 @@ create_passthrough_tcs(void *mem_ctx, const struct
> brw_compiler *compiler,
> }
>
> /* Copy inputs to outputs. */
> - uint64_t varyings = key->outputs_written;
> + uint64_t varyings = nir->info->inputs_read;
>
> while (varyings != 0) {
> const int varying = ffsll(varyings) - 1;
> diff --git a/src/mesa/drivers/dri/i965/brw_tes.c
> b/src/mesa/drivers/dri/i965/brw_tes.c
> index 20313660734..56d75c28447 100644
> --- a/src/mesa/drivers/dri/i965/brw_tes.c
> +++ b/src/mesa/drivers/dri/i965/brw_tes.c
> @@ -239,16 +239,12 @@ brw_tes_populate_key(struct brw_context *brw,
> */
> if (tcp) {
> struct gl_program *tcp_prog = &tcp->program;
> - per_vertex_slots |= tcp_prog->info.outputs_written;
> + per_vertex_slots |= tcp_prog->info.outputs_written &
> + ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
> per_patch_slots |= tcp_prog->info.patch_outputs_written;
> }
>
> - /* Ignore gl_TessLevelInner/Outer - we treat them as system values,
> - * not inputs, and they're always present in the URB entry regardless
> - * of whether or not we read them.
> - */
> - key->inputs_read = per_vertex_slots &
> - ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
> + key->inputs_read = per_vertex_slots;
> key->patch_inputs_read = per_patch_slots;
>
> /* _NEW_TEXTURE */
> @@ -305,14 +301,11 @@ brw_tes_precompile(struct gl_context *ctx,
> if (shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]) {
> struct gl_program *tcp =
> shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL]->Program;
> - key.inputs_read |= tcp->nir->info->outputs_written;
> + key.inputs_read |= tcp->nir->info->outputs_written &
> + ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
> key.patch_inputs_read |= tcp->nir->info->patch_outputs_written;
> }
>
> - /* Ignore gl_TessLevelInner/Outer - they're system values. */
> - key.inputs_read &= ~(VARYING_BIT_TESS_LEVEL_INNER |
> - VARYING_BIT_TESS_LEVEL_OUTER);
> -
> brw_setup_tex_for_precompile(brw, &key.tex, prog);
>
> success = brw_codegen_tes_prog(brw, shader_prog, btep, &key);
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
> b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
> index bfa22458f0e..9ef3dc04665 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
> @@ -319,62 +319,8 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr
> *instr)
> dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
> dst.writemask = brw_writemask_for_size(instr->num_components);
>
> - if (imm_offset == 0 && indirect_offset.file == BAD_FILE) {
> - dst.type = BRW_REGISTER_TYPE_F;
> -
> - /* This is a read of gl_TessLevelInner[], which lives in the
> - * Patch URB header. The layout depends on the domain.
> - */
> - switch (key->tes_primitive_mode) {
> - case GL_QUADS: {
> - /* DWords 3-2 (reversed); use offset 0 and WZYX swizzle. */
> - dst_reg tmp(this, glsl_type::vec4_type);
> - emit_output_urb_read(tmp, 0, 0, src_reg());
> - emit(MOV(writemask(dst, WRITEMASK_XY),
> - swizzle(src_reg(tmp), BRW_SWIZZLE_WZYX)));
> - break;
> - }
> - case GL_TRIANGLES:
> - /* DWord 4; use offset 1 but normal swizzle/writemask. */
> - emit_output_urb_read(writemask(dst, WRITEMASK_X), 1, 0,
> - src_reg());
> - break;
> - case GL_ISOLINES:
> - /* All channels are undefined. */
> - return;
> - default:
> - unreachable("Bogus tessellation domain");
> - }
> - } else if (imm_offset == 1 && indirect_offset.file == BAD_FILE) {
> - dst.type = BRW_REGISTER_TYPE_F;
> - unsigned swiz = BRW_SWIZZLE_WZYX;
> -
> - /* This is a read of gl_TessLevelOuter[], which lives in the
> - * high 4 DWords of the Patch URB header, in reverse order.
> - */
> - switch (key->tes_primitive_mode) {
> - case GL_QUADS:
> - dst.writemask = WRITEMASK_XYZW;
> - break;
> - case GL_TRIANGLES:
> - dst.writemask = WRITEMASK_XYZ;
> - break;
> - case GL_ISOLINES:
> - /* Isolines are not reversed; swizzle .zw -> .xy */
> - swiz = BRW_SWIZZLE_ZWZW;
> - dst.writemask = WRITEMASK_XY;
> - return;
> - default:
> - unreachable("Bogus tessellation domain");
> - }
> -
> - dst_reg tmp(this, glsl_type::vec4_type);
> - emit_output_urb_read(tmp, 1, 0, src_reg());
> - emit(MOV(dst, swizzle(src_reg(tmp), swiz)));
> - } else {
> - emit_output_urb_read(dst, imm_offset,
> nir_intrinsic_component(instr),
> - indirect_offset);
> - }
> + emit_output_urb_read(dst, imm_offset, nir_intrinsic_component(instr)
> ,
> + indirect_offset);
> break;
> }
> case nir_intrinsic_store_output:
> @@ -386,62 +332,6 @@ vec4_tcs_visitor::nir_emit_intrinsic(nir_intrinsic_instr
> *instr)
> src_reg indirect_offset = get_indirect_offset(instr);
> unsigned imm_offset = instr->const_index[0];
>
> - /* The passthrough shader writes the whole patch header as two
> vec4s;
> - * skip all the gl_TessLevelInner/Outer swizzling.
> - */
> - if (indirect_offset.file == BAD_FILE && !is_passthrough_shader) {
> - if (imm_offset == 0) {
> - value.type = BRW_REGISTER_TYPE_F;
> -
> - mask &=
> - (1 << tesslevel_inner_components(key->tes_primitive_mode))
> - 1;
> -
> - /* This is a write to gl_TessLevelInner[], which lives in the
> - * Patch URB header. The layout depends on the domain.
> - */
> - switch (key->tes_primitive_mode) {
> - case GL_QUADS:
> - /* gl_TessLevelInner[].xy lives at DWords 3-2 (reversed).
> - * We use an XXYX swizzle to reverse put .xy in the .wz
> - * channels, and use a .zw writemask.
> - */
> - swiz = BRW_SWIZZLE4(0, 0, 1, 0);
> - mask = writemask_for_backwards_vector(mask);
> - break;
> - case GL_TRIANGLES:
> - /* gl_TessLevelInner[].x lives at DWord 4, so we set the
> - * writemask to X and bump the URB offset by 1.
> - */
> - imm_offset = 1;
> - break;
> - case GL_ISOLINES:
> - /* Skip; gl_TessLevelInner[] doesn't exist for isolines. */
> - return;
> - default:
> - unreachable("Bogus tessellation domain");
> - }
> - } else if (imm_offset == 1) {
> - value.type = BRW_REGISTER_TYPE_F;
> -
> - mask &=
> - (1 << tesslevel_outer_components(key->tes_primitive_mode))
> - 1;
> -
> - /* This is a write to gl_TessLevelOuter[] which lives in the
> - * Patch URB Header at DWords 4-7. However, it's reversed, so
> - * instead of .xyzw we have .wzyx.
> - */
> - if (key->tes_primitive_mode == GL_ISOLINES) {
> - /* Isolines .xy should be stored in .zw, in order. */
> - swiz = BRW_SWIZZLE4(0, 0, 0, 1);
> - mask <<= 2;
> - } else {
> - /* Other domains are reversed; store .wzyx instead of
> .xyzw. */
> - swiz = BRW_SWIZZLE_WZYX;
> - mask = writemask_for_backwards_vector(mask);
> - }
> - }
> - }
> -
> unsigned first_component = nir_intrinsic_component(instr);
> if (first_component) {
> if (nir_src_bit_size(instr->src[0]) == 64)
> @@ -522,7 +412,8 @@ brw_compile_tcs(const struct brw_compiler *compiler,
>
> nir = brw_nir_apply_sampler_key(nir, compiler, &key->tex, is_scalar);
> brw_nir_lower_vue_inputs(nir, is_scalar, &input_vue_map);
> - brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map);
> + brw_nir_lower_tcs_outputs(nir, &vue_prog_data->vue_map,
> + key->tes_primitive_mode);
> if (key->quads_workaround)
> brw_nir_apply_tcs_quads_workaround(nir);
>
> --
> 2.11.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170104/028e3eaa/attachment-0001.html>
More information about the mesa-dev
mailing list