[Mesa-dev] [PATCH 3/3] radv/ac: handle gs->copy shader clip distances.
Bas Nieuwenhuizen
bas at basnieuwenhuizen.nl
Thu Feb 23 01:38:02 UTC 2017
Series is:
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
On Tue, Feb 21, 2017 at 5:14 AM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> This fixes up the clip distance passing between the geometry
> shader and the copy shader. It packs the clip and cull distances
> into one or two consecutive slots, which avoids wasting space and
> makes sure the gs output and copy shader input agree on where
> things are stored.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
> src/amd/common/ac_nir_to_llvm.c | 81 ++++++++++++++++++++++++++++++++++-------
> 1 file changed, 68 insertions(+), 13 deletions(-)
>
> diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> index 43d5295..a74b906 100644
> --- a/src/amd/common/ac_nir_to_llvm.c
> +++ b/src/amd/common/ac_nir_to_llvm.c
> @@ -2980,7 +2980,7 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx,
> LLVMValueRef gs_next_vertex;
> LLVMValueRef can_emit, kill;
> int idx;
> -
> + int clip_cull_slot = -1;
> assert(instr->const_index[0] == 0);
> /* Write vertex attribute values to GSVS ring */
> gs_next_vertex = LLVMBuildLoad(ctx->builder,
> @@ -3005,13 +3005,40 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx,
> idx = 0;
> for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
> LLVMValueRef *out_ptr = &ctx->outputs[i * 4];
> + int length = 4;
> + int start = 0;
> + int slot = idx;
> + int slot_inc = 1;
> +
> if (!(ctx->output_mask & (1ull << i)))
> continue;
>
> - for (unsigned j = 0; j < 4; j++) {
> + if (i == VARYING_SLOT_CLIP_DIST1 ||
> + i == VARYING_SLOT_CULL_DIST1)
> + continue;
> +
> + if (i == VARYING_SLOT_CLIP_DIST0 ||
> + i == VARYING_SLOT_CULL_DIST0) {
> + /* pack clip and cull into a single set of slots */
> + if (clip_cull_slot == -1) {
> + clip_cull_slot = idx;
> + if (ctx->num_output_clips + ctx->num_output_culls > 4)
> + slot_inc = 2;
> + } else {
> + slot = clip_cull_slot;
> + slot_inc = 0;
> + }
> + if (i == VARYING_SLOT_CLIP_DIST0)
> + length = ctx->num_output_clips;
> + if (i == VARYING_SLOT_CULL_DIST0) {
> + start = ctx->num_output_clips;
> + length = ctx->num_output_culls;
> + }
> + }
> + for (unsigned j = 0; j < length; j++) {
> LLVMValueRef out_val = LLVMBuildLoad(ctx->builder,
> out_ptr[j], "");
> - LLVMValueRef voffset = LLVMConstInt(ctx->i32, (idx * 4 + j) * ctx->gs_max_out_vertices, false);
> + LLVMValueRef voffset = LLVMConstInt(ctx->i32, (slot * 4 + j + start) * ctx->gs_max_out_vertices, false);
> voffset = LLVMBuildAdd(ctx->builder, voffset, gs_next_vertex, "");
> voffset = LLVMBuildMul(ctx->builder, voffset, LLVMConstInt(ctx->i32, 4, false), "");
>
> @@ -3024,7 +3051,7 @@ visit_emit_vertex(struct nir_to_llvm_context *ctx,
> V_008F0C_BUF_NUM_FORMAT_UINT,
> 1, 0, 1, 1, 0);
> }
> - idx++;
> + idx += slot_inc;
> }
>
> gs_next_vertex = LLVMBuildAdd(ctx->builder, gs_next_vertex,
> @@ -4155,14 +4182,14 @@ handle_shader_output_decl(struct nir_to_llvm_context *ctx,
> if (idx == VARYING_SLOT_CLIP_DIST0 ||
> idx == VARYING_SLOT_CULL_DIST0) {
> int length = glsl_get_length(variable->type);
> - if (ctx->stage == MESA_SHADER_VERTEX) {
> - if (idx == VARYING_SLOT_CLIP_DIST0) {
> + if (idx == VARYING_SLOT_CLIP_DIST0) {
> + if (ctx->stage == MESA_SHADER_VERTEX)
> ctx->shader_info->vs.clip_dist_mask = (1 << length) - 1;
> - ctx->num_output_clips = length;
> - } else if (idx == VARYING_SLOT_CULL_DIST0) {
> + ctx->num_output_clips = length;
> + } else if (idx == VARYING_SLOT_CULL_DIST0) {
> + if (ctx->stage == MESA_SHADER_VERTEX)
> ctx->shader_info->vs.cull_dist_mask = (1 << length) - 1;
> - ctx->num_output_culls = length;
> - }
> + ctx->num_output_culls = length;
> }
> if (length > 4)
> attrib_count = 2;
> @@ -5049,14 +5076,42 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
> args[8] = ctx->i32zero; /* TFE */
>
> int idx = 0;
> + int clip_cull_slot = -1;
> for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
> + int length = 4;
> + int start = 0;
> + int slot = idx;
> + int slot_inc = 1;
> if (!(ctx->output_mask & (1ull << i)))
> continue;
>
> - for (unsigned j = 0; j < 4; j++) {
> + if (i == VARYING_SLOT_CLIP_DIST1 ||
> + i == VARYING_SLOT_CULL_DIST1)
> + continue;
> +
> + if (i == VARYING_SLOT_CLIP_DIST0 ||
> + i == VARYING_SLOT_CULL_DIST0) {
> + /* unpack clip and cull from a single set of slots */
> + if (clip_cull_slot == -1) {
> + clip_cull_slot = idx;
> + if (ctx->num_output_clips + ctx->num_output_culls > 4)
> + slot_inc = 2;
> + } else {
> + slot = clip_cull_slot;
> + slot_inc = 0;
> + }
> + if (i == VARYING_SLOT_CLIP_DIST0)
> + length = ctx->num_output_clips;
> + if (i == VARYING_SLOT_CULL_DIST0) {
> + start = ctx->num_output_clips;
> + length = ctx->num_output_culls;
> + }
> + }
> +
> + for (unsigned j = 0; j < length; j++) {
> LLVMValueRef value;
> args[2] = LLVMConstInt(ctx->i32,
> - (idx * 4 + j) *
> + (slot * 4 + j + start) *
> ctx->gs_max_out_vertices * 16 * 4, false);
>
> value = ac_emit_llvm_intrinsic(&ctx->ac,
> @@ -5067,7 +5122,7 @@ ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
> LLVMBuildStore(ctx->builder,
> to_float(ctx, value), ctx->outputs[radeon_llvm_reg_index_soa(i, j)]);
> }
> - idx++;
> + idx += slot_inc;
> }
> handle_vs_outputs_post(ctx);
> }
> --
> 2.9.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list