Mesa (main): radeonsi: fix view culling for wide lines

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Nov 16 02:41:11 UTC 2021


Module: Mesa
Branch: main
Commit: f8a0aa6852756d8f1593ef6627ddb1754ae967be
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f8a0aa6852756d8f1593ef6627ddb1754ae967be

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Sat Nov  6 14:07:25 2021 -0400

radeonsi: fix view culling for wide lines

We need to cull wide lines as quads, but only for view culling.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13700>

---

 src/amd/llvm/ac_llvm_cull.c                      | 17 ++++++++++++-----
 src/amd/llvm/ac_llvm_cull.h                      |  4 ++--
 src/gallium/drivers/radeonsi/gfx10_shader_ngg.c  | 17 +++++++++++++----
 src/gallium/drivers/radeonsi/si_pipe.h           |  2 ++
 src/gallium/drivers/radeonsi/si_state.c          |  3 ++-
 src/gallium/drivers/radeonsi/si_state_viewport.c |  9 +++++++++
 6 files changed, 40 insertions(+), 12 deletions(-)

diff --git a/src/amd/llvm/ac_llvm_cull.c b/src/amd/llvm/ac_llvm_cull.c
index 681c186cd35..87d201f0781 100644
--- a/src/amd/llvm/ac_llvm_cull.c
+++ b/src/amd/llvm/ac_llvm_cull.c
@@ -125,7 +125,9 @@ static LLVMValueRef ac_cull_face(struct ac_llvm_context *ctx, LLVMValueRef pos[3
 static void cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
                       LLVMValueRef initially_accepted, struct ac_position_w_info *w,
                       LLVMValueRef vp_scale[2], LLVMValueRef vp_translate[2],
-                      LLVMValueRef small_prim_precision, struct ac_cull_options *options,
+                      LLVMValueRef small_prim_precision,
+                      LLVMValueRef clip_half_line_width[2],
+                      struct ac_cull_options *options,
                       ac_cull_accept_func accept_func, void *userdata)
 {
    LLVMBuilderRef builder = ctx->builder;
@@ -153,6 +155,11 @@ static void cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
             bbox_min[chan] = ac_build_fmin(ctx, bbox_min[chan], pos[2][chan]);
             bbox_max[chan] = ac_build_fmax(ctx, bbox_max[chan], pos[2][chan]);
          }
+
+         if (clip_half_line_width[chan]) {
+            bbox_min[chan] = LLVMBuildFSub(builder, bbox_min[chan], clip_half_line_width[chan], "");
+            bbox_max[chan] = LLVMBuildFAdd(builder, bbox_max[chan], clip_half_line_width[chan], "");
+         }
       }
 
       /* View culling. */
@@ -238,8 +245,8 @@ static void cull_bbox(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
 void ac_cull_primitive(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
                        LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2],
                        LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision,
-                       struct ac_cull_options *options, ac_cull_accept_func accept_func,
-                       void *userdata)
+                       LLVMValueRef clip_half_line_width[2], struct ac_cull_options *options,
+                       ac_cull_accept_func accept_func, void *userdata)
 {
    struct ac_position_w_info w;
    ac_analyze_position_w(ctx, pos, &w, options->num_vertices);
@@ -255,6 +262,6 @@ void ac_cull_primitive(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
       "");
 
    /* View culling and small primitive elimination. */
-   cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate, small_prim_precision, options,
-             accept_func, userdata);
+   cull_bbox(ctx, pos, accepted, &w, vp_scale, vp_translate, small_prim_precision,
+             clip_half_line_width, options, accept_func, userdata);
 }
diff --git a/src/amd/llvm/ac_llvm_cull.h b/src/amd/llvm/ac_llvm_cull.h
index db1dcdde9f7..dc978d3fe04 100644
--- a/src/amd/llvm/ac_llvm_cull.h
+++ b/src/amd/llvm/ac_llvm_cull.h
@@ -57,7 +57,7 @@ typedef void (*ac_cull_accept_func)(struct ac_llvm_context *ctx, LLVMValueRef ac
 void ac_cull_primitive(struct ac_llvm_context *ctx, LLVMValueRef pos[3][4],
                        LLVMValueRef initially_accepted, LLVMValueRef vp_scale[2],
                        LLVMValueRef vp_translate[2], LLVMValueRef small_prim_precision,
-                       struct ac_cull_options *options, ac_cull_accept_func accept_func,
-                       void *userdata);
+                       LLVMValueRef clip_half_line_width[2], struct ac_cull_options *options,
+                       ac_cull_accept_func accept_func, void *userdata);
 
 #endif
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index 745dd90eb21..46ad2520f7a 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -970,9 +970,11 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
          }
       }
 
+      LLVMValueRef clip_half_line_width[2] = {};
+
       /* Load the viewport state for small prim culling. */
-      LLVMValueRef vp = ac_build_load_invariant(
-         &ctx->ac, ac_get_arg(&ctx->ac, ctx->small_prim_cull_info), ctx->ac.i32_0);
+      LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->small_prim_cull_info);
+      LLVMValueRef vp = ac_build_load_invariant(&ctx->ac, ptr, ctx->ac.i32_0);
       vp = LLVMBuildBitCast(builder, vp, ctx->ac.v4f32, "");
       LLVMValueRef vp_scale[2], vp_translate[2];
       vp_scale[0] = ac_llvm_extract_elem(&ctx->ac, vp, 0);
@@ -994,6 +996,13 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
       options.cull_w = true;
 
       if (shader->key.ge.opt.ngg_culling & SI_NGG_CULL_LINES) {
+         ptr = LLVMBuildPointerCast(builder, ptr,
+                                    LLVMPointerType(ctx->ac.v2i32, AC_ADDR_SPACE_CONST_32BIT), "");
+         LLVMValueRef terms = ac_build_load_to_sgpr(&ctx->ac, ptr, LLVMConstInt(ctx->ac.i32, 2, 0));
+         terms = LLVMBuildBitCast(builder, terms, ctx->ac.v2f32, "");
+         clip_half_line_width[0] = ac_llvm_extract_elem(&ctx->ac, terms, 0);
+         clip_half_line_width[1] = ac_llvm_extract_elem(&ctx->ac, terms, 1);
+
          options.num_vertices = 2;
 
          assert(!(shader->key.ge.opt.ngg_culling & SI_NGG_CULL_BACK_FACE));
@@ -1012,8 +1021,8 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
          (void*)gs_vtxptr,
       };
       ac_cull_primitive(&ctx->ac, pos, ctx->ac.i1true, vp_scale, vp_translate,
-                        small_prim_precision, &options,
-                        gfx10_build_primitive_accepted, params);
+                        small_prim_precision, clip_half_line_width,
+                        &options, gfx10_build_primitive_accepted, params);
    }
    ac_build_endif(&ctx->ac, 16002);
    ac_build_s_barrier(&ctx->ac);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 08103a8a6d2..38e40f1405f 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -899,6 +899,8 @@ struct si_saved_cs {
 
 struct si_small_prim_cull_info {
    float scale[2], translate[2];
+   float clip_half_line_width[2];      /* line_width * 0.5 in clip space in X and Y directions */
+   /* The above fields are uploaded to memory. The below fields are passed via user SGPRs. */
    float small_prim_precision;
 };
 
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 828b3467e63..f9092482b0a 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1109,7 +1109,8 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
    }
 
    if (sctx->screen->use_ngg_culling &&
-       old_rs->half_pixel_center != rs->half_pixel_center)
+       (old_rs->half_pixel_center != rs->half_pixel_center ||
+        old_rs->line_width != rs->line_width))
       si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state);
 
    sctx->current_vs_state &= C_VS_STATE_CLAMP_VERTEX_COLOR;
diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c
index f5d7a59a52e..81020db405a 100644
--- a/src/gallium/drivers/radeonsi/si_state_viewport.c
+++ b/src/gallium/drivers/radeonsi/si_state_viewport.c
@@ -45,6 +45,15 @@ void si_get_small_prim_cull_info(struct si_context *sctx, struct si_small_prim_c
    /* The viewport shouldn't flip the X axis for the small prim culling to work. */
    assert(-info.scale[0] + info.translate[0] <= info.scale[0] + info.translate[0]);
 
+   /* Compute the line width used by the rasterizer. */
+   float line_width = sctx->queued.named.rasterizer->line_width;
+   if (num_samples == 1)
+      line_width = roundf(line_width);
+   line_width = MAX2(line_width, 1);
+
+   info.clip_half_line_width[0] = line_width * 0.5 / fabs(info.scale[0]);
+   info.clip_half_line_width[1] = line_width * 0.5 / fabs(info.scale[1]);
+
    /* If the Y axis is inverted (OpenGL default framebuffer), reverse it.
     * This is because the viewport transformation inverts the clip space
     * bounding box, so min becomes max, which breaks small primitive



More information about the mesa-commit mailing list