[Mesa-dev] [PATCH v3] swr: implement clipPlanes/clipVertex/clipDistance/cullDistance
Ilia Mirkin
imirkin at alum.mit.edu
Thu May 26 02:16:35 UTC 2016
On Wed, May 25, 2016 at 10:03 PM, Tim Rowley <timothy.o.rowley at intel.com> wrote:
> v2: only load the clip vertex once
>
> v3: fix clip enable logic, add cullDistance
> ---
> docs/GL3.txt | 2 +-
> src/gallium/drivers/swr/swr_context.h | 2 ++
> src/gallium/drivers/swr/swr_screen.cpp | 3 +-
> src/gallium/drivers/swr/swr_shader.cpp | 63 ++++++++++++++++++++++++++++++++++
> src/gallium/drivers/swr/swr_shader.h | 4 +++
> src/gallium/drivers/swr/swr_state.cpp | 24 ++++++++++++-
> 6 files changed, 95 insertions(+), 3 deletions(-)
>
> diff --git a/docs/GL3.txt b/docs/GL3.txt
> index 555a9be..5965f25 100644
> --- a/docs/GL3.txt
> +++ b/docs/GL3.txt
> @@ -211,7 +211,7 @@ GL 4.5, GLSL 4.50:
> GL_ARB_ES3_1_compatibility DONE (nvc0, radeonsi)
> GL_ARB_clip_control DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, swr)
> GL_ARB_conditional_render_inverted DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, swr)
> - GL_ARB_cull_distance DONE (i965, nv50, nvc0, llvmpipe, softpipe)
> + GL_ARB_cull_distance DONE (i965, nv50, nvc0, llvmpipe, softpipe, swr)
> GL_ARB_derivative_control DONE (i965, nv50, nvc0, r600, radeonsi)
> GL_ARB_direct_state_access DONE (all drivers)
> GL_ARB_get_texture_sub_image DONE (all drivers)
> diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h
> index a7383bb..75ecae3 100644
> --- a/src/gallium/drivers/swr/swr_context.h
> +++ b/src/gallium/drivers/swr/swr_context.h
> @@ -89,6 +89,8 @@ struct swr_draw_context {
> swr_jit_texture texturesFS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
> swr_jit_sampler samplersFS[PIPE_MAX_SAMPLERS];
>
> + float userClipPlanes[PIPE_MAX_CLIP_PLANES][4];
> +
> SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS];
> };
>
> diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp
> index 0772274..7851346 100644
> --- a/src/gallium/drivers/swr/swr_screen.cpp
> +++ b/src/gallium/drivers/swr/swr_screen.cpp
> @@ -333,6 +333,8 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
> case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
> case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
> return 1;
> + case PIPE_CAP_CULL_DISTANCE:
> + return 1;
> case PIPE_CAP_TGSI_TXQS:
> case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
> case PIPE_CAP_SHAREABLE_SHADERS:
> @@ -358,7 +360,6 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
> case PIPE_CAP_PCI_DEVICE:
> case PIPE_CAP_PCI_FUNCTION:
> case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
> - case PIPE_CAP_CULL_DISTANCE:
> case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
> return 0;
> }
> diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
> index f693f51..25ea7ae 100644
> --- a/src/gallium/drivers/swr/swr_shader.cpp
> +++ b/src/gallium/drivers/swr/swr_shader.cpp
> @@ -40,6 +40,9 @@
> #include "swr_state.h"
> #include "swr_screen.h"
>
> +static unsigned
> +locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info);
> +
> bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs)
> {
> return !memcmp(&lhs, &rhs, sizeof(lhs));
> @@ -120,6 +123,11 @@ swr_generate_vs_key(struct swr_jit_vs_key &key,
> {
> memset(&key, 0, sizeof(key));
>
> + key.clip_plane_mask = ctx->rasterizer->clip_plane_enable;
> + key.clip_distance_mask = swr_vs->info.base.clipdist_writemask;
> + key.cull_distance_mask = swr_vs->info.base.culldist_writemask;
> + key.writes_clipvertex = swr_vs->info.base.writes_clipvertex;
> +
> swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key);
> }
>
> @@ -252,6 +260,61 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
> }
> }
>
> + if (ctx->rasterizer->clip_plane_enable) {
I think you want if (ctx->rasterizer->clip_plane_enable &&
(swr_vs->info.base.clipdist_writemask |
swr_vs->info.base.culldist_writemask) == 0)
Note that for culling, clip_plane_enable won't be set. That's only for
clip planes and cull distances.
> + unsigned clip_mask = ctx->rasterizer->clip_plane_enable;
> +
> + unsigned cv;
> + if (swr_vs->info.base.writes_clipvertex) {
> + cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0,
> + &swr_vs->info.base);
> + } else {
> + for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
> + if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
> + swr_vs->info.base.output_semantic_index[i] == 0) {
> + cv = i;
> + break;
> + }
> + }
> + }
> + LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, outputs[cv][0], "");
> + LLVMValueRef cy = LLVMBuildLoad(gallivm->builder, outputs[cv][1], "");
> + LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], "");
> + LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], "");
> +
> + for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) {
> + // clip distance overrides user clip planes
So hard that if any clip/cull distances are set, all the UCP stuff
goes away. You want to make those writes just go directly to
VERTEX_CLIPCULL_DIST_LO_SLOT/HI_SLOT. And then do these fixups for
when clip/cull distances aren't explicitly written but clip planes are
enabled.
Also note that this is going to cause you a recompile on every
rasterizer change. Not sure if this is OK or not, but just wanted to
point it out... ideally you'd only do the recompile when something
relevant changed, like the clip plane mask. Or what we do in nouveau,
which is we emit code for up to N planes, and then as long as the
"max" number of planes doesn't go up, we don't recompile. Not
super-familiar with the rest of your infrastructure though, perhaps
this is difficult/undesirable/whatever.
-ilia
> + if ((swr_vs->info.base.clipdist_writemask & clip_mask & (1 << val)) ||
> + (swr_vs->info.base.culldist_writemask & (1 << val))) {
> + unsigned cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPDIST, val,
> + &swr_vs->info.base);
> + LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][0], "");
> +
> + if (val < 4)
> + STORE(unwrap(dist), vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT, val});
> + else
> + STORE(unwrap(dist), vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4});
> + continue;
> + }
> +
> + if (!(clip_mask & (1 << val)))
> + continue;
> +
> + Value *px = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 0}));
> + Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1}));
> + Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2}));
> + Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3}));
> + Value *dist = FADD(FMUL(unwrap(cx), VBROADCAST(px)),
> + FADD(FMUL(unwrap(cy), VBROADCAST(py)),
> + FADD(FMUL(unwrap(cz), VBROADCAST(pz)),
> + FMUL(unwrap(cw), VBROADCAST(pw)))));
> +
> + if (val < 4)
> + STORE(dist, vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT, val});
> + else
> + STORE(dist, vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4});
> + }
> + }
> +
> RET_VOID();
>
> gallivm_verify_function(gallivm, wrap(pFunction));
> diff --git a/src/gallium/drivers/swr/swr_shader.h b/src/gallium/drivers/swr/swr_shader.h
> index 11d50c3..5f5b232 100644
> --- a/src/gallium/drivers/swr/swr_shader.h
> +++ b/src/gallium/drivers/swr/swr_shader.h
> @@ -57,6 +57,10 @@ struct swr_jit_fs_key : swr_jit_sampler_key {
> };
>
> struct swr_jit_vs_key : swr_jit_sampler_key {
> + unsigned clip_plane_mask; // from rasterizer state
> + unsigned clip_distance_mask; // from vs outputs (direct)
> + unsigned cull_distance_mask; // from vs outputs (direct)
> + bool writes_clipvertex; // from vs_info
> };
>
> namespace std
> diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
> index f9326f3..7732322 100644
> --- a/src/gallium/drivers/swr/swr_state.cpp
> +++ b/src/gallium/drivers/swr/swr_state.cpp
> @@ -849,7 +849,9 @@ swr_update_derived(struct pipe_context *pipe,
> }
>
> /* Raster state */
> - if (ctx->dirty & (SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER)) {
> + if (ctx->dirty & (SWR_NEW_RASTERIZER |
> + SWR_NEW_VS | // clipping
> + SWR_NEW_FRAMEBUFFER)) {
> pipe_rasterizer_state *rasterizer = ctx->rasterizer;
> pipe_framebuffer_state *fb = &ctx->framebuffer;
>
> @@ -906,6 +908,13 @@ swr_update_derived(struct pipe_context *pipe,
>
> rastState->depthClipEnable = rasterizer->depth_clip;
>
> + rastState->clipDistanceMask =
> + ctx->vs->info.base.num_written_clipdistance ?
> + ctx->vs->info.base.clipdist_writemask & rasterizer->clip_plane_enable :
> + rasterizer->clip_plane_enable;
> +
> + rastState->cullDistanceMask = ctx->vs->info.base.culldist_writemask;
> +
> SwrSetRastState(ctx->swrContext, rastState);
> }
>
> @@ -1067,6 +1076,7 @@ swr_update_derived(struct pipe_context *pipe,
>
> /* VertexShader */
> if (ctx->dirty & (SWR_NEW_VS |
> + SWR_NEW_RASTERIZER | // for clip planes
> SWR_NEW_SAMPLER |
> SWR_NEW_SAMPLER_VIEW |
> SWR_NEW_FRAMEBUFFER)) {
> @@ -1341,6 +1351,18 @@ swr_update_derived(struct pipe_context *pipe,
> }
> }
>
> + if (ctx->dirty & SWR_NEW_CLIP) {
> + // shader exporting clip distances overrides all user clip planes
> + if (ctx->rasterizer->clip_plane_enable &&
> + !ctx->vs->info.base.num_written_clipdistance)
> + {
> + swr_draw_context *pDC = &ctx->swrDC;
> + memcpy(pDC->userClipPlanes,
> + ctx->clip.ucp,
> + sizeof(pDC->userClipPlanes));
> + }
> + }
> +
> uint32_t linkage = ctx->vs->linkageMask;
> if (ctx->rasterizer->sprite_coord_enable)
> linkage |= (1 << ctx->vs->info.base.num_outputs);
> --
> 1.9.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list