[Mesa-dev] [PATCH v4] swr: implement clipPlanes/clipVertex/clipDistance/cullDistance
Ilia Mirkin
imirkin at alum.mit.edu
Thu May 26 13:33:01 UTC 2016
On Thu, May 26, 2016 at 9:11 AM, Tim Rowley <timothy.o.rowley at intel.com> wrote:
> v2: only load the clip vertex once
>
> v3: fix clip enable logic, add cullDistance
>
> v4: remove duplicate fields in vs jit key, fix test of clip fixup needed
> ---
> docs/GL3.txt | 2 +-
> src/gallium/drivers/swr/swr_context.h | 2 ++
> src/gallium/drivers/swr/swr_screen.cpp | 3 +-
> src/gallium/drivers/swr/swr_shader.cpp | 64 ++++++++++++++++++++++++++++++++++
> src/gallium/drivers/swr/swr_shader.h | 1 +
> src/gallium/drivers/swr/swr_state.cpp | 24 ++++++++++++-
> 6 files changed, 93 insertions(+), 3 deletions(-)
>
> diff --git a/docs/GL3.txt b/docs/GL3.txt
> index 555a9be..5965f25 100644
> --- a/docs/GL3.txt
> +++ b/docs/GL3.txt
> @@ -211,7 +211,7 @@ GL 4.5, GLSL 4.50:
> GL_ARB_ES3_1_compatibility DONE (nvc0, radeonsi)
> GL_ARB_clip_control DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, swr)
> GL_ARB_conditional_render_inverted DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, swr)
> - GL_ARB_cull_distance DONE (i965, nv50, nvc0, llvmpipe, softpipe)
> + GL_ARB_cull_distance DONE (i965, nv50, nvc0, llvmpipe, softpipe, swr)
> GL_ARB_derivative_control DONE (i965, nv50, nvc0, r600, radeonsi)
> GL_ARB_direct_state_access DONE (all drivers)
> GL_ARB_get_texture_sub_image DONE (all drivers)
> diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h
> index a7383bb..75ecae3 100644
> --- a/src/gallium/drivers/swr/swr_context.h
> +++ b/src/gallium/drivers/swr/swr_context.h
> @@ -89,6 +89,8 @@ struct swr_draw_context {
> swr_jit_texture texturesFS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
> swr_jit_sampler samplersFS[PIPE_MAX_SAMPLERS];
>
> + float userClipPlanes[PIPE_MAX_CLIP_PLANES][4];
> +
> SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS];
> };
>
> diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp
> index 0772274..7851346 100644
> --- a/src/gallium/drivers/swr/swr_screen.cpp
> +++ b/src/gallium/drivers/swr/swr_screen.cpp
> @@ -333,6 +333,8 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
> case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
> case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
> return 1;
> + case PIPE_CAP_CULL_DISTANCE:
> + return 1;
> case PIPE_CAP_TGSI_TXQS:
> case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
> case PIPE_CAP_SHAREABLE_SHADERS:
> @@ -358,7 +360,6 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
> case PIPE_CAP_PCI_DEVICE:
> case PIPE_CAP_PCI_FUNCTION:
> case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
> - case PIPE_CAP_CULL_DISTANCE:
> case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
> return 0;
> }
> diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
> index f693f51..5201c8f 100644
> --- a/src/gallium/drivers/swr/swr_shader.cpp
> +++ b/src/gallium/drivers/swr/swr_shader.cpp
> @@ -40,6 +40,9 @@
> #include "swr_state.h"
> #include "swr_screen.h"
>
> +static unsigned
> +locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info);
> +
> bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs)
> {
> return !memcmp(&lhs, &rhs, sizeof(lhs));
> @@ -120,6 +123,11 @@ swr_generate_vs_key(struct swr_jit_vs_key &key,
> {
> memset(&key, 0, sizeof(key));
>
> + key.clip_plane_mask =
> + swr_vs->info.base.clipdist_writemask ?
> + swr_vs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
> + ctx->rasterizer->clip_plane_enable;
What about cull planes? What does this key control exactly? If it's
clip | cull, then this needs to be more like
(swr_vs->info.base.clipdist_writemask &
ctx->rasterizer->clip_plane_enable) |
swr_vs->info.base.culldist_writemask
[if clipdist | culldist are written, otherwise just clip_plane_enable,
like you have it now]
> +
> swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key);
> }
>
> @@ -252,6 +260,62 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
> }
> }
>
> + if (ctx->rasterizer->clip_plane_enable ||
> + swr_vs->info.base.culldist_writemask) {
> + unsigned clip_mask = ctx->rasterizer->clip_plane_enable;
> +
> + unsigned cv;
> + if (swr_vs->info.base.writes_clipvertex) {
> + cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0,
> + &swr_vs->info.base);
> + } else {
> + for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
> + if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
> + swr_vs->info.base.output_semantic_index[i] == 0) {
> + cv = i;
> + break;
> + }
> + }
> + }
> + LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, outputs[cv][0], "");
> + LLVMValueRef cy = LLVMBuildLoad(gallivm->builder, outputs[cv][1], "");
> + LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], "");
> + LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], "");
> +
> + for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) {
> + // clip distance overrides user clip planes
> + if ((swr_vs->info.base.clipdist_writemask & clip_mask & (1 << val)) ||
> + (swr_vs->info.base.culldist_writemask & (1 << val))) {
> + unsigned cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPDIST, val,
> + &swr_vs->info.base);
> + LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][0], "");
> +
> + if (val < 4)
> + STORE(unwrap(dist), vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT, val});
> + else
> + STORE(unwrap(dist), vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4});
> + continue;
> + }
OK, so let's say the shader writes CLIPDIST .x, but 4 clip planes are
enabled. This is a "undefined behavior" scenario, but you'll end up
loading that UCP data. I guess since it's UB, you can do whatever, but
personally I think this would be clearer split up like
if (clipdist_write || culldist_write) {
mask = (clip & clip_mask) | cull; [aka key.clip_plane_mask - no need
to rederive it - ideally you'd only ever pull info from the key, not
ctx]
for each plane ...
if (in mask)
copy the output
} else if (clip_mask) {
for each plane ...
do ucp thing
}
Your call though.
> +
> + if (!(clip_mask & (1 << val)))
> + continue;
> +
> + Value *px = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 0}));
> + Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1}));
> + Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2}));
> + Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3}));
> + Value *dist = FADD(FMUL(unwrap(cx), VBROADCAST(px)),
> + FADD(FMUL(unwrap(cy), VBROADCAST(py)),
> + FADD(FMUL(unwrap(cz), VBROADCAST(pz)),
> + FMUL(unwrap(cw), VBROADCAST(pw)))));
> +
> + if (val < 4)
> + STORE(dist, vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT, val});
> + else
> + STORE(dist, vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4});
> + }
> + }
> +
> RET_VOID();
>
> gallivm_verify_function(gallivm, wrap(pFunction));
> diff --git a/src/gallium/drivers/swr/swr_shader.h b/src/gallium/drivers/swr/swr_shader.h
> index 11d50c3..6dfc8fa 100644
> --- a/src/gallium/drivers/swr/swr_shader.h
> +++ b/src/gallium/drivers/swr/swr_shader.h
> @@ -57,6 +57,7 @@ struct swr_jit_fs_key : swr_jit_sampler_key {
> };
>
> struct swr_jit_vs_key : swr_jit_sampler_key {
> + unsigned clip_plane_mask; // from rasterizer state & vs_info
So what I was saying earlier, is you're going to end up regenerating a
fresh VS anytime this changes. You could, instead, make this key be
unsigned num_ucps;
and have it always be 0 when a clip/cull distance is written. Since
you have a rastState->clip/cullDistanceMask, that should work out, no?
You'll end up with shaders that do a tiny bit of extra work in those
cases, but less shader recompilation. (I guess since your shtick is
that you run vertex shaders over TB's of data, the way you have it now
could work out better actually...)
> };
>
> namespace std
> diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
> index f9326f3..7732322 100644
> --- a/src/gallium/drivers/swr/swr_state.cpp
> +++ b/src/gallium/drivers/swr/swr_state.cpp
> @@ -849,7 +849,9 @@ swr_update_derived(struct pipe_context *pipe,
> }
>
> /* Raster state */
> - if (ctx->dirty & (SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER)) {
> + if (ctx->dirty & (SWR_NEW_RASTERIZER |
> + SWR_NEW_VS | // clipping
> + SWR_NEW_FRAMEBUFFER)) {
> pipe_rasterizer_state *rasterizer = ctx->rasterizer;
> pipe_framebuffer_state *fb = &ctx->framebuffer;
>
> @@ -906,6 +908,13 @@ swr_update_derived(struct pipe_context *pipe,
>
> rastState->depthClipEnable = rasterizer->depth_clip;
>
> + rastState->clipDistanceMask =
> + ctx->vs->info.base.num_written_clipdistance ?
> + ctx->vs->info.base.clipdist_writemask & rasterizer->clip_plane_enable :
> + rasterizer->clip_plane_enable;
> +
> + rastState->cullDistanceMask = ctx->vs->info.base.culldist_writemask;
> +
> SwrSetRastState(ctx->swrContext, rastState);
> }
>
> @@ -1067,6 +1076,7 @@ swr_update_derived(struct pipe_context *pipe,
>
> /* VertexShader */
> if (ctx->dirty & (SWR_NEW_VS |
> + SWR_NEW_RASTERIZER | // for clip planes
> SWR_NEW_SAMPLER |
> SWR_NEW_SAMPLER_VIEW |
> SWR_NEW_FRAMEBUFFER)) {
> @@ -1341,6 +1351,18 @@ swr_update_derived(struct pipe_context *pipe,
> }
> }
>
> + if (ctx->dirty & SWR_NEW_CLIP) {
> + // shader exporting clip distances overrides all user clip planes
> + if (ctx->rasterizer->clip_plane_enable &&
> + !ctx->vs->info.base.num_written_clipdistance)
> + {
> + swr_draw_context *pDC = &ctx->swrDC;
> + memcpy(pDC->userClipPlanes,
> + ctx->clip.ucp,
> + sizeof(pDC->userClipPlanes));
> + }
> + }
> +
> uint32_t linkage = ctx->vs->linkageMask;
> if (ctx->rasterizer->sprite_coord_enable)
> linkage |= (1 << ctx->vs->info.base.num_outputs);
> --
> 1.9.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list