[Mesa-dev] [PATCH v4] swr: implement clipPlanes/clipVertex/clipDistance/cullDistance

Rowley, Timothy O timothy.o.rowley at intel.com
Thu May 26 15:24:52 UTC 2016


> On May 26, 2016, at 8:33 AM, Ilia Mirkin <imirkin at alum.mit.edu> wrote:
> 
> On Thu, May 26, 2016 at 9:11 AM, Tim Rowley <timothy.o.rowley at intel.com> wrote:
>> v2: only load the clip vertex once
>> 
>> v3: fix clip enable logic, add cullDistance
>> 
>> v4: remove duplicate fields in vs jit key, fix test of clip fixup needed
>> ---
>> docs/GL3.txt                           |  2 +-
>> src/gallium/drivers/swr/swr_context.h  |  2 ++
>> src/gallium/drivers/swr/swr_screen.cpp |  3 +-
>> src/gallium/drivers/swr/swr_shader.cpp | 64 ++++++++++++++++++++++++++++++++++
>> src/gallium/drivers/swr/swr_shader.h   |  1 +
>> src/gallium/drivers/swr/swr_state.cpp  | 24 ++++++++++++-
>> 6 files changed, 93 insertions(+), 3 deletions(-)
>> 
>> diff --git a/docs/GL3.txt b/docs/GL3.txt
>> index 555a9be..5965f25 100644
>> --- a/docs/GL3.txt
>> +++ b/docs/GL3.txt
>> @@ -211,7 +211,7 @@ GL 4.5, GLSL 4.50:
>>   GL_ARB_ES3_1_compatibility                            DONE (nvc0, radeonsi)
>>   GL_ARB_clip_control                                   DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, swr)
>>   GL_ARB_conditional_render_inverted                    DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, swr)
>> -  GL_ARB_cull_distance                                  DONE (i965, nv50, nvc0, llvmpipe, softpipe)
>> +  GL_ARB_cull_distance                                  DONE (i965, nv50, nvc0, llvmpipe, softpipe, swr)
>>   GL_ARB_derivative_control                             DONE (i965, nv50, nvc0, r600, radeonsi)
>>   GL_ARB_direct_state_access                            DONE (all drivers)
>>   GL_ARB_get_texture_sub_image                          DONE (all drivers)
>> diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h
>> index a7383bb..75ecae3 100644
>> --- a/src/gallium/drivers/swr/swr_context.h
>> +++ b/src/gallium/drivers/swr/swr_context.h
>> @@ -89,6 +89,8 @@ struct swr_draw_context {
>>    swr_jit_texture texturesFS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
>>    swr_jit_sampler samplersFS[PIPE_MAX_SAMPLERS];
>> 
>> +   float userClipPlanes[PIPE_MAX_CLIP_PLANES][4];
>> +
>>    SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS];
>> };
>> 
>> diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp
>> index 0772274..7851346 100644
>> --- a/src/gallium/drivers/swr/swr_screen.cpp
>> +++ b/src/gallium/drivers/swr/swr_screen.cpp
>> @@ -333,6 +333,8 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
>>    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
>>    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
>>       return 1;
>> +   case PIPE_CAP_CULL_DISTANCE:
>> +      return 1;
>>    case PIPE_CAP_TGSI_TXQS:
>>    case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
>>    case PIPE_CAP_SHAREABLE_SHADERS:
>> @@ -358,7 +360,6 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
>>    case PIPE_CAP_PCI_DEVICE:
>>    case PIPE_CAP_PCI_FUNCTION:
>>    case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
>> -   case PIPE_CAP_CULL_DISTANCE:
>>    case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
>>       return 0;
>>    }
>> diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
>> index f693f51..5201c8f 100644
>> --- a/src/gallium/drivers/swr/swr_shader.cpp
>> +++ b/src/gallium/drivers/swr/swr_shader.cpp
>> @@ -40,6 +40,9 @@
>> #include "swr_state.h"
>> #include "swr_screen.h"
>> 
>> +static unsigned
>> +locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info);
>> +
>> bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs)
>> {
>>    return !memcmp(&lhs, &rhs, sizeof(lhs));
>> @@ -120,6 +123,11 @@ swr_generate_vs_key(struct swr_jit_vs_key &key,
>> {
>>    memset(&key, 0, sizeof(key));
>> 
>> +   key.clip_plane_mask =
>> +      swr_vs->info.base.clipdist_writemask ?
>> +      swr_vs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable :
>> +      ctx->rasterizer->clip_plane_enable;
> 
> What about cull planes? What does this key control exactly? If it's
> clip | cull, then this needs to be more like
> 
> (swr_vs->info.base.clipdist_writemask &
> ctx->rasterizer->clip_plane_enable) |
> swr_vs->info.base.culldist_writemask
> 
> [if clipdist | culldist are written, otherwise just clip_plane_enable,
> like you have it now]

This key is an index to a hashtable per shader of the compiled variants.  Since culldist_writemask is intrinsic to the shader, it doesn’t need to be duplicated in the key.

I’m going to take a closer look at piglit results for the next iteration of this patch; I’ll consider your other comments in the process.  Might address them in a follow-up patch for optimizing revalidation/recompilation.  Thanks.

-Tim

>> +
>>    swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key);
>> }
>> 
>> @@ -252,6 +260,62 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
>>       }
>>    }
>> 
>> +   if (ctx->rasterizer->clip_plane_enable ||
>> +       swr_vs->info.base.culldist_writemask) {
>> +      unsigned clip_mask = ctx->rasterizer->clip_plane_enable;
>> +
>> +      unsigned cv;
>> +      if (swr_vs->info.base.writes_clipvertex) {
>> +         cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0,
>> +                                 &swr_vs->info.base);
>> +      } else {
>> +         for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
>> +            if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
>> +                swr_vs->info.base.output_semantic_index[i] == 0) {
>> +               cv = i;
>> +               break;
>> +            }
>> +         }
>> +      }
>> +      LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, outputs[cv][0], "");
>> +      LLVMValueRef cy = LLVMBuildLoad(gallivm->builder, outputs[cv][1], "");
>> +      LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], "");
>> +      LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], "");
>> +
>> +      for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) {
>> +         // clip distance overrides user clip planes
>> +         if ((swr_vs->info.base.clipdist_writemask & clip_mask & (1 << val)) ||
>> +             (swr_vs->info.base.culldist_writemask & (1 << val))) {
>> +            unsigned cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPDIST, val,
>> +                                             &swr_vs->info.base);
>> +            LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][0], "");
>> +
>> +            if (val < 4)
>> +               STORE(unwrap(dist), vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT, val});
>> +            else
>> +               STORE(unwrap(dist), vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4});
>> +            continue;
>> +         }
> 
> OK, so let's say the shader writes CLIPDIST .x, but 4 clip planes are
> enabled. This is a "undefined behavior" scenario, but you'll end up
> loading that UCP data. I guess since it's UB, you can do whatever, but
> personally I think this would be clearer split up like
> 
> if (clipdist_write || culldist_write) {
>  mask = (clip & clip_mask) | cull; [aka key.clip_plane_mask - no need
> to rederive it - ideally you'd only ever pull info from the key, not
> ctx]
>  for each plane ...
>    if (in mask)
>      copy the output
> } else if (clip_mask) {
>  for each plane ...
>    do ucp thing
> }
> 
> Your call though.
> 
>> +
>> +         if (!(clip_mask & (1 << val)))
>> +            continue;
>> +
>> +         Value *px = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 0}));
>> +         Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1}));
>> +         Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2}));
>> +         Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3}));
>> +         Value *dist = FADD(FMUL(unwrap(cx), VBROADCAST(px)),
>> +                            FADD(FMUL(unwrap(cy), VBROADCAST(py)),
>> +                                 FADD(FMUL(unwrap(cz), VBROADCAST(pz)),
>> +                                      FMUL(unwrap(cw), VBROADCAST(pw)))));
>> +
>> +         if (val < 4)
>> +            STORE(dist, vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT, val});
>> +         else
>> +            STORE(dist, vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4});
>> +      }
>> +   }
>> +
>>    RET_VOID();
>> 
>>    gallivm_verify_function(gallivm, wrap(pFunction));
>> diff --git a/src/gallium/drivers/swr/swr_shader.h b/src/gallium/drivers/swr/swr_shader.h
>> index 11d50c3..6dfc8fa 100644
>> --- a/src/gallium/drivers/swr/swr_shader.h
>> +++ b/src/gallium/drivers/swr/swr_shader.h
>> @@ -57,6 +57,7 @@ struct swr_jit_fs_key : swr_jit_sampler_key {
>> };
>> 
>> struct swr_jit_vs_key : swr_jit_sampler_key {
>> +   unsigned clip_plane_mask; // from rasterizer state & vs_info
> 
> So what I was saying earlier, is you're going to end up regenerating a
> fresh VS anytime this changes. You could, instead, make this key be
> 
> unsigned num_ucps;
> 
> and have it always be 0 when a clip/cull distance is written. Since
> you have a rastState->clip/cullDistanceMask, that should work out, no?
> You'll end up with shaders that do a tiny bit of extra work in those
> cases, but less shader recompilation. (I guess since your shtick is
> that you run vertex shaders over TB's of data, the way you have it now
> could work out better actually...)
> 
>> };
>> 
>> namespace std
>> diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
>> index f9326f3..7732322 100644
>> --- a/src/gallium/drivers/swr/swr_state.cpp
>> +++ b/src/gallium/drivers/swr/swr_state.cpp
>> @@ -849,7 +849,9 @@ swr_update_derived(struct pipe_context *pipe,
>>    }
>> 
>>    /* Raster state */
>> -   if (ctx->dirty & (SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER)) {
>> +   if (ctx->dirty & (SWR_NEW_RASTERIZER |
>> +                     SWR_NEW_VS | // clipping
>> +                     SWR_NEW_FRAMEBUFFER)) {
>>       pipe_rasterizer_state *rasterizer = ctx->rasterizer;
>>       pipe_framebuffer_state *fb = &ctx->framebuffer;
>> 
>> @@ -906,6 +908,13 @@ swr_update_derived(struct pipe_context *pipe,
>> 
>>       rastState->depthClipEnable = rasterizer->depth_clip;
>> 
>> +      rastState->clipDistanceMask =
>> +         ctx->vs->info.base.num_written_clipdistance ?
>> +         ctx->vs->info.base.clipdist_writemask & rasterizer->clip_plane_enable :
>> +         rasterizer->clip_plane_enable;
>> +
>> +      rastState->cullDistanceMask = ctx->vs->info.base.culldist_writemask;
>> +
>>       SwrSetRastState(ctx->swrContext, rastState);
>>    }
>> 
>> @@ -1067,6 +1076,7 @@ swr_update_derived(struct pipe_context *pipe,
>> 
>>    /* VertexShader */
>>    if (ctx->dirty & (SWR_NEW_VS |
>> +                     SWR_NEW_RASTERIZER | // for clip planes
>>                      SWR_NEW_SAMPLER |
>>                      SWR_NEW_SAMPLER_VIEW |
>>                      SWR_NEW_FRAMEBUFFER)) {
>> @@ -1341,6 +1351,18 @@ swr_update_derived(struct pipe_context *pipe,
>>       }
>>    }
>> 
>> +   if (ctx->dirty & SWR_NEW_CLIP) {
>> +      // shader exporting clip distances overrides all user clip planes
>> +      if (ctx->rasterizer->clip_plane_enable &&
>> +          !ctx->vs->info.base.num_written_clipdistance)
>> +      {
>> +         swr_draw_context *pDC = &ctx->swrDC;
>> +         memcpy(pDC->userClipPlanes,
>> +                ctx->clip.ucp,
>> +                sizeof(pDC->userClipPlanes));
>> +      }
>> +   }
>> +
>>    uint32_t linkage = ctx->vs->linkageMask;
>>    if (ctx->rasterizer->sprite_coord_enable)
>>       linkage |= (1 << ctx->vs->info.base.num_outputs);
>> --
>> 1.9.1
>> 
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev



More information about the mesa-dev mailing list