[Mesa-dev] [PATCH v3] swr: implement clipPlanes/clipVertex/clipDistance/cullDistance

Rowley, Timothy O timothy.o.rowley at intel.com
Thu May 26 13:08:26 UTC 2016


> On May 25, 2016, at 9:16 PM, Ilia Mirkin <imirkin at alum.mit.edu> wrote:
> 
> On Wed, May 25, 2016 at 10:03 PM, Tim Rowley <timothy.o.rowley at intel.com> wrote:
>> v2: only load the clip vertex once
>> 
>> v3: fix clip enable logic, add cullDistance
>> ---
>> docs/GL3.txt                           |  2 +-
>> src/gallium/drivers/swr/swr_context.h  |  2 ++
>> src/gallium/drivers/swr/swr_screen.cpp |  3 +-
>> src/gallium/drivers/swr/swr_shader.cpp | 63 ++++++++++++++++++++++++++++++++++
>> src/gallium/drivers/swr/swr_shader.h   |  4 +++
>> src/gallium/drivers/swr/swr_state.cpp  | 24 ++++++++++++-
>> 6 files changed, 95 insertions(+), 3 deletions(-)
>> 
>> diff --git a/docs/GL3.txt b/docs/GL3.txt
>> index 555a9be..5965f25 100644
>> --- a/docs/GL3.txt
>> +++ b/docs/GL3.txt
>> @@ -211,7 +211,7 @@ GL 4.5, GLSL 4.50:
>>   GL_ARB_ES3_1_compatibility                            DONE (nvc0, radeonsi)
>>   GL_ARB_clip_control                                   DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, swr)
>>   GL_ARB_conditional_render_inverted                    DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, swr)
>> -  GL_ARB_cull_distance                                  DONE (i965, nv50, nvc0, llvmpipe, softpipe)
>> +  GL_ARB_cull_distance                                  DONE (i965, nv50, nvc0, llvmpipe, softpipe, swr)
>>   GL_ARB_derivative_control                             DONE (i965, nv50, nvc0, r600, radeonsi)
>>   GL_ARB_direct_state_access                            DONE (all drivers)
>>   GL_ARB_get_texture_sub_image                          DONE (all drivers)
>> diff --git a/src/gallium/drivers/swr/swr_context.h b/src/gallium/drivers/swr/swr_context.h
>> index a7383bb..75ecae3 100644
>> --- a/src/gallium/drivers/swr/swr_context.h
>> +++ b/src/gallium/drivers/swr/swr_context.h
>> @@ -89,6 +89,8 @@ struct swr_draw_context {
>>    swr_jit_texture texturesFS[PIPE_MAX_SHADER_SAMPLER_VIEWS];
>>    swr_jit_sampler samplersFS[PIPE_MAX_SAMPLERS];
>> 
>> +   float userClipPlanes[PIPE_MAX_CLIP_PLANES][4];
>> +
>>    SWR_SURFACE_STATE renderTargets[SWR_NUM_ATTACHMENTS];
>> };
>> 
>> diff --git a/src/gallium/drivers/swr/swr_screen.cpp b/src/gallium/drivers/swr/swr_screen.cpp
>> index 0772274..7851346 100644
>> --- a/src/gallium/drivers/swr/swr_screen.cpp
>> +++ b/src/gallium/drivers/swr/swr_screen.cpp
>> @@ -333,6 +333,8 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
>>    case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
>>    case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:
>>       return 1;
>> +   case PIPE_CAP_CULL_DISTANCE:
>> +      return 1;
>>    case PIPE_CAP_TGSI_TXQS:
>>    case PIPE_CAP_FORCE_PERSAMPLE_INTERP:
>>    case PIPE_CAP_SHAREABLE_SHADERS:
>> @@ -358,7 +360,6 @@ swr_get_param(struct pipe_screen *screen, enum pipe_cap param)
>>    case PIPE_CAP_PCI_DEVICE:
>>    case PIPE_CAP_PCI_FUNCTION:
>>    case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:
>> -   case PIPE_CAP_CULL_DISTANCE:
>>    case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
>>       return 0;
>>    }
>> diff --git a/src/gallium/drivers/swr/swr_shader.cpp b/src/gallium/drivers/swr/swr_shader.cpp
>> index f693f51..25ea7ae 100644
>> --- a/src/gallium/drivers/swr/swr_shader.cpp
>> +++ b/src/gallium/drivers/swr/swr_shader.cpp
>> @@ -40,6 +40,9 @@
>> #include "swr_state.h"
>> #include "swr_screen.h"
>> 
>> +static unsigned
>> +locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info);
>> +
>> bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs)
>> {
>>    return !memcmp(&lhs, &rhs, sizeof(lhs));
>> @@ -120,6 +123,11 @@ swr_generate_vs_key(struct swr_jit_vs_key &key,
>> {
>>    memset(&key, 0, sizeof(key));
>> 
>> +   key.clip_plane_mask = ctx->rasterizer->clip_plane_enable;
>> +   key.clip_distance_mask = swr_vs->info.base.clipdist_writemask;
>> +   key.cull_distance_mask = swr_vs->info.base.culldist_writemask;
>> +   key.writes_clipvertex = swr_vs->info.base.writes_clipvertex;
>> +
>>    swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key);
>> }
>> 
>> @@ -252,6 +260,61 @@ BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key)
>>       }
>>    }
>> 
>> +   if (ctx->rasterizer->clip_plane_enable) {
> 
> I think you want if (ctx->rasterizer->clip_plane_enable &&
> (swr_vs->info.base.clipdist_writemask |
> swr_vs->info.base.culldist_writemask) == 0)
> 
> Note that for culling, clip_plane_enable won't be set. That's only for
> clip planes and cull distances.
> 

I think the test actually needs to be "if (ctx->rasterizer->clip_plane_enable || swr_vs->info.base.culldist_writemask)” since I need to do the output rewiring for clip and cull.

>> +      unsigned clip_mask = ctx->rasterizer->clip_plane_enable;
>> +
>> +      unsigned cv;
>> +      if (swr_vs->info.base.writes_clipvertex) {
>> +         cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0,
>> +                                 &swr_vs->info.base);
>> +      } else {
>> +         for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
>> +            if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
>> +                swr_vs->info.base.output_semantic_index[i] == 0) {
>> +               cv = i;
>> +               break;
>> +            }
>> +         }
>> +      }
>> +      LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, outputs[cv][0], "");
>> +      LLVMValueRef cy = LLVMBuildLoad(gallivm->builder, outputs[cv][1], "");
>> +      LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], "");
>> +      LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], "");
>> +
>> +      for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) {
>> +         // clip distance overrides user clip planes
> 
> So hard that if any clip/cull distances are set, all the UCP stuff
> goes away. You want to make those writes just go directly to
> VERTEX_CLIPCULL_DIST_LO_SLOT/HI_SLOT. And then do these fixups for
> when clip/cull distances aren't explicitly written but clip planes are
> enabled.

Unfortunately it seems that lp_build_tgsi_soa doesn’t allow control of the emit (it creates the lp_build_tgsi_soa_context internally), so I have to do the fixup and then rely on llvm optimization passes to remove what essentially is an extra spill/fill.

> Also note that this is going to cause you a recompile on every
> rasterizer change. Not sure if this is OK or not, but just wanted to
> point it out... ideally you'd only do the recompile when something
> relevant changed, like the clip plane mask. Or what we do in nouveau,
> which is we emit code for up to N planes, and then as long as the
> "max" number of planes doesn't go up, we don't recompile. Not
> super-familiar with the rest of your infrastructure though, perhaps
> this is difficult/undesirable/whatever.

Unless you’ve spotted some error in our jit caching logic, we only recompile when the JIT key changes (i.e., the clip plane mask or sampler information in the case of vertex shaders).  I did notice some duplicate state in the vs jit key that I’m removing in an upcoming patch revision.

-Tim

>  -ilia
> 
>> +         if ((swr_vs->info.base.clipdist_writemask & clip_mask & (1 << val)) ||
>> +             (swr_vs->info.base.culldist_writemask & (1 << val))) {
>> +            unsigned cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPDIST, val,
>> +                                             &swr_vs->info.base);
>> +            LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][0], "");
>> +
>> +            if (val < 4)
>> +               STORE(unwrap(dist), vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT, val});
>> +            else
>> +               STORE(unwrap(dist), vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4});
>> +            continue;
>> +         }
>> +
>> +         if (!(clip_mask & (1 << val)))
>> +            continue;
>> +
>> +         Value *px = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 0}));
>> +         Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1}));
>> +         Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2}));
>> +         Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3}));
>> +         Value *dist = FADD(FMUL(unwrap(cx), VBROADCAST(px)),
>> +                            FADD(FMUL(unwrap(cy), VBROADCAST(py)),
>> +                                 FADD(FMUL(unwrap(cz), VBROADCAST(pz)),
>> +                                      FMUL(unwrap(cw), VBROADCAST(pw)))));
>> +
>> +         if (val < 4)
>> +            STORE(dist, vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT, val});
>> +         else
>> +            STORE(dist, vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4});
>> +      }
>> +   }
>> +
>>    RET_VOID();
>> 
>>    gallivm_verify_function(gallivm, wrap(pFunction));
>> diff --git a/src/gallium/drivers/swr/swr_shader.h b/src/gallium/drivers/swr/swr_shader.h
>> index 11d50c3..5f5b232 100644
>> --- a/src/gallium/drivers/swr/swr_shader.h
>> +++ b/src/gallium/drivers/swr/swr_shader.h
>> @@ -57,6 +57,10 @@ struct swr_jit_fs_key : swr_jit_sampler_key {
>> };
>> 
>> struct swr_jit_vs_key : swr_jit_sampler_key {
>> +   unsigned clip_plane_mask; // from rasterizer state
>> +   unsigned clip_distance_mask; // from vs outputs (direct)
>> +   unsigned cull_distance_mask; // from vs outputs (direct)
>> +   bool writes_clipvertex; // from vs_info
>> };
>> 
>> namespace std
>> diff --git a/src/gallium/drivers/swr/swr_state.cpp b/src/gallium/drivers/swr/swr_state.cpp
>> index f9326f3..7732322 100644
>> --- a/src/gallium/drivers/swr/swr_state.cpp
>> +++ b/src/gallium/drivers/swr/swr_state.cpp
>> @@ -849,7 +849,9 @@ swr_update_derived(struct pipe_context *pipe,
>>    }
>> 
>>    /* Raster state */
>> -   if (ctx->dirty & (SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER)) {
>> +   if (ctx->dirty & (SWR_NEW_RASTERIZER |
>> +                     SWR_NEW_VS | // clipping
>> +                     SWR_NEW_FRAMEBUFFER)) {
>>       pipe_rasterizer_state *rasterizer = ctx->rasterizer;
>>       pipe_framebuffer_state *fb = &ctx->framebuffer;
>> 
>> @@ -906,6 +908,13 @@ swr_update_derived(struct pipe_context *pipe,
>> 
>>       rastState->depthClipEnable = rasterizer->depth_clip;
>> 
>> +      rastState->clipDistanceMask =
>> +         ctx->vs->info.base.num_written_clipdistance ?
>> +         ctx->vs->info.base.clipdist_writemask & rasterizer->clip_plane_enable :
>> +         rasterizer->clip_plane_enable;
>> +
>> +      rastState->cullDistanceMask = ctx->vs->info.base.culldist_writemask;
>> +
>>       SwrSetRastState(ctx->swrContext, rastState);
>>    }
>> 
>> @@ -1067,6 +1076,7 @@ swr_update_derived(struct pipe_context *pipe,
>> 
>>    /* VertexShader */
>>    if (ctx->dirty & (SWR_NEW_VS |
>> +                     SWR_NEW_RASTERIZER | // for clip planes
>>                      SWR_NEW_SAMPLER |
>>                      SWR_NEW_SAMPLER_VIEW |
>>                      SWR_NEW_FRAMEBUFFER)) {
>> @@ -1341,6 +1351,18 @@ swr_update_derived(struct pipe_context *pipe,
>>       }
>>    }
>> 
>> +   if (ctx->dirty & SWR_NEW_CLIP) {
>> +      // shader exporting clip distances overrides all user clip planes
>> +      if (ctx->rasterizer->clip_plane_enable &&
>> +          !ctx->vs->info.base.num_written_clipdistance)
>> +      {
>> +         swr_draw_context *pDC = &ctx->swrDC;
>> +         memcpy(pDC->userClipPlanes,
>> +                ctx->clip.ucp,
>> +                sizeof(pDC->userClipPlanes));
>> +      }
>> +   }
>> +
>>    uint32_t linkage = ctx->vs->linkageMask;
>>    if (ctx->rasterizer->sprite_coord_enable)
>>       linkage |= (1 << ctx->vs->info.base.num_outputs);
>> --
>> 1.9.1
>> 
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev



More information about the mesa-dev mailing list