[Mesa-dev] [PATCH 6/6] i965 Gen6+: De-compact clip planes.
Ian Romanick
idr at freedesktop.org
Tue Oct 4 10:54:23 PDT 2011
On 10/03/2011 03:11 PM, Paul Berry wrote:
> Previously, if the user enabled a non-consecutive set of clip planes
> (e.g. 0, 1, and 3), the driver would compact them down to a
> consecutive set starting at 0. This optimization was of dubious
> value, and complicated the implementation of gl_ClipDistance.
>
> This patch changes the driver so that with Gen6 and later chipsets, we
> no longer compact the clip planes. However, we still discard any clip
> planes beyond the highest number that is in use, so performance should
> not be affected for applications that use clip planes consecutively
> from 0.
>
> With chipsets previous to Gen6, we still compact the clip planes,
> since the pre-Gen6 clipper thread relies on this behavior.
Unless we know of some specific cases where a non-consecutive set of
clip planes is used and performance would be adversely affected by this
change, I'm in favor of removing this optimization.
The code gets a bit ugly due to treating GEN6 and GEN4/5 differently.
How much work would it be fix the clipping on those chipsets to not need
the compaction? (Based on previous discussions with Eric about the
clipper, I suspect the answer is, "A lot.")
> ---
> src/mesa/drivers/dri/i965/brw_state.h | 5 ---
> src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 36 ++++++++++++++++--------
> src/mesa/drivers/dri/i965/brw_vs.c | 17 ++++++++---
> src/mesa/drivers/dri/i965/brw_vs.h | 15 ++++++----
> src/mesa/drivers/dri/i965/brw_vs_emit.c | 14 ++++----
> src/mesa/drivers/dri/i965/gen6_clip_state.c | 28 +-----------------
> src/mesa/drivers/dri/i965/gen7_clip_state.c | 9 +----
> 7 files changed, 56 insertions(+), 68 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
> index fabf0c0..6fc95eb 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -213,9 +213,4 @@ get_attr_override(struct brw_vue_map *vue_map, int urb_entry_read_offset,
> unsigned int
> gen7_depth_format(struct brw_context *brw);
>
> -/* gen6_clip_state.c */
> -uint32_t
> -brw_compute_userclip_flags(bool uses_clip_distance,
> - GLbitfield clip_planes_enabled);
> -
> #endif
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index 3c2f9ba..a117c37 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -559,18 +559,29 @@ vec4_visitor::setup_uniform_clipplane_values()
> {
> GLclipplane *clip_planes = brw_select_clip_planes(ctx);
>
> + /* Pre-Gen6, we compact clip planes. For example, if the user
> + * enables just clip planes 0, 1, and 3, we will enable clip planes
> + * 0, 1, and 2 in the hardware, and we'll move clip plane 3 to clip
> + * plane 2. This simplifies the implementation of the Gen6 clip
> + * thread.
> + *
> + * In Gen6 and later, we don't compact clip planes, because this
> + * simplifies the implementation of gl_ClipDistance.
> + */
> int compacted_clipplane_index = 0;
> - for (int i = 0; i< MAX_CLIP_PLANES; ++i) {
> - if (c->key.userclip_planes_enabled& (1<< i)) {
> - this->uniform_vector_size[this->uniforms] = 4;
> - this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
> - this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
> - for (int j = 0; j< 4; ++j) {
> - c->prog_data.param[this->uniforms * 4 + j] =&clip_planes[i][j];
> - }
> - ++compacted_clipplane_index;
> - ++this->uniforms;
> + for (int i = 0; i< c->key.nr_userclip_plane_consts; ++i) {
> + if (intel->gen< 6&&
> + !(c->key.userclip_planes_enabled_gen_4_5& (1<< i))) {
> + continue;
> + }
> + this->uniform_vector_size[this->uniforms] = 4;
> + this->userplane[compacted_clipplane_index] = dst_reg(UNIFORM, this->uniforms);
> + this->userplane[compacted_clipplane_index].type = BRW_REGISTER_TYPE_F;
> + for (int j = 0; j< 4; ++j) {
> + c->prog_data.param[this->uniforms * 4 + j] =&clip_planes[i][j];
> }
> + ++compacted_clipplane_index;
> + ++this->uniforms;
> }
> }
>
> @@ -1807,7 +1818,7 @@ vec4_visitor::emit_psiz_and_flags(struct brw_reg reg)
> }
>
> current_annotation = "Clipping flags";
> - for (i = 0; i< c->key.nr_userclip_planes; i++) {
> + for (i = 0; i< c->key.nr_userclip_plane_consts; i++) {
> vec4_instruction *inst;
>
> inst = emit(DP4(dst_null_f(), src_reg(output_reg[VERT_RESULT_HPOS]),
> @@ -1883,7 +1894,8 @@ vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
> clip_vertex = VERT_RESULT_HPOS;
> }
>
> - for (int i = 0; i + offset< c->key.nr_userclip_planes&& i< 4; ++i) {
> + for (int i = 0; i + offset< c->key.nr_userclip_plane_consts&& i< 4;
> + ++i) {
> emit(DP4(dst_reg(brw_writemask(reg, 1<< i)),
> src_reg(output_reg[clip_vertex]),
> src_reg(this->userplane[i + offset])));
> diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
> index 19d9552..7e7a230 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs.c
> +++ b/src/mesa/drivers/dri/i965/brw_vs.c
> @@ -276,7 +276,8 @@ do_vs_prog(struct brw_context *brw,
>
> static void brw_upload_vs_prog(struct brw_context *brw)
> {
> - struct gl_context *ctx =&brw->intel.ctx;
> + struct intel_context *intel =&brw->intel;
> + struct gl_context *ctx =&intel->ctx;
> struct brw_vs_prog_key key;
> struct brw_vertex_program *vp =
> (struct brw_vertex_program *)brw->vertex_program;
> @@ -290,10 +291,16 @@ static void brw_upload_vs_prog(struct brw_context *brw)
> key.program_string_id = vp->id;
> key.userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
> key.uses_clip_distance = vp->program.UsesClipDistance;
> - if (!key.uses_clip_distance) {
> - key.userclip_planes_enabled = ctx->Transform.ClipPlanesEnabled;
> - key.nr_userclip_planes
> - = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
> + if (key.userclip_active&& !key.uses_clip_distance) {
> + if (intel->gen< 6) {
> + key.nr_userclip_plane_consts
> + = brw_count_bits(ctx->Transform.ClipPlanesEnabled);
> + key.userclip_planes_enabled_gen_4_5
> + = ctx->Transform.ClipPlanesEnabled;
> + } else {
> + key.nr_userclip_plane_consts
> + = _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
> + }
> }
> key.copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
> ctx->Polygon.BackMode != GL_FILL);
> diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
> index b8d11df..85a1d82 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs.h
> +++ b/src/mesa/drivers/dri/i965/brw_vs.h
> @@ -53,10 +53,10 @@ struct brw_vs_prog_key {
> GLuint userclip_active:1;
>
> /**
> - * Number of user clip planes active. Zero if the shader uses
> - * gl_ClipDistance.
> + * How many user clipping planes are being uploaded to the vertex shader as
> + * push constants.
> */
> - GLuint nr_userclip_planes:4;
> + GLuint nr_userclip_plane_consts:4;
>
> /**
> * True if the shader uses gl_ClipDistance, regardless of whether any clip
> @@ -65,10 +65,13 @@ struct brw_vs_prog_key {
> GLuint uses_clip_distance:1;
>
> /**
> - * Which user clip planes are active. Zero if the shader uses
> - * gl_ClipDistance.
> + * For pre-Gen6 hardware, a bitfield indicating which clipping planes are
> + * enabled. This is used to compact clip planes.
> + *
> + * For Gen6 and later hardware, clip planes are not compacted, so this
> + * value is zero to avoid provoking unnecessary shader recompiles.
> */
> - GLuint userclip_planes_enabled:MAX_CLIP_PLANES;
> + GLuint userclip_planes_enabled_gen_4_5:MAX_CLIP_PLANES;
>
> GLuint copy_edgeflag:1;
> GLuint point_coord_replace:8;
> diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
> index 8845580..7326b3a 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
> +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
> @@ -204,17 +204,17 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
> */
> if (c->key.userclip_active) {
> if (intel->gen>= 6) {
> - for (i = 0; i< c->key.nr_userclip_planes; i++) {
> + for (i = 0; i<= c->key.nr_userclip_plane_consts; i++) {
> c->userplane[i] = stride(brw_vec4_grf(reg + i / 2,
> (i % 2) * 4), 0, 4, 1);
> }
> - reg += ALIGN(c->key.nr_userclip_planes, 2) / 2;
> + reg += ALIGN(c->key.nr_userclip_plane_consts, 2) / 2;
> } else {
> - for (i = 0; i< c->key.nr_userclip_planes; i++) {
> + for (i = 0; i< c->key.nr_userclip_plane_consts; i++) {
> c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2,
> (i % 2) * 4), 0, 4, 1);
> }
> - reg += (ALIGN(6 + c->key.nr_userclip_planes, 4) / 4) * 2;
> + reg += (ALIGN(6 + c->key.nr_userclip_plane_consts, 4) / 4) * 2;
> }
>
> }
> @@ -239,7 +239,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
> */
> if (intel->gen>= 6) {
> /* We can only load 32 regs of push constants. */
> - max_constant = 32 * 2 - c->key.nr_userclip_planes;
> + max_constant = 32 * 2 - c->key.nr_userclip_plane_consts;
> } else {
> max_constant = BRW_MAX_GRF - 20 - c->vp->program.Base.NumTemporaries;
> }
> @@ -1565,7 +1565,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
>
> /* Set the user clip distances in dword 8-15. (m3-4)*/
> if (c->key.userclip_active) {
> - for (i = 0; i< c->key.nr_userclip_planes; i++) {
> + for (i = 0; i< c->key.nr_userclip_plane_consts; i++) {
> struct brw_reg m;
> if (i< 4)
> m = brw_message_reg(3);
> @@ -1593,7 +1593,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
> header1, brw_imm_ud(0x7ff<<8));
> }
>
> - for (i = 0; i< c->key.nr_userclip_planes; i++) {
> + for (i = 0; i< c->key.nr_userclip_plane_consts; i++) {
> brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
> brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
> brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<i));
> diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c
> index ffe2c53..9b36af4 100644
> --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
> @@ -31,25 +31,6 @@
> #include "brw_util.h"
> #include "intel_batchbuffer.h"
>
> -uint32_t
> -brw_compute_userclip_flags(bool uses_clip_distance,
> - GLbitfield clip_planes_enabled)
> -{
> - if (uses_clip_distance) {
> - /* When using gl_ClipDistance, it is up to the shader to decide which
> - * clip distance values to use.
> - */
> - return clip_planes_enabled;
> - } else {
> - /* When using clipping planes, we compact the ones that are in use so
> - * that they are always numbered consecutively from zero, so we need to
> - * enable clipping planes 0 through n-1 in the hardware regardless of
> - * which planes the user has selected.
> - */
> - return (1<< brw_count_bits(clip_planes_enabled)) - 1;
> - }
> -}
> -
> static void
> upload_clip_state(struct brw_context *brw)
> {
> @@ -58,10 +39,6 @@ upload_clip_state(struct brw_context *brw)
> uint32_t depth_clamp = 0;
> uint32_t provoking, userclip;
>
> - /* BRW_NEW_VERTEX_PROGRAM */
> - struct brw_vertex_program *vp =
> - (struct brw_vertex_program *)brw->vertex_program;
> -
> if (!ctx->Transform.DepthClamp)
> depth_clamp = GEN6_CLIP_Z_TEST;
>
> @@ -79,8 +56,7 @@ upload_clip_state(struct brw_context *brw)
> }
>
> /* _NEW_TRANSFORM */
> - userclip = brw_compute_userclip_flags(vp->program.UsesClipDistance,
> - ctx->Transform.ClipPlanesEnabled);
> + userclip = ctx->Transform.ClipPlanesEnabled;
>
> BEGIN_BATCH(4);
> OUT_BATCH(_3DSTATE_CLIP<< 16 | (4 - 2));
> @@ -101,7 +77,7 @@ upload_clip_state(struct brw_context *brw)
> const struct brw_tracked_state gen6_clip_state = {
> .dirty = {
> .mesa = _NEW_TRANSFORM | _NEW_LIGHT,
> - .brw = BRW_NEW_CONTEXT | BRW_NEW_VERTEX_PROGRAM,
> + .brw = BRW_NEW_CONTEXT,
> .cache = 0
> },
> .emit = upload_clip_state,
> diff --git a/src/mesa/drivers/dri/i965/gen7_clip_state.c b/src/mesa/drivers/dri/i965/gen7_clip_state.c
> index 5458ce8..2f512f5 100644
> --- a/src/mesa/drivers/dri/i965/gen7_clip_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_clip_state.c
> @@ -39,10 +39,6 @@ upload_clip_state(struct brw_context *brw)
> /* _NEW_BUFFERS */
> GLboolean render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
>
> - /* BRW_NEW_VERTEX_PROGRAM */
> - struct brw_vertex_program *vp =
> - (struct brw_vertex_program *)brw->vertex_program;
> -
> dw1 |= GEN7_CLIP_EARLY_CULL;
>
> /* _NEW_POLYGON */
> @@ -86,8 +82,7 @@ upload_clip_state(struct brw_context *brw)
> }
>
> /* _NEW_TRANSFORM */
> - userclip = brw_compute_userclip_flags(vp->program.UsesClipDistance,
> - ctx->Transform.ClipPlanesEnabled);
> + userclip = ctx->Transform.ClipPlanesEnabled;
>
> BEGIN_BATCH(4);
> OUT_BATCH(_3DSTATE_CLIP<< 16 | (4 - 2));
> @@ -111,7 +106,7 @@ const struct brw_tracked_state gen7_clip_state = {
> _NEW_POLYGON |
> _NEW_LIGHT |
> _NEW_TRANSFORM),
> - .brw = BRW_NEW_CONTEXT | BRW_NEW_VERTEX_PROGRAM,
> + .brw = BRW_NEW_CONTEXT,
> .cache = 0
> },
> .emit = upload_clip_state,
More information about the mesa-dev
mailing list