[Intel-gfx] [Mesa-dev] [PATCH 3/5] intel gen4-5: Correctly setup the parameters in the sf.
Paul Berry
stereotype441 at gmail.com
Tue Jul 17 15:07:23 CEST 2012
On 17 July 2012 05:50, Paul Berry <stereotype441 at gmail.com> wrote:
> On 30 June 2012 11:50, Olivier Galibert <galibert at pobox.com> wrote:
>
>> This patch also correct a couple of problems with noperspective
>> interpolation.
>>
>> At that point all the glsl 1.1/1.3 interpolation tests that do not
>> clip pass (the -none ones).
>>
>> The fs code does not use the pre-resolved interpolation modes in order
>> not to mess with gen6+. Sharing the resolution would require putting
>> brw_wm_prog before brw_clip_prog and brw_sf_prog. This may be a good
>> thing, but it could have unexpected consequences, so it's better be
>> done independently in any case.
>>
>> Signed-off-by: Olivier Galibert <galibert at pobox.com>
>> ---
>> src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +-
>> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 5 +
>> src/mesa/drivers/dri/i965/brw_sf.c | 9 +-
>> src/mesa/drivers/dri/i965/brw_sf.h | 2 +-
>> src/mesa/drivers/dri/i965/brw_sf_emit.c | 164
>> +++++++++++++-------------
>> 5 files changed, 95 insertions(+), 87 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp
>> b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> index 710f2ff..b142f2b 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> @@ -506,7 +506,7 @@ fs_visitor::emit_general_interpolation(ir_variable
>> *ir)
>> struct brw_reg interp = interp_reg(location, k);
>> emit_linterp(attr, fs_reg(interp), interpolation_mode,
>> ir->centroid);
>> - if (intel->gen < 6) {
>> + if (intel->gen < 6 && interpolation_mode ==
>> INTERP_QUALIFIER_SMOOTH) {
>> emit(BRW_OPCODE_MUL, attr, attr, this->pixel_w);
>> }
>> }
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> index 9bd1e67..ab83a95 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> @@ -1924,6 +1924,11 @@ fs_visitor::emit_interpolation_setup_gen4()
>> emit(BRW_OPCODE_ADD,
>> this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
>> this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1))));
>>
>> + this->delta_x[BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC] =
>> + this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC];
>> + this->delta_y[BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC] =
>> + this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC];
>> +
>>
>
> Can we add a comment explaining why this is correct? Something like this
> maybe:
>
> "On Gen4-5, we accomplish perspective-correct interpolation by dividing
> the attribute values by w in the vertex shader, interpolating the result
> linearly in screen space, and then multiplying by w in the fragment
> shader. So the interpolation step is always linear in screen space,
> regardless of whether the attribute is perspective or non-perspective.
> Accordingly, we use the same delta_x and delta_y values for both kinds of
> interpolation."
>
Whoops, my memory was faulty. This should say "...in the sf thread...",
not "...in the vertex shader...".
>
>
>> this->current_annotation = "compute pos.w and 1/pos.w";
>> /* Compute wpos.w. It's always in our setup, since it's needed to
>> * interpolate the other attributes.
>> diff --git a/src/mesa/drivers/dri/i965/brw_sf.c
>> b/src/mesa/drivers/dri/i965/brw_sf.c
>> index 0cc4fc7..85f5f51 100644
>> --- a/src/mesa/drivers/dri/i965/brw_sf.c
>> +++ b/src/mesa/drivers/dri/i965/brw_sf.c
>> @@ -139,6 +139,7 @@ brw_upload_sf_prog(struct brw_context *brw)
>> struct brw_sf_prog_key key;
>> /* _NEW_BUFFERS */
>> bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
>> + int i;
>>
>> memset(&key, 0, sizeof(key));
>>
>> @@ -191,7 +192,13 @@ brw_upload_sf_prog(struct brw_context *brw)
>> key.sprite_origin_lower_left = true;
>>
>> /* _NEW_LIGHT */
>> - key.do_flat_shading = (ctx->Light.ShadeModel == GL_FLAT);
>> + key.has_flat_shading = 0;
>> + for (i = 0; i < BRW_VERT_RESULT_MAX; i++) {
>> + if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT) {
>> + key.has_flat_shading = 1;
>> + break;
>> + }
>> + }
>> key.do_twoside_color = (ctx->Light.Enabled &&
>> ctx->Light.Model.TwoSide) ||
>> ctx->VertexProgram._TwoSideEnabled;
>> brw_copy_interpolation_modes(brw, key.interpolation_mode);
>> diff --git a/src/mesa/drivers/dri/i965/brw_sf.h
>> b/src/mesa/drivers/dri/i965/brw_sf.h
>> index 0a8135c..c718072 100644
>> --- a/src/mesa/drivers/dri/i965/brw_sf.h
>> +++ b/src/mesa/drivers/dri/i965/brw_sf.h
>> @@ -50,7 +50,7 @@ struct brw_sf_prog_key {
>> uint8_t point_sprite_coord_replace;
>> GLuint primitive:2;
>> GLuint do_twoside_color:1;
>> - GLuint do_flat_shading:1;
>> + GLuint has_flat_shading:1;
>> GLuint frontface_ccw:1;
>> GLuint do_point_sprite:1;
>> GLuint do_point_coord:1;
>> diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c
>> b/src/mesa/drivers/dri/i965/brw_sf_emit.c
>> index 9d8aa38..387685a 100644
>> --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c
>> +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c
>> @@ -44,6 +44,17 @@
>>
>>
>> /**
>> + * Determine the vue slot corresponding to the given half of the given
>> + * register. half=0 means the first half of a register, half=1 means the
>> + * second half.
>> + */
>> +static inline int vert_reg_to_vue_slot(struct brw_sf_compile *c, GLuint
>> reg,
>> + int half)
>> +{
>> + return (reg + c->urb_entry_read_offset) * 2 + half;
>> +}
>> +
>> +/**
>> * Determine the vert_result corresponding to the given half of the given
>> * register. half=0 means the first half of a register, half=1 means the
>> * second half.
>> @@ -51,11 +62,24 @@
>> static inline int vert_reg_to_vert_result(struct brw_sf_compile *c,
>> GLuint reg,
>> int half)
>> {
>> - int vue_slot = (reg + c->urb_entry_read_offset) * 2 + half;
>> + int vue_slot = vert_reg_to_vue_slot(c, reg, half);
>> return c->vue_map.slot_to_vert_result[vue_slot];
>> }
>>
>> /**
>> + * Determine the register corresponding to the given vue slot.
>> + */
>> +static struct brw_reg get_vue_slot(struct brw_sf_compile *c,
>> + struct brw_reg vert,
>> + int vue_slot)
>> +{
>> + GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
>> + GLuint sub = vue_slot % 2;
>> +
>> + return brw_vec4_grf(vert.nr + off, sub * 4);
>>
> +}
>> +
>> +/**
>> * Determine the register corresponding to the given vert_result.
>> */
>> static struct brw_reg get_vert_result(struct brw_sf_compile *c,
>> @@ -64,10 +88,7 @@ static struct brw_reg get_vert_result(struct
>> brw_sf_compile *c,
>> {
>> int vue_slot = c->vue_map.vert_result_to_slot[vert_result];
>> assert (vue_slot >= c->urb_entry_read_offset);
>> - GLuint off = vue_slot / 2 - c->urb_entry_read_offset;
>> - GLuint sub = vue_slot % 2;
>> -
>> - return brw_vec4_grf(vert.nr + off, sub * 4);
>> + return get_vue_slot(c, vert, vue_slot);
>> }
>>
>> static bool
>> @@ -128,31 +149,37 @@ static void do_twoside_color( struct brw_sf_compile
>> *c )
>> * Flat shading
>> */
>>
>> -#define VERT_RESULT_COLOR_BITS (BITFIELD64_BIT(VERT_RESULT_COL0) | \
>> - BITFIELD64_BIT(VERT_RESULT_COL1))
>> -
>> -static void copy_colors( struct brw_sf_compile *c,
>> - struct brw_reg dst,
>> - struct brw_reg src,
>> - int allow_twoside)
>> +static void copy_flatshaded_attributes( struct brw_sf_compile *c,
>> + struct brw_reg dst,
>> + struct brw_reg src)
>> {
>> struct brw_compile *p = &c->func;
>> + struct brw_context *brw = p->brw;
>> GLuint i;
>>
>> - for (i = VERT_RESULT_COL0; i <= VERT_RESULT_COL1; i++) {
>> - if (have_attr(c,i)) {
>> + for (i = 0; i < BRW_VERT_RESULT_MAX; i++) {
>> + if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT) {
>> brw_MOV(p,
>> - get_vert_result(c, dst, i),
>> - get_vert_result(c, src, i));
>> + get_vue_slot(c, dst, i),
>> + get_vue_slot(c, src, i));
>>
>> - } else if(allow_twoside && have_attr(c, i - VERT_RESULT_COL0 +
>> VERT_RESULT_BFC0)) {
>> - brw_MOV(p,
>> - get_vert_result(c, dst, i - VERT_RESULT_COL0 +
>> VERT_RESULT_BFC0),
>> - get_vert_result(c, src, i - VERT_RESULT_COL0 +
>> VERT_RESULT_BFC0));
>> }
>> }
>> }
>>
>> +static GLuint count_flatshaded_attributes(struct brw_sf_compile *c )
>> +{
>> + struct brw_compile *p = &c->func;
>> + struct brw_context *brw = p->brw;
>> + GLuint count = 0;
>> + GLuint i;
>> + for (i = 0; i < BRW_VERT_RESULT_MAX; i++) {
>> + if (brw_get_interpolation_mode(brw, i) == INTERP_QUALIFIER_FLAT)
>> + count++;
>> + }
>> + return count;
>> +}
>> +
>>
>>
>> /* Need to use a computed jump to copy flatshaded attributes as the
>> @@ -168,18 +195,6 @@ static void do_flatshade_triangle( struct
>> brw_sf_compile *c )
>>
>> GLuint nr;
>>
>> - if (c->key.do_twoside_color) {
>> - nr = ((c->key.attrs & (BITFIELD64_BIT(VERT_RESULT_COL0) |
>> BITFIELD64_BIT(VERT_RESULT_BFC0))) != 0) +
>> - ((c->key.attrs & (BITFIELD64_BIT(VERT_RESULT_COL1) |
>> BITFIELD64_BIT(VERT_RESULT_BFC1))) != 0);
>> -
>> - } else {
>> - nr = ((c->key.attrs & BITFIELD64_BIT(VERT_RESULT_COL0)) != 0) +
>> - ((c->key.attrs & BITFIELD64_BIT(VERT_RESULT_COL1)) != 0);
>> - }
>> -
>> - if (!nr)
>> - return;
>> -
>> /* Already done in clip program:
>> */
>> if (c->key.primitive == SF_UNFILLED_TRIS)
>> @@ -188,21 +203,23 @@ static void do_flatshade_triangle( struct
>> brw_sf_compile *c )
>> if (intel->gen == 5)
>> jmpi = 2;
>>
>> + nr = count_flatshaded_attributes(c);
>> +
>>
>
> We used to have this optimization:
>
> if (!nr)
> return;
>
> I understand why you removed it: because now this code should only be
> called if c->key.has_flat_shading is true. However, for the sake of safety
> (and documentation), can we add something like this:
>
> /* This code should only be called if c->key.has_flat_shading is true,
> * meaning there is at least one flatshaded attribute.
> */
> assert(nr != 0);
>
> If you would prefer to put this assertion inside
> count_flatshaded_attributes(), I would be happy with that too.
>
>
>> brw_push_insn_state(p);
>>
>> brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
>> brw_JMPI(p, ip, ip, c->pv);
>>
>> - copy_colors(c, c->vert[1], c->vert[0], c->key.do_twoside_color);
>> - copy_colors(c, c->vert[2], c->vert[0], c->key.do_twoside_color);
>> + copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
>> + copy_flatshaded_attributes(c, c->vert[2], c->vert[0]);
>> brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1)));
>>
>> - copy_colors(c, c->vert[0], c->vert[1], c->key.do_twoside_color);
>> - copy_colors(c, c->vert[2], c->vert[1], c->key.do_twoside_color);
>> + copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
>> + copy_flatshaded_attributes(c, c->vert[2], c->vert[1]);
>> brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2));
>>
>> - copy_colors(c, c->vert[0], c->vert[2], c->key.do_twoside_color);
>> - copy_colors(c, c->vert[1], c->vert[2], c->key.do_twoside_color);
>> + copy_flatshaded_attributes(c, c->vert[0], c->vert[2]);
>> + copy_flatshaded_attributes(c, c->vert[1], c->vert[2]);
>>
>> brw_pop_insn_state(p);
>> }
>> @@ -213,12 +230,9 @@ static void do_flatshade_line( struct brw_sf_compile
>> *c )
>> struct brw_compile *p = &c->func;
>> struct intel_context *intel = &p->brw->intel;
>> struct brw_reg ip = brw_ip_reg();
>> - GLuint nr = _mesa_bitcount_64(c->key.attrs & VERT_RESULT_COLOR_BITS);
>> + GLuint nr;
>> GLuint jmpi = 1;
>>
>> - if (!nr)
>> - return;
>> -
>> /* Already done in clip program:
>> */
>> if (c->key.primitive == SF_UNFILLED_TRIS)
>> @@ -227,14 +241,16 @@ static void do_flatshade_line( struct
>> brw_sf_compile *c )
>> if (intel->gen == 5)
>> jmpi = 2;
>>
>> + nr = count_flatshaded_attributes(c);
>> +
>>
>
> A similar assertion should go here.
>
>
>> brw_push_insn_state(p);
>>
>> brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
>> brw_JMPI(p, ip, ip, c->pv);
>> - copy_colors(c, c->vert[1], c->vert[0], 0);
>> + copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
>>
>> brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr));
>> - copy_colors(c, c->vert[0], c->vert[1], 0);
>> + copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
>>
>> brw_pop_insn_state(p);
>> }
>> @@ -332,40 +348,25 @@ static void invert_det( struct brw_sf_compile *c)
>>
>> static bool
>> calculate_masks(struct brw_sf_compile *c,
>> - GLuint reg,
>> - GLushort *pc,
>> - GLushort *pc_persp,
>> - GLushort *pc_linear)
>> + GLuint reg,
>> + GLushort *pc,
>> + GLushort *pc_persp,
>> + GLushort *pc_linear)
>>
>
> I like the way you've rewritten this function. Nicely done.
>
>
>> {
>> + struct brw_compile *p = &c->func;
>> + struct brw_context *brw = p->brw;
>> + enum glsl_interp_qualifier interp;
>> bool is_last_attr = (reg == c->nr_setup_regs - 1);
>> - GLbitfield64 persp_mask;
>> - GLbitfield64 linear_mask;
>> -
>> - if (c->key.do_flat_shading)
>> - persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_HPOS) |
>> - BITFIELD64_BIT(VERT_RESULT_COL0) |
>> - BITFIELD64_BIT(VERT_RESULT_COL1) |
>> - BITFIELD64_BIT(VERT_RESULT_BFC0) |
>> - BITFIELD64_BIT(VERT_RESULT_BFC1));
>> - else
>> - persp_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_HPOS));
>> -
>> - if (c->key.do_flat_shading)
>> - linear_mask = c->key.attrs & ~(BITFIELD64_BIT(VERT_RESULT_COL0) |
>> - BITFIELD64_BIT(VERT_RESULT_COL1) |
>> - BITFIELD64_BIT(VERT_RESULT_BFC0) |
>> - BITFIELD64_BIT(VERT_RESULT_BFC1));
>> - else
>> - linear_mask = c->key.attrs;
>>
>> *pc_persp = 0;
>> *pc_linear = 0;
>> *pc = 0xf;
>> -
>> - if (persp_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 0)))
>> - *pc_persp = 0xf;
>>
>> - if (linear_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg, 0)))
>> + interp = brw_get_interpolation_mode(brw, vert_reg_to_vue_slot(c, reg,
>> 0));
>> + if (interp == INTERP_QUALIFIER_SMOOTH) {
>> + *pc_linear = 0xf;
>> + *pc_persp = 0xf;
>> + } else if(interp == INTERP_QUALIFIER_NOPERSPECTIVE)
>> *pc_linear = 0xf;
>>
>> /* Maybe only processs one attribute on the final round:
>> @@ -373,11 +374,12 @@ calculate_masks(struct brw_sf_compile *c,
>> if (vert_reg_to_vert_result(c, reg, 1) != BRW_VERT_RESULT_MAX) {
>> *pc |= 0xf0;
>>
>> - if (persp_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg,
>> 1)))
>> - *pc_persp |= 0xf0;
>> -
>> - if (linear_mask & BITFIELD64_BIT(vert_reg_to_vert_result(c, reg,
>> 1)))
>> - *pc_linear |= 0xf0;
>> + interp = brw_get_interpolation_mode(brw, vert_reg_to_vue_slot(c,
>> reg, 1));
>> + if (interp == INTERP_QUALIFIER_SMOOTH) {
>> + *pc_linear |= 0xf0;
>> + *pc_persp |= 0xf0;
>> + } else if(interp == INTERP_QUALIFIER_NOPERSPECTIVE)
>> + *pc_linear |= 0xf0;
>> }
>>
>> return is_last_attr;
>> @@ -430,7 +432,7 @@ void brw_emit_tri_setup(struct brw_sf_compile *c,
>> bool allocate)
>> if (c->key.do_twoside_color)
>> do_twoside_color(c);
>>
>> - if (c->key.do_flat_shading)
>> + if (c->key.has_flat_shading)
>> do_flatshade_triangle(c);
>>
>>
>> @@ -443,7 +445,6 @@ void brw_emit_tri_setup(struct brw_sf_compile *c,
>> bool allocate)
>> struct brw_reg a2 = offset(c->vert[2], i);
>> GLushort pc, pc_persp, pc_linear;
>> bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
>> -
>> if (pc_persp)
>> {
>> brw_set_predicate_control_flag_value(p, pc_persp);
>> @@ -507,7 +508,6 @@ void brw_emit_line_setup(struct brw_sf_compile *c,
>> bool allocate)
>> struct brw_compile *p = &c->func;
>> GLuint i;
>>
>> -
>> c->nr_verts = 2;
>>
>> if (allocate)
>> @@ -516,7 +516,7 @@ void brw_emit_line_setup(struct brw_sf_compile *c,
>> bool allocate)
>> invert_det(c);
>> copy_z_inv_w(c);
>>
>> - if (c->key.do_flat_shading)
>> + if (c->key.has_flat_shading)
>> do_flatshade_line(c);
>>
>> for (i = 0; i < c->nr_setup_regs; i++)
>> @@ -799,7 +799,3 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c
>> )
>>
>> brw_emit_point_setup( c, false );
>> }
>> -
>> -
>> -
>> -
>> --
>> 1.7.10.rc3.1.gb306
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>
> With the minor changes noted above, this patch is:
>
> Reviewed-by: Paul Berry <stereotype441 at gmail.com>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/intel-gfx/attachments/20120717/0abc38ac/attachment.html>
More information about the Intel-gfx
mailing list