<div dir="ltr">Since we're using inputs_read and the packing from the fs compiler, is this really needed?  It seems like we could just add two floats' worth of padding after rect_grid_y1 to pad it to a vec4.<br></div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Jun 23, 2016 at 12:17 PM, Topi Pohjolainen <span dir="ltr"><<a href="mailto:topi.pohjolainen@intel.com" target="_blank">topi.pohjolainen@intel.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">These are never used in parallel; let's document this.<br>
<br>
Signed-off-by: Topi Pohjolainen <<a href="mailto:topi.pohjolainen@intel.com">topi.pohjolainen@intel.com</a>><br>
---<br>
 src/mesa/drivers/dri/i965/brw_blorp.h         | 47 ++++++++++++---<br>
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 82 ++++++++++++++++++---------<br>
 src/mesa/drivers/dri/i965/brw_blorp_clear.cpp |  6 +-<br>
 3 files changed, 97 insertions(+), 38 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h<br>
index a4036c1..6f3581c 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_blorp.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h<br>
@@ -179,15 +179,48 @@ struct brw_blorp_coord_transform<br>
    float offset;<br>
 };<br>
<br>
+/**<br>
+ * Bounding rectangle telling pixel discard which pixels are not to be<br>
+ * touched. This is needed when surfaces are configured as something other<br>
+ * than what they really are:<br>
+ *<br>
+ *    - writing W-tiled stencil as Y-tiled<br>
+ *    - writing interleaved multisampled as single sampled.<br>
+ *<br>
+ * See blorp_nir_discard_if_outside_rect().<br>
+ */<br>
+struct brw_blorp_discard_rect<br>
+{<br>
+   uint32_t x0;<br>
+   uint32_t x1;<br>
+   uint32_t y0;<br>
+   uint32_t y1;<br>
+};<br>
+<br>
+/**<br>
+ * Grid needed for blended and scaled blits of integer formats, see<br>
+ * blorp_nir_manual_blend_bilinear().<br>
+ */<br>
+struct brw_blorp_rect_grid<br>
+{<br>
+   float x1;<br>
+   float y1;<br>
+};<br>
+<br>
 struct brw_blorp_wm_inputs<br>
 {<br>
-   uint32_t dst_x0;<br>
-   uint32_t dst_x1;<br>
-   uint32_t dst_y0;<br>
-   uint32_t dst_y1;<br>
+   /* Blended and scaled blits never use pixel discard, meaning<br>
+    * blorp_nir_discard_if_outside_rect() and blorp_nir_manual_blend_bilinear()<br>
+    * can emit code using the same input slot. Clear color in turn is only<br>
+    * used by clear programs.<br>
+    */<br>
+   union {<br>
+      struct brw_blorp_discard_rect discard_rect;<br>
+      struct brw_blorp_rect_grid rect_grid;<br>
+      union gl_color_union clear_color;<br>
+   };<br>
+<br>
    /* Top right coordinates of the rectangular grid used for scaled blitting */<br>
-   float rect_grid_x1;<br>
-   float rect_grid_y1;<br>
    struct brw_blorp_coord_transform x_transform;<br>
    struct brw_blorp_coord_transform y_transform;<br>
<br>
@@ -197,7 +230,7 @@ struct brw_blorp_wm_inputs<br>
    uint32_t src_z;<br>
<br>
    /* Pad out to an integral number of registers */<br>
-   uint32_t pad[5];<br>
+   uint32_t pad[7];<br>
 };<br>
<br>
 #define BRW_BLORP_NUM_PUSH_CONSTANT_DWORDS \<br>
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp<br>
index 16d2504..a9eac60 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp<br>
@@ -331,12 +331,8 @@ enum sampler_message_arg<br>
<br>
 struct brw_blorp_blit_vars {<br>
    /* Input values from brw_blorp_wm_inputs */<br>
-   nir_variable *u_dst_x0;<br>
-   nir_variable *u_dst_x1;<br>
-   nir_variable *u_dst_y0;<br>
-   nir_variable *u_dst_y1;<br>
-   nir_variable *u_rect_grid_x1;<br>
-   nir_variable *u_rect_grid_y1;<br>
+   nir_variable *u_discard_rect;<br>
+   nir_variable *u_rect_grid;<br>
    struct {<br>
       nir_variable *multiplier;<br>
       nir_variable *offset;<br>
@@ -354,17 +350,32 @@ static void<br>
 brw_blorp_blit_vars_init(nir_builder *b, struct brw_blorp_blit_vars *v,<br>
                          const struct brw_blorp_blit_prog_key *key)<br>
 {<br>
+   if (key->use_kill) {<br>
+      assert(!(key->blend && key->blit_scaled));<br>
+      v->u_discard_rect = nir_variable_create(b->shader, nir_var_uniform,<br>
+                                              glsl_type::uvec4_type,<br>
+                                              "discard_rect");<br>
+      v->u_discard_rect->data.location =<br>
+         offsetof(struct brw_blorp_wm_inputs, discard_rect);<br>
+      v->u_rect_grid = NULL;<br>
+   } else {<br>
+      /* Blending grid only has two components and is loaded as vec2. It<br>
+       * shares a union with the discard rectangle, which keeps offsets for<br>
+       * the subsequent inputs the same between this and the discard branch.<br>
+       */<br>
+      v->u_rect_grid = nir_variable_create(b->shader, nir_var_uniform,<br>
+                                           glsl_type::vec2_type,<br>
+                                           "rect_grid");<br>
+      v->u_rect_grid->data.location =<br>
+         offsetof(struct brw_blorp_wm_inputs, rect_grid);<br>
+      v->u_discard_rect = NULL;<br>
+   }<br>
+<br>
 #define LOAD_UNIFORM(name, type)\<br>
    v->u_##name = nir_variable_create(b->shader, nir_var_uniform, type, #name); \<br>
    v->u_##name->data.location = \<br>
       offsetof(struct brw_blorp_wm_inputs, name);<br>
<br>
-   LOAD_UNIFORM(dst_x0, glsl_uint_type())<br>
-   LOAD_UNIFORM(dst_x1, glsl_uint_type())<br>
-   LOAD_UNIFORM(dst_y0, glsl_uint_type())<br>
-   LOAD_UNIFORM(dst_y1, glsl_uint_type())<br>
-   LOAD_UNIFORM(rect_grid_x1, glsl_float_type())<br>
-   LOAD_UNIFORM(rect_grid_y1, glsl_float_type())<br>
    LOAD_UNIFORM(x_transform.multiplier, glsl_float_type())<br>
    LOAD_UNIFORM(x_transform.offset, glsl_float_type())<br>
    LOAD_UNIFORM(y_transform.multiplier, glsl_float_type())<br>
@@ -419,10 +430,17 @@ blorp_nir_discard_if_outside_rect(nir_builder *b, nir_ssa_def *pos,<br>
                                   struct brw_blorp_blit_vars *v)<br>
 {<br>
    nir_ssa_def *c0, *c1, *c2, *c3;<br>
-   c0 = nir_ult(b, nir_channel(b, pos, 0), nir_load_var(b, v->u_dst_x0));<br>
-   c1 = nir_uge(b, nir_channel(b, pos, 0), nir_load_var(b, v->u_dst_x1));<br>
-   c2 = nir_ult(b, nir_channel(b, pos, 1), nir_load_var(b, v->u_dst_y0));<br>
-   c3 = nir_uge(b, nir_channel(b, pos, 1), nir_load_var(b, v->u_dst_y1));<br>
+   nir_ssa_def *discard_rect = nir_load_var(b, v->u_discard_rect);<br>
+   nir_ssa_def *dst_x0 = nir_channel(b, discard_rect, 0);<br>
+   nir_ssa_def *dst_x1 = nir_channel(b, discard_rect, 1);<br>
+   nir_ssa_def *dst_y0 = nir_channel(b, discard_rect, 2);<br>
+   nir_ssa_def *dst_y1 = nir_channel(b, discard_rect, 3);<br>
+<br>
+   c0 = nir_ult(b, nir_channel(b, pos, 0), dst_x0);<br>
+   c1 = nir_uge(b, nir_channel(b, pos, 0), dst_x1);<br>
+   c2 = nir_ult(b, nir_channel(b, pos, 1), dst_y0);<br>
+   c3 = nir_uge(b, nir_channel(b, pos, 1), dst_y1);<br>
+<br>
    nir_ssa_def *oob = nir_ior(b, nir_ior(b, c0, c1), nir_ior(b, c2, c3));<br>
<br>
    nir_intrinsic_instr *discard =<br>
@@ -1033,7 +1051,7 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos,<br>
                                 struct brw_blorp_blit_vars *v)<br>
 {<br>
    nir_ssa_def *pos_xy = nir_channels(b, pos, 0x3);<br>
-<br>
+   nir_ssa_def *rect_grid = nir_load_var(b, v->u_rect_grid);<br>
    nir_ssa_def *scale = nir_imm_vec2(b, key->x_scale, key->y_scale);<br>
<br>
    /* Translate coordinates to lay out the samples in a rectangular  grid<br>
@@ -1048,8 +1066,8 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos,<br>
     * texels on texture edges.<br>
     */<br>
    pos_xy = nir_fmin(b, nir_fmax(b, pos_xy, nir_imm_float(b, 0.0)),<br>
-                        nir_vec2(b, nir_load_var(b, v->u_rect_grid_x1),<br>
-                                    nir_load_var(b, v->u_rect_grid_y1)));<br>
+                        nir_vec2(b, nir_channel(b, rect_grid, 0),<br>
+                                    nir_channel(b, rect_grid, 1)));<br>
<br>
    /* Store the fractional parts to be used as bilinear interpolation<br>
     * coefficients.<br>
@@ -1385,8 +1403,10 @@ brw_blorp_build_nir_shader(struct brw_context *brw,<br>
     * If we need to kill pixels that are outside the destination rectangle,<br>
     * now is the time to do it.<br>
     */<br>
-   if (key->use_kill)<br>
+   if (key->use_kill) {<br>
+      assert(!(key->blend && key->blit_scaled));<br>
       blorp_nir_discard_if_outside_rect(&b, dst_pos, &v);<br>
+   }<br>
<br>
    src_pos = blorp_blit_apply_transform(&b, nir_i2f(&b, dst_pos), &v);<br>
    if (dst_pos->num_components == 3) {<br>
@@ -1434,6 +1454,7 @@ brw_blorp_build_nir_shader(struct brw_context *brw,<br>
                                                 key->texture_data_type);<br>
       }<br>
    } else if (key->blend && key->blit_scaled) {<br>
+      assert(!key->use_kill);<br>
       color = blorp_nir_manual_blend_bilinear(&b, src_pos, key->src_samples, key, &v);<br>
    } else {<br>
       if (key->bilinear_filter) {<br>
@@ -1832,14 +1853,19 @@ brw_blorp_blit_miptrees(struct brw_context *brw,<br>
    /* Round floating point values to nearest integer to avoid "off by one texel"<br>
     * kind of errors when blitting.<br>
     */<br>
-   params.x0 = params.wm_inputs.dst_x0 = roundf(dst_x0);<br>
-   params.y0 = params.wm_inputs.dst_y0 = roundf(dst_y0);<br>
-   params.x1 = params.wm_inputs.dst_x1 = roundf(dst_x1);<br>
-   params.y1 = params.wm_inputs.dst_y1 = roundf(dst_y1);<br>
-   params.wm_inputs.rect_grid_x1 =<br>
-      minify(src_mt->logical_width0, src_level) * wm_prog_key.x_scale - 1.0f;<br>
-   params.wm_inputs.rect_grid_y1 =<br>
-      minify(src_mt->logical_height0, src_level) * wm_prog_key.y_scale - 1.0f;<br>
+   params.x0 = params.wm_inputs.discard_rect.x0 = roundf(dst_x0);<br>
+   params.y0 = params.wm_inputs.discard_rect.y0 = roundf(dst_y0);<br>
+   params.x1 = params.wm_inputs.discard_rect.x1 = roundf(dst_x1);<br>
+   params.y1 = params.wm_inputs.discard_rect.y1 = roundf(dst_y1);<br>
+<br>
+   if (wm_prog_key.blend && wm_prog_key.blit_scaled) {<br>
+      params.wm_inputs.rect_grid.x1 =<br>
+         minify(src_mt->logical_width0, src_level) *<br>
+         wm_prog_key.x_scale - 1.0f;<br>
+      params.wm_inputs.rect_grid.y1 =<br>
+         minify(src_mt->logical_height0, src_level) *<br>
+         wm_prog_key.y_scale - 1.0f;<br>
+   }<br>
<br>
    brw_blorp_setup_coord_transform(&params.wm_inputs.x_transform,<br>
                                    src_x0, src_x1, dst_x0, dst_x1, mirror_x);<br>
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp<br>
index 3283a08..b4beec2 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp<br>
@@ -150,8 +150,7 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,<br>
       params.y1 = rb->Height - fb->_Ymin;<br>
    }<br>
<br>
-   memcpy(&params.wm_inputs.dst_x0,<br>
-          ctx->Color.ClearColor.f, sizeof(float) * 4);<br>
+   params.wm_inputs.clear_color = ctx->Color.ClearColor;<br>
<br>
    bool use_simd16_replicated_data = true;<br>
<br>
@@ -175,7 +174,8 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,<br>
        !partial_clear && use_simd16_replicated_data &&<br>
        brw_is_color_fast_clear_compatible(brw, irb->mt,<br>
                                           &ctx->Color.ClearColor)) {<br>
-      memset(&params.wm_inputs, 0xff, 4*sizeof(float));<br>
+      memset(&params.wm_inputs.clear_color, 0xff,<br>
+             sizeof(params.wm_inputs.clear_color));<br>
       params.fast_clear_op = GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE;<br>
<br>
       brw_get_fast_clear_rect(brw, fb, irb->mt, &params.x0, &params.y0,<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.5.5<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div>