[Mesa-dev] [PATCH 06/18] i965/blorp: Share input slot between pixel kill and blend/scaled
Topi Pohjolainen
topi.pohjolainen at intel.com
Thu Jun 23 19:17:02 UTC 2016
These are never used in parallel; let's document this.
Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
---
src/mesa/drivers/dri/i965/brw_blorp.h | 47 ++++++++++++---
src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 82 ++++++++++++++++++---------
src/mesa/drivers/dri/i965/brw_blorp_clear.cpp | 6 +-
3 files changed, 97 insertions(+), 38 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h
index a4036c1..6f3581c 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -179,15 +179,48 @@ struct brw_blorp_coord_transform
float offset;
};
+/**
+ * Bounding rectangle telling pixel discard which pixels are not to be
+ * touched. This is needed when surfaces are configured as something other
+ * than what they really are:
+ *
+ * - writing W-tiled stencil as Y-tiled
+ * - writing interleaved multisampled as single sampled.
+ *
+ * See blorp_nir_discard_if_outside_rect().
+ */
+struct brw_blorp_discard_rect
+{
+ uint32_t x0;
+ uint32_t x1;
+ uint32_t y0;
+ uint32_t y1;
+};
+
+/**
+ * Grid needed for blended and scaled blits of integer formats, see
+ * blorp_nir_manual_blend_bilinear().
+ */
+struct brw_blorp_rect_grid
+{
+ float x1;
+ float y1;
+};
+
struct brw_blorp_wm_inputs
{
- uint32_t dst_x0;
- uint32_t dst_x1;
- uint32_t dst_y0;
- uint32_t dst_y1;
+ /* Blended and scaled blits never use pixel discard meaning
+ * blorp_nir_discard_if_outside_rect() and blorp_nir_manual_blend_bilinear()
+ * can emit code using the same input slot. Clear color in turn is only
+ * used by clear programs.
+ */
+ union {
+ struct brw_blorp_discard_rect discard_rect;
+ struct brw_blorp_rect_grid rect_grid;
+ union gl_color_union clear_color;
+ };
+
/* Top right coordinates of the rectangular grid used for scaled blitting */
- float rect_grid_x1;
- float rect_grid_y1;
struct brw_blorp_coord_transform x_transform;
struct brw_blorp_coord_transform y_transform;
@@ -197,7 +230,7 @@ struct brw_blorp_wm_inputs
uint32_t src_z;
/* Pad out to an integral number of registers */
- uint32_t pad[5];
+ uint32_t pad[7];
};
#define BRW_BLORP_NUM_PUSH_CONSTANT_DWORDS \
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 16d2504..a9eac60 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -331,12 +331,8 @@ enum sampler_message_arg
struct brw_blorp_blit_vars {
/* Input values from brw_blorp_wm_inputs */
- nir_variable *u_dst_x0;
- nir_variable *u_dst_x1;
- nir_variable *u_dst_y0;
- nir_variable *u_dst_y1;
- nir_variable *u_rect_grid_x1;
- nir_variable *u_rect_grid_y1;
+ nir_variable *u_discard_rect;
+ nir_variable *u_rect_grid;
struct {
nir_variable *multiplier;
nir_variable *offset;
@@ -354,17 +350,32 @@ static void
brw_blorp_blit_vars_init(nir_builder *b, struct brw_blorp_blit_vars *v,
const struct brw_blorp_blit_prog_key *key)
{
+ if (key->use_kill) {
+ assert(!(key->blend && key->blit_scaled));
+ v->u_discard_rect = nir_variable_create(b->shader, nir_var_uniform,
+ glsl_type::uvec4_type,
+ "discard_rect");
+ v->u_discard_rect->data.location =
+ offsetof(struct brw_blorp_wm_inputs, discard_rect);
+ v->u_rect_grid = NULL;
+ } else {
+ /* Blending grid only has two components but loading it as vec4
+ * will keep offsets for the subsequent inputs the same between
+ * this and the discard branch.
+ */
+ v->u_rect_grid = nir_variable_create(b->shader, nir_var_uniform,
+ glsl_type::vec2_type,
+ "rect_grid");
+ v->u_rect_grid->data.location =
+ offsetof(struct brw_blorp_wm_inputs, rect_grid);
+ v->u_discard_rect = NULL;
+ }
+
#define LOAD_UNIFORM(name, type)\
v->u_##name = nir_variable_create(b->shader, nir_var_uniform, type, #name); \
v->u_##name->data.location = \
offsetof(struct brw_blorp_wm_inputs, name);
- LOAD_UNIFORM(dst_x0, glsl_uint_type())
- LOAD_UNIFORM(dst_x1, glsl_uint_type())
- LOAD_UNIFORM(dst_y0, glsl_uint_type())
- LOAD_UNIFORM(dst_y1, glsl_uint_type())
- LOAD_UNIFORM(rect_grid_x1, glsl_float_type())
- LOAD_UNIFORM(rect_grid_y1, glsl_float_type())
LOAD_UNIFORM(x_transform.multiplier, glsl_float_type())
LOAD_UNIFORM(x_transform.offset, glsl_float_type())
LOAD_UNIFORM(y_transform.multiplier, glsl_float_type())
@@ -419,10 +430,17 @@ blorp_nir_discard_if_outside_rect(nir_builder *b, nir_ssa_def *pos,
struct brw_blorp_blit_vars *v)
{
nir_ssa_def *c0, *c1, *c2, *c3;
- c0 = nir_ult(b, nir_channel(b, pos, 0), nir_load_var(b, v->u_dst_x0));
- c1 = nir_uge(b, nir_channel(b, pos, 0), nir_load_var(b, v->u_dst_x1));
- c2 = nir_ult(b, nir_channel(b, pos, 1), nir_load_var(b, v->u_dst_y0));
- c3 = nir_uge(b, nir_channel(b, pos, 1), nir_load_var(b, v->u_dst_y1));
+ nir_ssa_def *discard_rect = nir_load_var(b, v->u_discard_rect);
+ nir_ssa_def *dst_x0 = nir_channel(b, discard_rect, 0);
+ nir_ssa_def *dst_x1 = nir_channel(b, discard_rect, 1);
+ nir_ssa_def *dst_y0 = nir_channel(b, discard_rect, 2);
+ nir_ssa_def *dst_y1 = nir_channel(b, discard_rect, 3);
+
+ c0 = nir_ult(b, nir_channel(b, pos, 0), dst_x0);
+ c1 = nir_uge(b, nir_channel(b, pos, 0), dst_x1);
+ c2 = nir_ult(b, nir_channel(b, pos, 1), dst_y0);
+ c3 = nir_uge(b, nir_channel(b, pos, 1), dst_y1);
+
nir_ssa_def *oob = nir_ior(b, nir_ior(b, c0, c1), nir_ior(b, c2, c3));
nir_intrinsic_instr *discard =
@@ -1033,7 +1051,7 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos,
struct brw_blorp_blit_vars *v)
{
nir_ssa_def *pos_xy = nir_channels(b, pos, 0x3);
-
+ nir_ssa_def *rect_grid = nir_load_var(b, v->u_rect_grid);
nir_ssa_def *scale = nir_imm_vec2(b, key->x_scale, key->y_scale);
/* Translate coordinates to lay out the samples in a rectangular grid
@@ -1048,8 +1066,8 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos,
* texels on texture edges.
*/
pos_xy = nir_fmin(b, nir_fmax(b, pos_xy, nir_imm_float(b, 0.0)),
- nir_vec2(b, nir_load_var(b, v->u_rect_grid_x1),
- nir_load_var(b, v->u_rect_grid_y1)));
+ nir_vec2(b, nir_channel(b, rect_grid, 0),
+ nir_channel(b, rect_grid, 1)));
/* Store the fractional parts to be used as bilinear interpolation
* coefficients.
@@ -1385,8 +1403,10 @@ brw_blorp_build_nir_shader(struct brw_context *brw,
* If we need to kill pixels that are outside the destination rectangle,
* now is the time to do it.
*/
- if (key->use_kill)
+ if (key->use_kill) {
+ assert(!(key->blend && key->blit_scaled));
blorp_nir_discard_if_outside_rect(&b, dst_pos, &v);
+ }
src_pos = blorp_blit_apply_transform(&b, nir_i2f(&b, dst_pos), &v);
if (dst_pos->num_components == 3) {
@@ -1434,6 +1454,7 @@ brw_blorp_build_nir_shader(struct brw_context *brw,
key->texture_data_type);
}
} else if (key->blend && key->blit_scaled) {
+ assert(!key->use_kill);
color = blorp_nir_manual_blend_bilinear(&b, src_pos, key->src_samples, key, &v);
} else {
if (key->bilinear_filter) {
@@ -1832,14 +1853,19 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
/* Round floating point values to nearest integer to avoid "off by one texel"
* kind of errors when blitting.
*/
- params.x0 = params.wm_inputs.dst_x0 = roundf(dst_x0);
- params.y0 = params.wm_inputs.dst_y0 = roundf(dst_y0);
- params.x1 = params.wm_inputs.dst_x1 = roundf(dst_x1);
- params.y1 = params.wm_inputs.dst_y1 = roundf(dst_y1);
- params.wm_inputs.rect_grid_x1 =
- minify(src_mt->logical_width0, src_level) * wm_prog_key.x_scale - 1.0f;
- params.wm_inputs.rect_grid_y1 =
- minify(src_mt->logical_height0, src_level) * wm_prog_key.y_scale - 1.0f;
+ params.x0 = params.wm_inputs.discard_rect.x0 = roundf(dst_x0);
+ params.y0 = params.wm_inputs.discard_rect.y0 = roundf(dst_y0);
+ params.x1 = params.wm_inputs.discard_rect.x1 = roundf(dst_x1);
+ params.y1 = params.wm_inputs.discard_rect.y1 = roundf(dst_y1);
+
+ if (wm_prog_key.blend && wm_prog_key.blit_scaled) {
+ params.wm_inputs.rect_grid.x1 =
+ minify(src_mt->logical_width0, src_level) *
+ wm_prog_key.x_scale - 1.0f;
+ params.wm_inputs.rect_grid.y1 =
+ minify(src_mt->logical_height0, src_level) *
+ wm_prog_key.y_scale - 1.0f;
+ }
brw_blorp_setup_coord_transform(&params.wm_inputs.x_transform,
src_x0, src_x1, dst_x0, dst_x1, mirror_x);
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
index 3283a08..b4beec2 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
@@ -150,8 +150,7 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
params.y1 = rb->Height - fb->_Ymin;
}
- memcpy(&params.wm_inputs.dst_x0,
- ctx->Color.ClearColor.f, sizeof(float) * 4);
+ params.wm_inputs.clear_color = ctx->Color.ClearColor;
bool use_simd16_replicated_data = true;
@@ -175,7 +174,8 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
!partial_clear && use_simd16_replicated_data &&
brw_is_color_fast_clear_compatible(brw, irb->mt,
&ctx->Color.ClearColor)) {
- memset(&params.wm_inputs, 0xff, 4*sizeof(float));
+ memset(&params.wm_inputs.clear_color, 0xff,
+ sizeof(params.wm_inputs.clear_color));
params.fast_clear_op = GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE;
brw_get_fast_clear_rect(brw, fb, irb->mt, &params.x0, &params.y0,
--
2.5.5
More information about the mesa-dev
mailing list