<div dir="ltr">On 16 May 2013 11:44, Anuj Phogat <span dir="ltr">&lt;<a href="mailto:anuj.phogat@gmail.com" target="_blank">anuj.phogat@gmail.com</a>&gt;</span> wrote:<br><div class="gmail_extra"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
These changes are required to implement scaled blitting in blorp<br>
in my next patch.<br>
<br>
No regressions observed in piglit quick-driver.tests with this patch.<br>
<br>
Signed-off-by: Anuj Phogat &lt;<a href="mailto:anuj.phogat@gmail.com">anuj.phogat@gmail.com</a>&gt;<br>
---<br>
src/mesa/drivers/dri/i965/brw_blorp.h | 15 ++--<br>
src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 120 +++++++++++++++++----------<br>
src/mesa/drivers/dri/i965/brw_reg.h | 7 ++<br>
3 files changed, 90 insertions(+), 52 deletions(-)<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h<br>
index 8915080..70e3933 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_blorp.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h<br>
@@ -161,22 +161,19 @@ struct brw_blorp_coord_transform_params<br>
void setup(GLuint src0, GLuint dst0, GLuint dst1,<br>
bool mirror);<br>
<br>
- int16_t multiplier;<br>
- int16_t offset;<br>
+ int32_t multiplier;<br>
+ int32_t offset;<br>
};<br>
<br>
<br>
struct brw_blorp_wm_push_constants<br>
{<br>
- uint16_t dst_x0;<br>
- uint16_t dst_x1;<br>
- uint16_t dst_y0;<br>
- uint16_t dst_y1;<br>
+ uint32_t dst_x0;<br>
+ uint32_t dst_x1;<br>
+ uint32_t dst_y0;<br>
+ uint32_t dst_y1;<br>
brw_blorp_coord_transform_params x_transform;<br>
brw_blorp_coord_transform_params y_transform;<br>
-<br>
- /* Pad out to an integral number of registers */<br>
- uint16_t pad[8];<br>
};<br>
<br>
/* Every 32 bytes of push constant data constitutes one GEN register. */<br>
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp<br>
index c3ef054..b7ee92b 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp<br>
@@ -590,13 +590,12 @@ private:<br>
void encode_msaa(unsigned num_samples, intel_msaa_layout layout);<br>
void decode_msaa(unsigned num_samples, intel_msaa_layout layout);<br>
void kill_if_outside_dst_rect();<br>
- void translate_dst_to_src();<br>
+ void translate_dst_to_src(unsigned intel_gen);<br>
void single_to_blend();<br>
void manual_blend(unsigned num_samples);<br>
void sample(struct brw_reg dst);<br>
void texel_fetch(struct brw_reg dst);<br>
void mcs_fetch();<br>
- void expand_to_32_bits(struct brw_reg src, struct brw_reg dst);<br>
void texture_lookup(struct brw_reg dst, GLuint msg_type,<br>
const sampler_message_arg *args, int num_args);<br>
void render_target_write();<br>
@@ -773,7 +772,7 @@ brw_blorp_blit_program::compile(struct brw_context *brw,<br>
kill_if_outside_dst_rect();<br>
<br>
/* Next, apply a translation to obtain coordinates in the source image. */<br>
- translate_dst_to_src();<br>
+ translate_dst_to_src(brw->intel.gen);<br>
<br>
/* If the source image is not multisampled, then we want to fetch sample<br>
* number 0, because that's the only sample there is.<br>
@@ -845,7 +844,7 @@ brw_blorp_blit_program::alloc_push_const_regs(int base_reg)<br>
#define CONST_LOC(name) offsetof(brw_blorp_wm_push_constants, name)<br>
#define ALLOC_REG(name) \<br>
this->name = \<br>
- brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, base_reg, CONST_LOC(name) / 2)<br>
+ brw_ud1_reg(BRW_GENERAL_REGISTER_FILE, base_reg, CONST_LOC(name) / 4)<br>
<br>
ALLOC_REG(dst_x0);<br>
ALLOC_REG(dst_x1);<br>
@@ -875,17 +874,23 @@ brw_blorp_blit_program::alloc_regs()<br>
}<br>
this->mcs_data =<br>
retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD); reg += 8;<br>
+<br>
for (int i = 0; i < 2; ++i) {<br>
this->x_coords[i]<br>
- = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));<br>
+ = vec8(retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD));<br></blockquote><div><br></div><div>It should be sufficient to say "this->x_coords[i] = retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD)", since the register returned by brw_vec8_grf() is already a vec8. This applies to y_coords[i], sample_index, t1, and t2 below.<br>
<br>Regardless of whether you decide to change that, this patch is:<br><br>Reviewed-by: Paul Berry &lt;<a href="mailto:stereotype441@gmail.com">stereotype441@gmail.com</a>&gt;<br><br>Nice work, BTW. Some day soon I want to port blorp over to share more code with the FS back-end (so that it's easier to port to future chipsets). Your work here paves the way for that nicely.<br>
</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+ reg += 2;<br>
this->y_coords[i]<br>
- = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));<br>
+ = vec8(retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD));<br>
+ reg += 2;<br>
}<br>
this->xy_coord_index = 0;<br>
this->sample_index<br>
- = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));<br>
- this->t1 = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));<br>
- this->t2 = vec16(retype(brw_vec8_grf(reg++, 0), BRW_REGISTER_TYPE_UW));<br>
+ = vec8(retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD));<br>
+ reg += 2;<br>
+ this->t1 = vec8(retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD));<br>
+ reg += 2;<br>
+ this->t2 = vec8(retype(brw_vec8_grf(reg, 0), BRW_REGISTER_TYPE_UD));<br>
+ reg += 2;<br>
<br>
/* Make sure we didn't run out of registers */<br>
assert(reg <= GEN7_MRF_HACK_START);<br>
@@ -942,7 +947,8 @@ brw_blorp_blit_program::compute_frag_coords()<br>
* Then, we need to add the repeating sequence (0, 1, 0, 1, ...) to the<br>
* result, since pixels n+1 and n+3 are in the right half of the subspan.<br>
*/<br>
- brw_ADD(&func, X, stride(suboffset(R1, 4), 2, 4, 0), brw_imm_v(0x10101010));<br>
+ brw_ADD(&func, vec16(retype(X, BRW_REGISTER_TYPE_UW)),<br>
+ stride(suboffset(R1, 4), 2, 4, 0), brw_imm_v(0x10101010));<br>
<br>
/* Similarly, Y coordinates for subspans come from R1.2[31:16] through<br>
* R1.5[31:16], so to get pixel Y coordinates we need to start at the 5th<br>
@@ -952,11 +958,17 @@ brw_blorp_blit_program::compute_frag_coords()<br>
* And we need to add the repeating sequence (0, 0, 1, 1, ...), since<br>
* pixels n+2 and n+3 are in the bottom half of the subspan.<br>
*/<br>
- brw_ADD(&func, Y, stride(suboffset(R1, 5), 2, 4, 0), brw_imm_v(0x11001100));<br>
+ brw_ADD(&func, vec16(retype(Y, BRW_REGISTER_TYPE_UW)),<br>
+ stride(suboffset(R1, 5), 2, 4, 0), brw_imm_v(0x11001100));<br>
+<br>
+ /* Move the coordinates to UD registers. */<br>
+ brw_MOV(&func, vec16(Xp), retype(X, BRW_REGISTER_TYPE_UW));<br>
+ brw_MOV(&func, vec16(Yp), retype(Y, BRW_REGISTER_TYPE_UW));<br>
+ SWAP_XY_AND_XPYP();<br>
<br>
if (key->persample_msaa_dispatch) {<br>
switch (key->rt_samples) {<br>
- case 4:<br>
+ case 4: {<br>
/* The WM will be run in MSDISPMODE_PERSAMPLE with num_samples == 4.<br>
* Therefore, subspan 0 will represent sample 0, subspan 1 will<br>
* represent sample 1, and so on.<br>
@@ -966,9 +978,13 @@ brw_blorp_blit_program::compute_frag_coords()<br>
* populate a temporary variable with the sequence (0, 1, 2, 3), and<br>
* then copy from it using vstride=1, width=4, hstride=0.<br>
*/<br>
- brw_MOV(&func, t1, brw_imm_v(0x3210));<br>
- brw_MOV(&func, S, stride(t1, 1, 4, 0));<br>
+ struct brw_reg t1_uw1 = retype(t1, BRW_REGISTER_TYPE_UW);<br>
+ brw_MOV(&func, vec16(t1_uw1), brw_imm_v(0x3210));<br>
+ /* Move to UD sample_index register. */<br>
+ brw_MOV(&func, S, stride(t1_uw1, 1, 4, 0));<br>
+ brw_MOV(&func, offset(S, 1), suboffset(stride(t1_uw1, 1, 4, 0), 2));<br>
break;<br>
+ }<br>
case 8: {<br>
/* The WM will be run in MSDISPMODE_PERSAMPLE with num_samples == 8.<br>
* Therefore, subspan 0 will represent sample N (where N is 0 or 4),<br>
@@ -984,12 +1000,16 @@ brw_blorp_blit_program::compute_frag_coords()<br>
* using vstride=1, width=4, hstride=0.<br>
*/<br>
struct brw_reg t1_ud1 = vec1(retype(t1, BRW_REGISTER_TYPE_UD));<br>
+ struct brw_reg t2_uw1 = retype(t2, BRW_REGISTER_TYPE_UW);<br>
struct brw_reg r0_ud1 = vec1(retype(R0, BRW_REGISTER_TYPE_UD));<br>
brw_AND(&func, t1_ud1, r0_ud1, brw_imm_ud(0xc0));<br>
brw_SHR(&func, t1_ud1, t1_ud1, brw_imm_ud(5));<br>
- brw_MOV(&func, t2, brw_imm_v(0x3210));<br>
- brw_ADD(&func, S, retype(t1_ud1, BRW_REGISTER_TYPE_UW),<br>
- stride(t2, 1, 4, 0));<br>
+ brw_MOV(&func, vec16(t2_uw1), brw_imm_v(0x3210));<br>
+ brw_ADD(&func, vec16(S), retype(t1_ud1, BRW_REGISTER_TYPE_UW),<br>
+ stride(t2_uw1, 1, 4, 0));<br>
+ brw_ADD(&func, offset(S, 1),<br>
+ retype(t1_ud1, BRW_REGISTER_TYPE_UW),<br>
+ suboffset(stride(t2_uw1, 1, 4, 0), 2));<br>
break;<br>
}<br>
default:<br>
@@ -1031,6 +1051,7 @@ brw_blorp_blit_program::translate_tiling(bool old_tiled_w, bool new_tiled_w)<br>
*/<br>
assert(s_is_zero);<br>
<br>
+ brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);<br>
if (new_tiled_w) {<br>
/* Given X and Y coordinates that describe an address using Y tiling,<br>
* translate to the X and Y coordinates that describe the same address<br>
@@ -1100,6 +1121,7 @@ brw_blorp_blit_program::translate_tiling(bool old_tiled_w, bool new_tiled_w)<br>
brw_OR(&func, Yp, t1, t2);<br>
SWAP_XY_AND_XPYP();<br>
}<br>
+ brw_set_compression_control(&func, BRW_COMPRESSION_NONE);<br>
}<br>
<br>
/**<br>
@@ -1116,6 +1138,7 @@ void<br>
brw_blorp_blit_program::encode_msaa(unsigned num_samples,<br>
intel_msaa_layout layout)<br>
{<br>
+ brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);<br>
switch (layout) {<br>
case INTEL_MSAA_LAYOUT_NONE:<br>
/* No translation necessary, and S should already be zero. */<br>
@@ -1187,6 +1210,7 @@ brw_blorp_blit_program::encode_msaa(unsigned num_samples,<br>
s_is_zero = true;<br>
break;<br>
}<br>
+ brw_set_compression_control(&func, BRW_COMPRESSION_NONE);<br>
}<br>
<br>
/**<br>
@@ -1203,6 +1227,7 @@ void<br>
brw_blorp_blit_program::decode_msaa(unsigned num_samples,<br>
intel_msaa_layout layout)<br>
{<br>
+ brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);<br>
switch (layout) {<br>
case INTEL_MSAA_LAYOUT_NONE:<br>
/* No translation necessary, and S should already be zero. */<br>
@@ -1265,6 +1290,7 @@ brw_blorp_blit_program::decode_msaa(unsigned num_samples,<br>
SWAP_XY_AND_XPYP();<br>
break;<br>
}<br>
+ brw_set_compression_control(&func, BRW_COMPRESSION_NONE);<br>
}<br>
<br>
/**<br>
@@ -1277,12 +1303,12 @@ brw_blorp_blit_program::kill_if_outside_dst_rect()<br>
{<br>
struct brw_reg f0 = brw_flag_reg(0, 0);<br>
struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);<br>
- struct brw_reg null16 = vec16(retype(brw_null_reg(), BRW_REGISTER_TYPE_UW));<br>
+ struct brw_reg null32 = vec16(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));<br>
<br>
- brw_CMP(&func, null16, BRW_CONDITIONAL_GE, X, dst_x0);<br>
- brw_CMP(&func, null16, BRW_CONDITIONAL_GE, Y, dst_y0);<br>
- brw_CMP(&func, null16, BRW_CONDITIONAL_L, X, dst_x1);<br>
- brw_CMP(&func, null16, BRW_CONDITIONAL_L, Y, dst_y1);<br>
+ brw_CMP(&func, null32, BRW_CONDITIONAL_GE, X, dst_x0);<br>
+ brw_CMP(&func, null32, BRW_CONDITIONAL_GE, Y, dst_y0);<br>
+ brw_CMP(&func, null32, BRW_CONDITIONAL_L, X, dst_x1);<br>
+ brw_CMP(&func, null32, BRW_CONDITIONAL_L, Y, dst_y1);<br>
<br>
brw_set_predicate_control(&func, BRW_PREDICATE_NONE);<br>
brw_push_insn_state(&func);<br>
@@ -1296,12 +1322,28 @@ brw_blorp_blit_program::kill_if_outside_dst_rect()<br>
* coordinates.<br>
*/<br>
void<br>
-brw_blorp_blit_program::translate_dst_to_src()<br>
+brw_blorp_blit_program::translate_dst_to_src(unsigned intel_gen)<br>
{<br>
- brw_MUL(&func, Xp, X, x_transform.multiplier);<br>
- brw_MUL(&func, Yp, Y, y_transform.multiplier);<br>
+ brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);<br>
+ /* For mul instruction:<br>
+ * On SNB when both src0 and src1 are of type D or UD, only the low 16 bits<br>
+ * of each element of src0 are used.<br>
+ * On IVB when both src0 and src1 are of type D or UD, only the low 16 bits<br>
+ * of each element of src1 are used.<br>
+ * The multiplier can be positive or negative, so keep it in the source<br>
+ * operand that doesn't get truncated during multiplication.<br>
+ */<br>
+ if (intel_gen == 6) {<br>
+ brw_MUL(&func, Xp, X, x_transform.multiplier);<br>
+ brw_MUL(&func, Yp, Y, y_transform.multiplier);<br>
+ }<br>
+ else {<br>
+ brw_MUL(&func, Xp, x_transform.multiplier, X);<br>
+ brw_MUL(&func, Yp, y_transform.multiplier, Y);<br>
+ }<br>
brw_ADD(&func, Xp, Xp, x_transform.offset);<br>
brw_ADD(&func, Yp, Yp, y_transform.offset);<br>
+ brw_set_compression_control(&func, BRW_COMPRESSION_NONE);<br>
SWAP_XY_AND_XPYP();<br>
}<br>
<br>
@@ -1318,10 +1360,12 @@ brw_blorp_blit_program::single_to_blend()<br>
* that maxe up a pixel). So we need to multiply our X and Y coordinates<br>
* each by 2 and then add 1.<br>
*/<br>
+ brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);<br>
brw_SHL(&func, t1, X, brw_imm_w(1));<br>
brw_SHL(&func, t2, Y, brw_imm_w(1));<br>
brw_ADD(&func, Xp, t1, brw_imm_w(1));<br>
brw_ADD(&func, Yp, t2, brw_imm_w(1));<br>
+ brw_set_compression_control(&func, BRW_COMPRESSION_NONE);<br>
SWAP_XY_AND_XPYP();<br>
}<br>
<br>
@@ -1394,7 +1438,7 @@ brw_blorp_blit_program::manual_blend(unsigned num_samples)<br>
s_is_zero = true;<br>
} else {<br>
s_is_zero = false;<br>
- brw_MOV(&func, S, brw_imm_uw(i));<br>
+ brw_MOV(&func, vec16(S), brw_imm_ud(i));<br>
}<br>
texel_fetch(texture_data[stack_depth++]);<br>
<br>
@@ -1546,16 +1590,6 @@ brw_blorp_blit_program::mcs_fetch()<br>
}<br>
<br>
void<br>
-brw_blorp_blit_program::expand_to_32_bits(struct brw_reg src,<br>
- struct brw_reg dst)<br>
-{<br>
- brw_MOV(&func, vec8(dst), vec8(src));<br>
- brw_set_compression_control(&func, BRW_COMPRESSION_2NDHALF);<br>
- brw_MOV(&func, offset(vec8(dst), 1), suboffset(vec8(src), 8));<br>
- brw_set_compression_control(&func, BRW_COMPRESSION_NONE);<br>
-}<br>
-<br>
-void<br>
brw_blorp_blit_program::texture_lookup(struct brw_reg dst,<br>
GLuint msg_type,<br>
const sampler_message_arg *args,<br>
@@ -1566,16 +1600,16 @@ brw_blorp_blit_program::texture_lookup(struct brw_reg dst,<br>
for (int arg = 0; arg < num_args; ++arg) {<br>
switch (args[arg]) {<br>
case SAMPLER_MESSAGE_ARG_U_FLOAT:<br>
- expand_to_32_bits(X, retype(mrf, BRW_REGISTER_TYPE_F));<br>
+ brw_MOV(&func, retype(mrf, BRW_REGISTER_TYPE_F), X);<br>
break;<br>
case SAMPLER_MESSAGE_ARG_V_FLOAT:<br>
- expand_to_32_bits(Y, retype(mrf, BRW_REGISTER_TYPE_F));<br>
+ brw_MOV(&func, retype(mrf, BRW_REGISTER_TYPE_F), Y);<br>
break;<br>
case SAMPLER_MESSAGE_ARG_U_INT:<br>
- expand_to_32_bits(X, mrf);<br>
+ brw_MOV(&func, mrf, X);<br>
break;<br>
case SAMPLER_MESSAGE_ARG_V_INT:<br>
- expand_to_32_bits(Y, mrf);<br>
+ brw_MOV(&func, mrf, Y);<br>
break;<br>
case SAMPLER_MESSAGE_ARG_SI_INT:<br>
/* Note: on Gen7, this code may be reached with s_is_zero==true<br>
@@ -1586,7 +1620,7 @@ brw_blorp_blit_program::texture_lookup(struct brw_reg dst,<br>
if (s_is_zero)<br>
brw_MOV(&func, mrf, brw_imm_ud(0));<br>
else<br>
- expand_to_32_bits(S, mrf);<br>
+ brw_MOV(&func, mrf, S);<br>
break;<br>
case SAMPLER_MESSAGE_ARG_MCS_INT:<br>
switch (key->tex_layout) {<br>
@@ -1614,7 +1648,7 @@ brw_blorp_blit_program::texture_lookup(struct brw_reg dst,<br>
}<br>
<br>
brw_SAMPLE(&func,<br>
- retype(dst, BRW_REGISTER_TYPE_UW) /* dest */,<br>
+ retype(dst, BRW_REGISTER_TYPE_F) /* dest */,<br>
base_mrf /* msg_reg_nr */,<br>
brw_message_reg(base_mrf) /* src0 */,<br>
BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX,<br>
@@ -1685,7 +1719,7 @@ brw_blorp_coord_transform_params::setup(GLuint src0, GLuint dst0, GLuint dst1,<br>
* x' = 1*x + (src_x0 - dst_x0)<br>
*/<br>
multiplier = 1;<br>
- offset = src0 - dst0;<br>
+ offset = (int) (src0 - dst0);<br>
} else {<br>
/* When mirroring X we need:<br>
* x' - src_x0 = dst_x1 - x - 1<br>
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h<br>
index 9ac2544..972ccf6 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_reg.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_reg.h<br>
@@ -344,6 +344,13 @@ brw_uw1_reg(unsigned file, unsigned nr, unsigned subnr)<br>
return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr);<br>
}<br>
<br>
+/** Construct unsigned dword[1] register */<br>
+static inline struct brw_reg<br>
+brw_ud1_reg(unsigned file, unsigned nr, unsigned subnr)<br>
+{<br>
+ return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UD), subnr);<br>
+}<br>
+<br>
static inline struct brw_reg<br>
brw_imm_reg(unsigned type)<br>
{<br>
<span class=""><font color="#888888">--<br>
1.8.1.4<br>
<br>
</font></span></blockquote></div><br></div></div>