[Mesa-dev] [PATCH 11/17] intel/compiler/fs: Simplify ddx/ddy code generation
Matt Turner
mattst88 at gmail.com
Mon Feb 26 19:27:56 UTC 2018
On Fri, Feb 23, 2018 at 4:37 PM, Kenneth Graunke <kenneth at whitecape.org> wrote:
> On Tuesday, February 20, 2018 9:15:18 PM PST Matt Turner wrote:
>> The brw_reg() constructor just obfuscates things here, in my opinion.
>> ---
>> src/intel/compiler/brw_fs_generator.cpp | 77 +++++++++++++++------------------
>> 1 file changed, 35 insertions(+), 42 deletions(-)
>>
>> diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
>> index e5a5a76a932..013d2c820a0 100644
>> --- a/src/intel/compiler/brw_fs_generator.cpp
>> +++ b/src/intel/compiler/brw_fs_generator.cpp
>> @@ -1168,20 +1168,17 @@ fs_generator::generate_ddx(const fs_inst *inst,
>> width = BRW_WIDTH_4;
>> }
>>
>> - struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
>> - src.negate, src.abs,
>> - BRW_REGISTER_TYPE_F,
>> - vstride,
>> - width,
>> - BRW_HORIZONTAL_STRIDE_0,
>> - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
>> - struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
>> - src.negate, src.abs,
>> - BRW_REGISTER_TYPE_F,
>> - vstride,
>> - width,
>> - BRW_HORIZONTAL_STRIDE_0,
>> - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
>> + struct brw_reg src0 = src;
>> + struct brw_reg src1 = src;
>> +
>> + src0.subnr = sizeof(float);
>> + src0.vstride = vstride;
>> + src0.width = width;
>> + src0.hstride = BRW_HORIZONTAL_STRIDE_0;
>> + src1.vstride = vstride;
>> + src1.width = width;
>> + src1.hstride = BRW_HORIZONTAL_STRIDE_0;
>> +
>
> I would like to see an explicit src1.subnr = 0 * sizeof(float) here.
>
>> brw_ADD(p, dst, src0, negate(src1));
>> }
>>
>> @@ -1195,40 +1192,36 @@ fs_generator::generate_ddy(const fs_inst *inst,
>> {
>> if (inst->opcode == FS_OPCODE_DDY_FINE) {
>> /* produce accurate derivatives */
>> - struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
>> - src.negate, src.abs,
>> - BRW_REGISTER_TYPE_F,
>> - BRW_VERTICAL_STRIDE_4,
>> - BRW_WIDTH_4,
>> - BRW_HORIZONTAL_STRIDE_1,
>> - BRW_SWIZZLE_XYXY, WRITEMASK_XYZW);
>> - struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
>> - src.negate, src.abs,
>> - BRW_REGISTER_TYPE_F,
>> - BRW_VERTICAL_STRIDE_4,
>> - BRW_WIDTH_4,
>> - BRW_HORIZONTAL_STRIDE_1,
>> - BRW_SWIZZLE_ZWZW, WRITEMASK_XYZW);
>> + struct brw_reg src0 = src;
>> + struct brw_reg src1 = src;
>> +
>> + src0.swizzle = BRW_SWIZZLE_XYXY;
>> + src0.vstride = BRW_VERTICAL_STRIDE_4;
>> + src0.width = BRW_WIDTH_4;
>> + src0.hstride = BRW_HORIZONTAL_STRIDE_1;
>> +
>> + src1.swizzle = BRW_SWIZZLE_ZWZW;
>> + src1.vstride = BRW_VERTICAL_STRIDE_4;
>> + src1.width = BRW_WIDTH_4;
>> + src1.hstride = BRW_HORIZONTAL_STRIDE_1;
>> +
>
> I agree that the full brw_reg constructor is clunky here, but it might
> look a bit nicer still as:
>
> struct brw_reg src0 = stride(src, 4, 4, 1);
> struct brw_reg src1 = stride(src, 4, 4, 1);
> src0.swizzle = BRW_SWIZZLE_XYXY;
> src1.swizzle = BRW_SWIZZLE_ZWZW;
>
>> brw_push_insn_state(p);
>> brw_set_default_access_mode(p, BRW_ALIGN_16);
>> brw_ADD(p, dst, negate(src0), src1);
>> brw_pop_insn_state(p);
>> } else {
>> /* replicate the derivative at the top-left pixel to other pixels */
>> - struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
>> - src.negate, src.abs,
>> - BRW_REGISTER_TYPE_F,
>> - BRW_VERTICAL_STRIDE_4,
>> - BRW_WIDTH_4,
>> - BRW_HORIZONTAL_STRIDE_0,
>> - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
>> - struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
>> - src.negate, src.abs,
>> - BRW_REGISTER_TYPE_F,
>> - BRW_VERTICAL_STRIDE_4,
>> - BRW_WIDTH_4,
>> - BRW_HORIZONTAL_STRIDE_0,
>> - BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
>> + struct brw_reg src0 = src;
>> + struct brw_reg src1 = src;
>> +
>> + src0.vstride = BRW_VERTICAL_STRIDE_4;
>> + src0.width = BRW_WIDTH_4;
>> + src0.hstride = BRW_HORIZONTAL_STRIDE_0;
>> + src1.vstride = BRW_VERTICAL_STRIDE_4;
>> + src1.width = BRW_WIDTH_4;
>> + src1.hstride = BRW_HORIZONTAL_STRIDE_0;
>> + src1.subnr = 2 * sizeof(float);
>
> Could use stride() here too, if you like. I would like to see an
> explicit src0.subnr = 0 * sizeof(float) here as well.
>
> With the explicit subnr = 0's added, patches 10-11 are:
> Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
Thanks. All good suggestions.
More information about the mesa-dev
mailing list