[Mesa-dev] [PATCH 21/32] i965/vec4: Fix the scheduler to take into account reads and writes of multiple registers.
Francisco Jerez
currojerez at riseup.net
Mon Feb 9 05:38:02 PST 2015
Matt Turner <mattst88 at gmail.com> writes:
> On Fri, Feb 6, 2015 at 6:43 AM, Francisco Jerez <currojerez at riseup.net> wrote:
>> ---
>> src/mesa/drivers/dri/i965/brw_ir_vec4.h | 1 +
>> src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp | 15 ++++++++++-----
>> src/mesa/drivers/dri/i965/brw_vec4.cpp | 17 +++++++++++++++++
>> 3 files changed, 28 insertions(+), 5 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_ir_vec4.h b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
>> index f11a2d2..941086f 100644
>> --- a/src/mesa/drivers/dri/i965/brw_ir_vec4.h
>> +++ b/src/mesa/drivers/dri/i965/brw_ir_vec4.h
>> @@ -171,6 +171,7 @@ public:
>> unsigned sol_vertex; /**< gen6: used for setting dst index in SVB header */
>>
>> bool is_send_from_grf();
>> + unsigned regs_read(unsigned arg) const;
>> bool can_reswizzle(int dst_writemask, int swizzle, int swizzle_mask);
>> void reswizzle(int dst_writemask, int swizzle);
>> bool can_do_source_mods(struct brw_context *brw);
>> diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
>> index 88feeca..2b22b2c 100644
>> --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
>> @@ -1063,7 +1063,8 @@ vec4_instruction_scheduler::calculate_deps()
>> /* read-after-write deps. */
>> for (int i = 0; i < 3; i++) {
>> if (inst->src[i].file == GRF) {
>> - add_dep(last_grf_write[inst->src[i].reg], n);
>> + for (unsigned j = 0; j < inst->regs_read(i); ++j)
>> + add_dep(last_grf_write[inst->src[i].reg + j], n);
>> } else if (inst->src[i].file == HW_REG &&
>> (inst->src[i].fixed_hw_reg.file ==
>> BRW_GENERAL_REGISTER_FILE)) {
>> @@ -1103,8 +1104,10 @@ vec4_instruction_scheduler::calculate_deps()
>>
>> /* write-after-write deps. */
>> if (inst->dst.file == GRF) {
>> - add_dep(last_grf_write[inst->dst.reg], n);
>> - last_grf_write[inst->dst.reg] = n;
>> + for (unsigned j = 0; j < inst->regs_written; ++j) {
>> + add_dep(last_grf_write[inst->dst.reg + j], n);
>> + last_grf_write[inst->dst.reg + j] = n;
>> + }
>> } else if (inst->dst.file == MRF) {
>> add_dep(last_mrf_write[inst->dst.reg], n);
>> last_mrf_write[inst->dst.reg] = n;
>> @@ -1156,7 +1159,8 @@ vec4_instruction_scheduler::calculate_deps()
>> /* write-after-read deps. */
>> for (int i = 0; i < 3; i++) {
>> if (inst->src[i].file == GRF) {
>> - add_dep(n, last_grf_write[inst->src[i].reg]);
>> + for (unsigned j = 0; j < inst->regs_read(i); ++j)
>> + add_dep(n, last_grf_write[inst->src[i].reg + j]);
>> } else if (inst->src[i].file == HW_REG &&
>> (inst->src[i].fixed_hw_reg.file ==
>> BRW_GENERAL_REGISTER_FILE)) {
>> @@ -1194,7 +1198,8 @@ vec4_instruction_scheduler::calculate_deps()
>> * can mark this as WAR dependency.
>> */
>> if (inst->dst.file == GRF) {
>> - last_grf_write[inst->dst.reg] = n;
>> + for (unsigned j = 0; j < inst->regs_written; ++j)
>> + last_grf_write[inst->dst.reg + j] = n;
>> } else if (inst->dst.file == MRF) {
>> last_mrf_write[inst->dst.reg] = n;
>> } else if (inst->dst.file == HW_REG &&
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> index a4fd136..b2f3151 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> @@ -262,6 +262,23 @@ vec4_instruction::is_send_from_grf()
>> }
>> }
>>
>> +unsigned
>> +vec4_instruction::regs_read(unsigned arg) const
>> +{
>> + switch (opcode) {
>> + case SHADER_OPCODE_SHADER_TIME_ADD:
>> + return (arg == 0 ? mlen :
>> + src[arg].file != BAD_FILE ? 1 : 0);
>> +
>> + case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
>> + return (arg == 1 ? mlen :
>> + src[arg].file != BAD_FILE ? 1 : 0);
>> +
>> + default:
>> + return (src[arg].file != BAD_FILE ? 1 : 0);
>> + }
>> +}
>
> Better to just do
>
> if (src[arg].file == BAD_FILE)
> return 0;
>
> before the switch statement to avoid the nested ternary, or do you
> actually need to test arg == 0/1 before checking .file in those two
> cases?
Yeah, I've fixed that locally, thanks.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 212 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150209/017dc3c3/attachment.sig>
More information about the mesa-dev
mailing list