[Mesa-dev] [PATCH 7/7] i965/fs: Add empirically-determined instruction latencies for gen7.
Matt Turner
mattst88 at gmail.com
Fri Dec 7 16:38:37 PST 2012
On Fri, Dec 7, 2012 at 2:58 PM, Eric Anholt <eric at anholt.net> wrote:
> The limited performance testing I've done on this hasn't shown any
> statistically significant differences yet.
> ---
> .../dri/i965/brw_fs_schedule_instructions.cpp | 150 +++++++++++++++++++-
> 1 file changed, 147 insertions(+), 3 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
> index 3623c13..f3f0079 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
> @@ -57,7 +57,7 @@ static bool debug = false;
> class schedule_node : public exec_node
> {
> public:
> - schedule_node(fs_inst *inst)
> + schedule_node(fs_inst *inst, int gen)
> {
> this->inst = inst;
> this->child_array_size = 0;
> @@ -67,10 +67,14 @@ public:
> this->parent_count = 0;
> this->unblocked_time = 0;
>
> - set_latency_gen4();
> + if (gen >= 7)
> + set_latency_gen7();
> + else
> + set_latency_gen4();
> }
>
> void set_latency_gen4();
> + void set_latency_gen7();
>
> fs_inst *inst;
> schedule_node **children;
> @@ -120,6 +124,146 @@ schedule_node::set_latency_gen4()
> }
> }
>
> +void
> +schedule_node::set_latency_gen7()
> +{
> + switch (inst->opcode) {
> + case BRW_OPCODE_MAD:
> + /* 3 cycles (this is said to be 4 cycles sometimes depending on the
> + * register numbers in the sources):
> + * mad(8) g4<1>F g2.2<4,1,1>F.x g2<4,1,1>F.x g2.1<4,1,1>F.x { align16 WE_normal 1Q };
> + *
> + * 20 cycles:
> + * mad(8) g4<1>F g2.2<4,1,1>F.x g2<4,1,1>F.x g2.1<4,1,1>F.x { align16 WE_normal 1Q };
> + * mov(8) null g4<4,4,1>F { align16 WE_normal 1Q };
> + */
> + latency = 17;
> + break;
> +
> + case SHADER_OPCODE_RCP:
> + /* 2 cycles:
> + * math inv(8) g4<1>F g2<0,1,0>F null { align1 WE_normal 1Q };
> + *
> + * 18 cycles:
> + * math inv(8) g4<1>F g2<0,1,0>F null { align1 WE_normal 1Q };
> + * mov(8) null g4<8,8,1>F { align1 WE_normal 1Q };
> + *
> + * Same for exp2, log2, rsq, sqrt, sin, cos.
Should those (exp2, log2, rsq, sqrt, sin, cos) be in the switch as well?
More information about the mesa-dev mailing list