[Mesa-dev] [PATCH 09/10] i965/fs: Calculate delta_x and delta_y together.
Kenneth Graunke
kenneth at whitecape.org
Fri Apr 17 11:58:07 PDT 2015
On Friday, April 17, 2015 11:45:57 AM Jason Ekstrand wrote:
> On Tue, Apr 14, 2015 at 4:15 PM, Matt Turner <mattst88 at gmail.com> wrote:
> > This lets SIMD16 programs on G45 and Gen5 use the PLN instruction.
>
> This patch also changes the meaning of FS_OPCODE_LINTERP. Those
> chainges should be described in the commit message as well.
>
> > On Ironlake:
> >
> > total instructions in shared programs: 5634757 -> 5518055 (-2.07%)
> > instructions in affected programs: 1745837 -> 1629135 (-6.68%)
> > helped: 11439
> > HURT: 4
> > ---
> > src/mesa/drivers/dri/i965/brw_fs.cpp | 46 +++++++-------------
> > src/mesa/drivers/dri/i965/brw_fs.h | 3 +-
> > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 +--
> > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 13 +++---
> > src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 8 ++--
> > src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 51 +++++++++++------------
> > src/mesa/drivers/dri/i965/brw_reg.h | 7 ++++
> > 7 files changed, 59 insertions(+), 74 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > index 5ab8701..6e55f67 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > @@ -1259,8 +1259,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
> > emit(MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))));
> > } else {
> > emit(FS_OPCODE_LINTERP, wpos,
> > - this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
> > - this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
> > + this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
> > interp_reg(VARYING_SLOT_POS, 2));
> > }
> > wpos = offset(wpos, 1);
> > @@ -1302,8 +1301,7 @@ fs_visitor::emit_linterp(const fs_reg &attr, const fs_reg &interp,
> > barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
> > }
> > return emit(FS_OPCODE_LINTERP, attr,
> > - this->delta_x[barycoord_mode],
> > - this->delta_y[barycoord_mode], interp);
> > + this->delta_xy[barycoord_mode], interp);
> > }
> >
> > void
> > @@ -1851,8 +1849,8 @@ fs_visitor::assign_urb_setup()
> > */
> > foreach_block_and_inst(block, fs_inst, inst, cfg) {
> > if (inst->opcode == FS_OPCODE_LINTERP) {
> > - assert(inst->src[2].file == HW_REG);
> > - inst->src[2].fixed_hw_reg.nr += urb_start;
> > + assert(inst->src[1].file == HW_REG);
> > + inst->src[1].fixed_hw_reg.nr += urb_start;
> > }
> >
> > if (inst->opcode == FS_OPCODE_CINTERP) {
> > @@ -2106,25 +2104,16 @@ fs_visitor::compact_virtual_grfs()
> > }
> > }
> >
> > - /* Patch all the references to delta_x/delta_y, since they're used in
> > - * register allocation. If they're unused, switch them to BAD_FILE so
> > - * we don't think some random VGRF is delta_x/delta_y.
> > + /* Patch all the references to delta_xy, since they're used in register
> > + * allocation. If they're unused, switch them to BAD_FILE so we don't
> > + * think some random VGRF is delta_xy.
> > */
> > - for (unsigned i = 0; i < ARRAY_SIZE(delta_x); i++) {
> > - if (delta_x[i].file == GRF) {
> > - if (remap_table[delta_x[i].reg] != -1) {
> > - delta_x[i].reg = remap_table[delta_x[i].reg];
> > + for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
> > + if (delta_xy[i].file == GRF) {
> > + if (remap_table[delta_xy[i].reg] != -1) {
> > + delta_xy[i].reg = remap_table[delta_xy[i].reg];
> > } else {
> > - delta_x[i].file = BAD_FILE;
> > - }
> > - }
> > - }
> > - for (unsigned i = 0; i < ARRAY_SIZE(delta_y); i++) {
> > - if (delta_y[i].file == GRF) {
> > - if (remap_table[delta_y[i].reg] != -1) {
> > - delta_y[i].reg = remap_table[delta_y[i].reg];
> > - } else {
> > - delta_y[i].file = BAD_FILE;
> > + delta_xy[i].file = BAD_FILE;
> > }
> > }
> > }
> > @@ -2589,14 +2578,9 @@ fs_visitor::opt_register_renaming()
> > if (progress) {
> > invalidate_live_intervals();
> >
> > - for (unsigned i = 0; i < ARRAY_SIZE(delta_x); i++) {
> > - if (delta_x[i].file == GRF && remap[delta_x[i].reg] != -1) {
> > - delta_x[i].reg = remap[delta_x[i].reg];
> > - }
> > - }
> > - for (unsigned i = 0; i < ARRAY_SIZE(delta_y); i++) {
> > - if (delta_y[i].file == GRF && remap[delta_y[i].reg] != -1) {
> > - delta_y[i].reg = remap[delta_y[i].reg];
> > + for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
> > + if (delta_xy[i].file == GRF && remap[delta_xy[i].reg] != -1) {
> > + delta_xy[i].reg = remap[delta_xy[i].reg];
> > }
> > }
> > }
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> > index b7c1c39..e04691c 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs.h
> > +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> > @@ -512,8 +512,7 @@ public:
> > fs_reg pixel_y;
> > fs_reg wpos_w;
> > fs_reg pixel_w;
> > - fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
> > - fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
> > + fs_reg delta_xy[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
> > fs_reg shader_start_time;
> > fs_reg userplane[MAX_CLIP_PLANES];
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > index fc9597e..dba3286 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > @@ -392,11 +392,10 @@ fs_generator::generate_linterp(fs_inst *inst,
> > struct brw_reg dst, struct brw_reg *src)
> > {
> > struct brw_reg delta_x = src[0];
> > - struct brw_reg delta_y = src[1];
> > - struct brw_reg interp = src[2];
> > + struct brw_reg delta_y = offset(src[0], dispatch_width / 8);
>
> This is an awkward use of offset(). It's supposed to take the width
> into account for you. Why do you need to do it explicitly? If
> there's something funny going on here, it deserves a comment.
Oh the confusion!
I believe Matt is using brw_reg.h's offset(), which works on brw_regs:
static inline struct brw_reg
offset(struct brw_reg reg, unsigned delta)
{
reg.nr += delta;
return reg;
}
while you're talking about brw_ir_fs.h's offset(), which works on
fs_regs:
static inline fs_reg
offset(fs_reg reg, unsigned delta)
{
...piles of code...
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150417/565f7846/attachment.sig>
More information about the mesa-dev
mailing list