[Mesa-dev] [PATCH 09/22] i965/fs: add lowering x2d step for IVB/VLV
Samuel Iglesias Gonsálvez
siglesias at igalia.com
Thu Jan 5 13:07:29 UTC 2017
From: "Juan A. Suarez Romero" <jasuarez at igalia.com>
On Ivybridge/Valleyview, when converting a float (F) to a double
precision float (DF), the hardware automatically doubles the source
horizontal stride, hence converting only the values in odd positions.
This commit adds a new lowering step, exclusively for IVB/VLV, where the
sources are first copied to a temporary register with stride 2, and
then converted from this temporary register. Thus, no value is
lost.
---
src/mesa/drivers/dri/i965/Makefile.sources | 1 +
src/mesa/drivers/dri/i965/brw_fs.cpp | 4 +-
src/mesa/drivers/dri/i965/brw_fs.h | 1 +
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 24 ++++++-
src/mesa/drivers/dri/i965/brw_fs_lower_ivb_x2d.cpp | 80 ++++++++++++++++++++++
5 files changed, 107 insertions(+), 3 deletions(-)
create mode 100644 src/mesa/drivers/dri/i965/brw_fs_lower_ivb_x2d.cpp
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index dd54682..1366fe9 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -25,6 +25,7 @@ i965_compiler_FILES = \
brw_fs_live_variables.cpp \
brw_fs_live_variables.h \
brw_fs_lower_d2x.cpp \
+ brw_fs_lower_ivb_x2d.cpp \
brw_fs_lower_pack.cpp \
brw_fs_nir.cpp \
brw_fs_reg_allocate.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 45d320d..9afab4d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -5775,8 +5775,10 @@ fs_visitor::optimize()
* code has a bug in this hardware that is fixed later in the
* lower_simd_width step.
*/
- if (devinfo->gen == 7 && !devinfo->is_haswell)
+ if (devinfo->gen == 7 && !devinfo->is_haswell) {
+ OPT(lower_ivb_x2d);
OPT(lower_ivb_64bit_scalar);
+ }
OPT(lower_simd_width);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 801e354..b5a67ad 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -131,6 +131,7 @@ public:
void validate();
bool opt_algebraic();
bool lower_ivb_64bit_scalar();
+ bool lower_ivb_x2d();
bool opt_redundant_discard_jumps();
bool opt_cse();
bool opt_cse_local(bblock_t *block);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 6967584..1e7eccc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -67,6 +67,26 @@ brw_reg_from_fs_reg(const struct brw_compiler *compiler, fs_inst *inst,
if (reg->stride == 0) {
brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->nr, 0);
} else {
+ unsigned reg_stride;
+
+ /* When converting from F->DF, in IVB/VLV the source is strided
+ * 2. But now we set it to 1 because the hardware will already double
+ * it internally.
+ */
+ if (compiler->devinfo->gen == 7 &&
+ !compiler->devinfo->is_haswell &&
+ inst->opcode == BRW_OPCODE_MOV &&
+ inst->dst.type == BRW_REGISTER_TYPE_DF &&
+ reg->file != BRW_IMMEDIATE_VALUE &&
+ (reg->type == BRW_REGISTER_TYPE_F ||
+ reg->type == BRW_REGISTER_TYPE_D ||
+ reg->type == BRW_REGISTER_TYPE_UD)) {
+ assert(reg->stride == 2);
+ reg_stride = 1;
+ } else {
+ reg_stride = reg->stride;
+ }
+
/* From the Haswell PRM:
*
* "VertStride must be used to cross GRF register boundaries. This
@@ -75,7 +95,7 @@ brw_reg_from_fs_reg(const struct brw_compiler *compiler, fs_inst *inst,
*
* The maximum width value that could satisfy this restriction is:
*/
- const unsigned reg_width = REG_SIZE / (reg->stride * type_sz(reg->type));
+ const unsigned reg_width = REG_SIZE / (reg_stride * type_sz(reg->type));
/* Because the hardware can only split source regions at a whole
* multiple of width during decompression (i.e. vertically), clamp
@@ -93,7 +113,7 @@ brw_reg_from_fs_reg(const struct brw_compiler *compiler, fs_inst *inst,
*/
const unsigned width = MIN2(reg_width, phys_width);
brw_reg = brw_vecn_reg(width, brw_file_from_reg(reg), reg->nr, 0);
- brw_reg = stride(brw_reg, width * reg->stride, width, reg->stride);
+ brw_reg = stride(brw_reg, width * reg_stride, width, reg_stride);
/* From the Ivy PRM (EU Changes by Processor Generation, page 13):
* "Each DF (Double Float) operand uses an element size of 4 rather
* than 8 and all regioning parameters are twice what the values
diff --git a/src/mesa/drivers/dri/i965/brw_fs_lower_ivb_x2d.cpp b/src/mesa/drivers/dri/i965/brw_fs_lower_ivb_x2d.cpp
new file mode 100644
index 0000000..7b47fff
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_lower_ivb_x2d.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_fs.h"
+#include "brw_cfg.h"
+#include "brw_fs_builder.h"
+
+using namespace brw;
+
+bool
+fs_visitor::lower_ivb_x2d()
+{
+ bool progress = false;
+ /* Lower each F/D/UD -> DF MOV into a stride-2 copy followed by the MOV. */
+ assert(devinfo->gen == 7 && !devinfo->is_haswell);
+ /* Anything that is not a MOV from F, D or UD to DF is left untouched. */
+ foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
+ if (inst->opcode != BRW_OPCODE_MOV)
+ continue;
+
+ if (inst->dst.type != BRW_REGISTER_TYPE_DF)
+ continue;
+
+ if (inst->src[0].type != BRW_REGISTER_TYPE_F &&
+ inst->src[0].type != BRW_REGISTER_TYPE_D &&
+ inst->src[0].type != BRW_REGISTER_TYPE_UD)
+ continue;
+ /* Only unsaturated conversions written to a VGRF are expected here. */
+ assert(inst->dst.file == VGRF);
+ assert(inst->saturate == false);
+
+ fs_reg dst = inst->dst;
+
+ const fs_builder ibld(this, block, inst);
+
+ /* On Ivybridge, converting 4 single-precision values to 4
+ * double-precision values requires setting exec_size to 8 in the
+ * generated assembly:
+ *
+ * mov(8) g9<1>:DF g5<4,4,1>
+ *
+ * Internally, the hardware doubles the horizontal stride, hence
+ * converting just one out of every two values. To avoid losing values,
+ * we first copy them into a temporary register with stride 2, and then
+ * perform the conversion from there.
+ */
+ fs_reg temp = ibld.vgrf(inst->dst.type, 1);
+ fs_reg strided_temp = subscript(temp, inst->src[0].type, 0);
+ ibld.MOV(strided_temp, inst->src[0]);
+ ibld.MOV(dst, strided_temp);
+
+ inst->remove(block);
+ progress = true;
+ }
+
+ if (progress)
+ invalidate_live_intervals();
+ /* True when at least one MOV was lowered. */
+ return progress;
+}
--
2.9.3
More information about the mesa-dev
mailing list