[Mesa-dev] [PATCH 29/51] intel/compiler/fs: Add register padding support
Topi Pohjolainen
topi.pohjolainen at gmail.com
Fri Nov 24 12:26:56 UTC 2017
Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
---
src/intel/compiler/brw_fs.cpp | 3 ++-
src/intel/compiler/brw_fs.h | 3 ++-
src/intel/compiler/brw_fs_builder.h | 25 ++++++++++++++++++-------
src/intel/compiler/brw_fs_copy_propagation.cpp | 1 +
src/intel/compiler/brw_fs_nir.cpp | 9 +++++++--
src/intel/compiler/brw_ir_fs.h | 3 +++
6 files changed, 33 insertions(+), 11 deletions(-)
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index cedfde5096..9c3410b698 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -440,6 +440,7 @@ fs_reg::fs_reg(struct ::brw_reg reg) :
{
this->offset = 0;
this->stride = 1;
+ this->pad_per_component = 0;
if (this->file == IMM &&
(this->type != BRW_REGISTER_TYPE_V &&
this->type != BRW_REGISTER_TYPE_UV &&
@@ -467,7 +468,7 @@ fs_reg::component_size(unsigned width) const
const unsigned stride = ((file != ARF && file != FIXED_GRF) ? this->stride :
hstride == 0 ? 0 :
1 << (hstride - 1));
- return MAX2(width * stride, 1) * type_sz(type);
+ return (MAX2(width * stride, 1) * (type_sz(type)) + pad_per_component);
}
/**
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 30557324d5..d9c4f737e6 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -231,7 +231,8 @@ public:
nir_jump_instr *instr);
fs_reg get_nir_src(const nir_src &src);
fs_reg get_nir_src_imm(const nir_src &src);
- fs_reg get_nir_dest(const nir_dest &dest);
+ fs_reg get_nir_dest(const nir_dest &dest,
+ bool pad_components_to_full_registers = false);
fs_reg get_nir_image_deref(const nir_deref_var *deref);
fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h
index 633086c64b..804d52e5df 100644
--- a/src/intel/compiler/brw_fs_builder.h
+++ b/src/intel/compiler/brw_fs_builder.h
@@ -182,17 +182,28 @@ namespace brw {
* component in this IR).
*/
dst_reg
- vgrf(enum brw_reg_type type, unsigned n = 1) const
+ vgrf(enum brw_reg_type type,
+ unsigned n = 1,
+ bool pad_components_to_full_registers = false) const
{
assert(dispatch_width() <= 32);
- if (n > 0)
- return dst_reg(VGRF, shader->alloc.allocate(
- DIV_ROUND_UP(n * type_sz(type) * dispatch_width(),
- REG_SIZE)),
- type);
- else
+ if (n == 0)
return retype(null_reg_ud(), type);
+
+ const unsigned pad_per_component =
+ (pad_components_to_full_registers &&
+ type_sz(type) == 2 &&
+ dispatch_width() == 8) ? (REG_SIZE / 2) : 0;
+ const unsigned size =
+ n * ((type_sz(type) * dispatch_width()) + pad_per_component);
+ const unsigned nr = shader->alloc.allocate(
+ DIV_ROUND_UP(size, REG_SIZE));
+
+ dst_reg dst = dst_reg(VGRF, nr, type);
+ dst.pad_per_component = pad_per_component;
+
+ return dst;
}
/**
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp
index ed2511ecfa..637a1de6ae 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -447,6 +447,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
inst->src[arg].file = entry->src.file;
inst->src[arg].nr = entry->src.nr;
inst->src[arg].stride *= entry->src.stride;
+ inst->src[arg].pad_per_component = entry->src.pad_per_component;
inst->saturate = inst->saturate || entry->saturate;
/* Compute the offset of inst->src[arg] relative to entry->dst */
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 16e8dfc186..35e78b134a 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -357,6 +357,9 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl)
unsigned size = array_elems * reg->num_components;
const brw_reg_type reg_type =
brw_reg_type_from_bit_size(reg->bit_size, BRW_REGISTER_TYPE_F);
+
+ /* TODO: Consider if 16-bit component padding is needed. */
+
nir_locals[reg->index] = bld.vgrf(reg_type, size);
}
@@ -1602,13 +1605,15 @@ fs_visitor::get_nir_src_imm(const nir_src &src)
}
fs_reg
-fs_visitor::get_nir_dest(const nir_dest &dest)
+fs_visitor::get_nir_dest(const nir_dest &dest,
+ bool pad_components_to_full_registers)
{
if (dest.is_ssa) {
const brw_reg_type reg_type =
brw_reg_type_from_bit_size(dest.ssa.bit_size, BRW_REGISTER_TYPE_F);
nir_ssa_values[dest.ssa.index] =
- bld.vgrf(reg_type, dest.ssa.num_components);
+ bld.vgrf(reg_type, dest.ssa.num_components,
+ pad_components_to_full_registers);
return nir_ssa_values[dest.ssa.index];
} else {
/* We don't handle indirects on locals */
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h
index cd603630a4..b4a1d7ef5a 100644
--- a/src/intel/compiler/brw_ir_fs.h
+++ b/src/intel/compiler/brw_ir_fs.h
@@ -51,6 +51,9 @@ public:
/** Register region horizontal stride */
uint8_t stride;
+
+ /* Per-component padding in bytes; needed, for example, for SIMD8 half-float payloads. */
+ uint8_t pad_per_component;
};
static inline fs_reg
--
2.11.0
More information about the mesa-dev mailing list