[Mesa-dev] [PATCH 29/51] intel/compiler/fs: Add register padding support

Topi Pohjolainen topi.pohjolainen at gmail.com
Fri Nov 24 12:26:56 UTC 2017


Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
---
 src/intel/compiler/brw_fs.cpp                  |  3 ++-
 src/intel/compiler/brw_fs.h                    |  3 ++-
 src/intel/compiler/brw_fs_builder.h            | 25 ++++++++++++++++++-------
 src/intel/compiler/brw_fs_copy_propagation.cpp |  1 +
 src/intel/compiler/brw_fs_nir.cpp              |  9 +++++++--
 src/intel/compiler/brw_ir_fs.h                 |  3 +++
 6 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index cedfde5096..9c3410b698 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -440,6 +440,7 @@ fs_reg::fs_reg(struct ::brw_reg reg) :
 {
    this->offset = 0;
    this->stride = 1;
+   this->pad_per_component = 0;
    if (this->file == IMM &&
        (this->type != BRW_REGISTER_TYPE_V &&
         this->type != BRW_REGISTER_TYPE_UV &&
@@ -467,7 +468,7 @@ fs_reg::component_size(unsigned width) const
    const unsigned stride = ((file != ARF && file != FIXED_GRF) ? this->stride :
                             hstride == 0 ? 0 :
                             1 << (hstride - 1));
-   return MAX2(width * stride, 1) * type_sz(type);
+   return (MAX2(width * stride, 1) * (type_sz(type)) + pad_per_component);
 }
 
 /**
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 30557324d5..d9c4f737e6 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -231,7 +231,8 @@ public:
                       nir_jump_instr *instr);
    fs_reg get_nir_src(const nir_src &src);
    fs_reg get_nir_src_imm(const nir_src &src);
-   fs_reg get_nir_dest(const nir_dest &dest);
+   fs_reg get_nir_dest(const nir_dest &dest,
+                       bool pad_components_to_full_registers = false);
    fs_reg get_nir_image_deref(const nir_deref_var *deref);
    fs_reg get_indirect_offset(nir_intrinsic_instr *instr);
    void emit_percomp(const brw::fs_builder &bld, const fs_inst &inst,
diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h
index 633086c64b..804d52e5df 100644
--- a/src/intel/compiler/brw_fs_builder.h
+++ b/src/intel/compiler/brw_fs_builder.h
@@ -182,17 +182,28 @@ namespace brw {
        * component in this IR).
        */
       dst_reg
-      vgrf(enum brw_reg_type type, unsigned n = 1) const
+      vgrf(enum brw_reg_type type,
+           unsigned n = 1,
+           bool pad_components_to_full_registers = false) const
       {
          assert(dispatch_width() <= 32);
 
-         if (n > 0)
-            return dst_reg(VGRF, shader->alloc.allocate(
-                              DIV_ROUND_UP(n * type_sz(type) * dispatch_width(),
-                                           REG_SIZE)),
-                           type);
-         else
+         if (n == 0)
             return retype(null_reg_ud(), type);
+
+         const unsigned pad_per_component =
+            (pad_components_to_full_registers &&
+             type_sz(type) == 2 &&
+             dispatch_width() == 8) ? (REG_SIZE / 2) : 0;
+         const unsigned size =
+            n * ((type_sz(type) * dispatch_width()) + pad_per_component);
+         const unsigned nr = shader->alloc.allocate(
+                                DIV_ROUND_UP(size, REG_SIZE));
+
+         dst_reg dst = dst_reg(VGRF, nr, type);
+         dst.pad_per_component = pad_per_component;
+
+         return dst;
       }
 
       /**
diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp
index ed2511ecfa..637a1de6ae 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -447,6 +447,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
    inst->src[arg].file = entry->src.file;
    inst->src[arg].nr = entry->src.nr;
    inst->src[arg].stride *= entry->src.stride;
+   inst->src[arg].pad_per_component = entry->src.pad_per_component;
    inst->saturate = inst->saturate || entry->saturate;
 
    /* Compute the offset of inst->src[arg] relative to entry->dst */
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 16e8dfc186..35e78b134a 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -357,6 +357,9 @@ fs_visitor::nir_emit_impl(nir_function_impl *impl)
       unsigned size = array_elems * reg->num_components;
       const brw_reg_type reg_type =
          brw_reg_type_from_bit_size(reg->bit_size, BRW_REGISTER_TYPE_F);
+
+      /* TODO: Consider whether NIR locals need 16-bit component padding. */
+
       nir_locals[reg->index] = bld.vgrf(reg_type, size);
    }
 
@@ -1602,13 +1605,15 @@ fs_visitor::get_nir_src_imm(const nir_src &src)
 }
 
 fs_reg
-fs_visitor::get_nir_dest(const nir_dest &dest)
+fs_visitor::get_nir_dest(const nir_dest &dest,
+                          bool pad_components_to_full_registers)
 {
    if (dest.is_ssa) {
       const brw_reg_type reg_type =
          brw_reg_type_from_bit_size(dest.ssa.bit_size, BRW_REGISTER_TYPE_F);
       nir_ssa_values[dest.ssa.index] =
-         bld.vgrf(reg_type, dest.ssa.num_components);
+         bld.vgrf(reg_type, dest.ssa.num_components,
+                  pad_components_to_full_registers);
       return nir_ssa_values[dest.ssa.index];
    } else {
       /* We don't handle indirects on locals */
diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h
index cd603630a4..b4a1d7ef5a 100644
--- a/src/intel/compiler/brw_ir_fs.h
+++ b/src/intel/compiler/brw_ir_fs.h
@@ -51,6 +51,9 @@ public:
 
    /** Register region horizontal stride */
    uint8_t stride;
+
+   /** Per-component padding in bytes, e.g. for SIMD8 half float payloads. */
+   uint8_t pad_per_component;
 };
 
 static inline fs_reg
-- 
2.11.0



More information about the mesa-dev mailing list