[Mesa-dev] [PATCH 42/51] i965: WIP: Support for uploading 16-bit uniforms from 32-bit store

Topi Pohjolainen topi.pohjolainen at gmail.com
Fri Nov 24 12:27:09 UTC 2017


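At this point 16-bit uniforms still occupy full 32-bit slots in the
pull/push constant buffers and in the shader deployment payload. Each
param now carries a glsl_base_type tag, and values tagged as
GLSL_TYPE_FLOAT16 are converted from their 32-bit float store to half
precision when the constant data is populated.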

Signed-off-by: Topi Pohjolainen <topi.pohjolainen at intel.com>
---
 src/intel/compiler/brw_compiler.h               |  9 +++++++++
 src/intel/compiler/brw_fs.cpp                   | 12 ++++++++++++
 src/intel/compiler/brw_fs_nir.cpp               |  2 ++
 src/intel/compiler/brw_fs_visitor.cpp           |  1 +
 src/intel/compiler/brw_vec4.cpp                 |  8 ++++++++
 src/intel/compiler/brw_vec4_gs_visitor.cpp      |  8 ++++++++
 src/intel/compiler/brw_vec4_visitor.cpp         |  4 ++++
 src/mesa/drivers/dri/i965/brw_cs.c              |  2 ++
 src/mesa/drivers/dri/i965/brw_curbe.c           |  2 ++
 src/mesa/drivers/dri/i965/brw_disk_cache.c      | 14 ++++++++++++++
 src/mesa/drivers/dri/i965/brw_gs.c              |  2 ++
 src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp  | 10 ++++++++++
 src/mesa/drivers/dri/i965/brw_program.c         |  2 ++
 src/mesa/drivers/dri/i965/brw_state.h           |  1 +
 src/mesa/drivers/dri/i965/brw_tcs.c             |  2 ++
 src/mesa/drivers/dri/i965/brw_tes.c             |  2 ++
 src/mesa/drivers/dri/i965/brw_vs.c              |  2 ++
 src/mesa/drivers/dri/i965/brw_wm.c              |  2 ++
 src/mesa/drivers/dri/i965/gen6_constant_state.c | 17 ++++++++++++++++-
 19 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index cdd61aae6c..7b43c4a135 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -613,6 +613,12 @@ struct brw_stage_prog_data {
     */
    uint32_t *param;
    uint32_t *pull_param;
+
+   /* Tells the GLSL backend whether a conversion from the 32-bit store to,
+    * for example, 16 bits is required when the value is uploaded.
+    */
+   unsigned char *param_type;      /* enum glsl_base_type */
+   unsigned char *pull_param_type; /* enum glsl_base_type */
 };
 
 static inline uint32_t *
@@ -621,6 +627,9 @@ brw_stage_prog_data_add_params(struct brw_stage_prog_data *prog_data,
 {
    unsigned old_nr_params = prog_data->nr_params;
    prog_data->nr_params += nr_new_params;
+   prog_data->param_type = reralloc(ralloc_parent(prog_data->param_type),
+                                    prog_data->param_type, unsigned char,
+                                    prog_data->nr_params);
    prog_data->param = reralloc(ralloc_parent(prog_data->param),
                                prog_data->param, uint32_t,
                                prog_data->nr_params);
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 8e77248470..3ca1d4cbc7 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -2102,19 +2102,26 @@ fs_visitor::assign_constant_locations()
     * create two new arrays for push/pull params.
     */
    uint32_t *param = stage_prog_data->param;
+   unsigned char *param_type = stage_prog_data->param_type;
    stage_prog_data->nr_params = num_push_constants;
    if (num_push_constants) {
       stage_prog_data->param = ralloc_array(mem_ctx, uint32_t,
                                             num_push_constants);
+      stage_prog_data->param_type = ralloc_array(mem_ctx, unsigned char,
+                                                 num_push_constants);
    } else {
       stage_prog_data->param = NULL;
+      stage_prog_data->param_type = NULL;
    }
    assert(stage_prog_data->nr_pull_params == 0);
    assert(stage_prog_data->pull_param == NULL);
+   assert(stage_prog_data->pull_param_type == NULL);
    if (num_pull_constants > 0) {
       stage_prog_data->nr_pull_params = num_pull_constants;
       stage_prog_data->pull_param = ralloc_array(mem_ctx, uint32_t,
                                                  num_pull_constants);
+      stage_prog_data->pull_param_type = ralloc_array(NULL, unsigned char,
+                                                      num_pull_constants);
    }
 
    /* Now that we know how many regular uniforms we'll push, reduce the
@@ -2143,11 +2150,16 @@ fs_visitor::assign_constant_locations()
       uint32_t value = param[i];
       if (pull_constant_loc[i] != -1) {
          stage_prog_data->pull_param[pull_constant_loc[i]] = value;
+         stage_prog_data->pull_param_type[pull_constant_loc[i]] = 
+            param_type[i];
       } else if (push_constant_loc[i] != -1) {
          stage_prog_data->param[push_constant_loc[i]] = value;
+         stage_prog_data->param_type[push_constant_loc[i]] =
+            param_type[i];
       }
    }
    ralloc_free(param);
+   ralloc_free(param_type);
 }
 
 bool
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 43127e00e8..2060a3139d 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -120,9 +120,11 @@ fs_visitor::nir_setup_uniforms()
        * on the list.
        */
       assert(uniforms == prog_data->nr_params);
+      const unsigned old_nr_params = prog_data->nr_params;
       uint32_t *param = brw_stage_prog_data_add_params(prog_data, 1);
       *param = BRW_PARAM_BUILTIN_SUBGROUP_ID;
       subgroup_id = fs_reg(UNIFORM, uniforms++, BRW_REGISTER_TYPE_UD);
+      prog_data->param_type[old_nr_params] = GLSL_TYPE_FLOAT;
    }
 }
 
diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp
index 01e75ff7fc..5dc590be4c 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -488,6 +488,7 @@ fs_visitor::setup_uniform_clipplane_values()
       for (int j = 0; j < 4; ++j) {
          stage_prog_data->param[uniforms + j] =
             BRW_PARAM_BUILTIN_CLIP_PLANE(i, j);
+         stage_prog_data->param_type[uniforms + j] = GLSL_TYPE_FLOAT;
       }
       uniforms += 4;
    }
diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp
index 65bf8d52f9..b79c8e61f5 100644
--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -702,6 +702,11 @@ vec4_visitor::pack_uniform_registers()
    memcpy(param, stage_prog_data->param,
           sizeof(uint32_t) * stage_prog_data->nr_params);
 
+   unsigned char *param_type = ralloc_array(NULL, unsigned char,
+                                            stage_prog_data->nr_params);
+   memcpy(param_type, stage_prog_data->param_type,
+          sizeof(unsigned char) * stage_prog_data->nr_params);
+
    /* Now, figure out a packing of the live uniform vectors into our
     * push constants. Start with dvec{3,4} because they are aligned to
     * dvec4 size (2 vec4).
@@ -724,6 +729,8 @@ vec4_visitor::pack_uniform_registers()
                                       new_chans_used);
       /* Move the references to the data */
       for (int j = 0; j < size; j++) {
+         stage_prog_data->param_type[dst * 4 + new_chan[src] + j] =
+            param_type[src * 4 + j];
          stage_prog_data->param[dst * 4 + new_chan[src] + j] =
             param[src * 4 + j];
       }
@@ -748,6 +755,7 @@ vec4_visitor::pack_uniform_registers()
    }
 
    ralloc_free(param);
+   ralloc_free(param_type);
    this->uniforms = new_uniform_count;
 
    /* Now, update the instructions for our repacked uniforms. */
diff --git a/src/intel/compiler/brw_vec4_gs_visitor.cpp b/src/intel/compiler/brw_vec4_gs_visitor.cpp
index 7848832e41..05308e7d4e 100644
--- a/src/intel/compiler/brw_vec4_gs_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp
@@ -892,10 +892,15 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
          uint32_t *param = ralloc_array(NULL, uint32_t, param_count);
          memcpy(param, prog_data->base.base.param,
                 sizeof(uint32_t) * param_count);
+         unsigned char *param_type =
+            ralloc_array(NULL, unsigned char, param_count);
+         memcpy(param_type, prog_data->base.base.param_type,
+                sizeof(unsigned char) * param_count);
 
          if (v.run()) {
             /* Success! Backup is not needed */
             ralloc_free(param);
+            ralloc_free(param_type);
             return brw_vec4_generate_assembly(compiler, log_data, mem_ctx,
                                               shader, &prog_data->base, v.cfg,
                                               &prog_data->base.base.
@@ -910,9 +915,12 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
              */
             memcpy(prog_data->base.base.param, param,
                    sizeof(uint32_t) * param_count);
+            memcpy(prog_data->base.base.param_type, param_type,
+                   sizeof(unsigned char) * param_count);
             prog_data->base.base.nr_params = param_count;
             prog_data->base.base.nr_pull_params = 0;
             ralloc_free(param);
+            ralloc_free(param_type);
          }
       }
    }
diff --git a/src/intel/compiler/brw_vec4_visitor.cpp b/src/intel/compiler/brw_vec4_visitor.cpp
index 53f6a5ed54..5a55cf7fdf 100644
--- a/src/intel/compiler/brw_vec4_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_visitor.cpp
@@ -1778,6 +1778,8 @@ vec4_visitor::move_uniform_array_access_to_pull_constants()
    assert(stage_prog_data->nr_pull_params == 0);
    stage_prog_data->pull_param = ralloc_array(mem_ctx, uint32_t,
                                               this->uniforms * 4);
+   stage_prog_data->pull_param_type = ralloc_array(mem_ctx, unsigned char,
+                                                   this->uniforms * 4);
 
    int pull_constant_loc[this->uniforms];
    memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
@@ -1808,6 +1810,8 @@ vec4_visitor::move_uniform_array_access_to_pull_constants()
       pull_constant_loc[j] = stage_prog_data->nr_pull_params / 4;
 
       for (int i = 0; i < 4; i++) {
+         stage_prog_data->pull_param_type[stage_prog_data->nr_pull_params]
+            = stage_prog_data->param_type[j * 4 + i];
          stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
             = stage_prog_data->param[j * 4 + i];
       }
diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c
index 71d0beaae9..5b83e9fca1 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.c
+++ b/src/mesa/drivers/dri/i965/brw_cs.c
@@ -118,7 +118,9 @@ brw_codegen_cs_prog(struct brw_context *brw,
 
    /* The param and pull_param arrays will be freed by the shader cache. */
    ralloc_steal(NULL, prog_data.base.param);
+   ralloc_steal(NULL, prog_data.base.param_type);
    ralloc_steal(NULL, prog_data.base.pull_param);
+   ralloc_steal(NULL, prog_data.base.pull_param_type);
    brw_upload_cache(&brw->cache, BRW_CACHE_CS_PROG,
                     key, sizeof(*key),
                     program, prog_data.base.program_size,
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index c747110e31..294b4a36d8 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -229,6 +229,7 @@ brw_upload_constant_buffer(struct brw_context *brw)
       /* BRW_NEW_FS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
       brw_populate_constant_data(brw, fp, &brw->wm.base, &buf[offset],
                                  brw->wm.base.prog_data->param,
+                                 brw->wm.base.prog_data->param_type,
                                  brw->wm.base.prog_data->nr_params);
    }
 
@@ -270,6 +271,7 @@ brw_upload_constant_buffer(struct brw_context *brw)
       /* BRW_NEW_VS_PROG_DATA | _NEW_PROGRAM_CONSTANTS: copy uniform values */
       brw_populate_constant_data(brw, vp, &brw->vs.base, &buf[offset],
                                  brw->vs.base.prog_data->param,
+                                 brw->vs.base.prog_data->param_type,
                                  brw->vs.base.prog_data->nr_params);
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c b/src/mesa/drivers/dri/i965/brw_disk_cache.c
index 853ea98af0..cae0fab4c0 100644
--- a/src/mesa/drivers/dri/i965/brw_disk_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c
@@ -96,10 +96,14 @@ write_blob_program_data(struct blob *binary, gl_shader_stage stage,
    /* Write push params */
    blob_write_bytes(binary, prog_data->param,
                     sizeof(uint32_t) * prog_data->nr_params);
+   blob_write_bytes(binary, prog_data->param_type,
+                    sizeof(unsigned char) * prog_data->nr_params);
 
    /* Write pull params */
    blob_write_bytes(binary, prog_data->pull_param,
                     sizeof(uint32_t) * prog_data->nr_pull_params);
+   blob_write_bytes(binary, prog_data->pull_param_type,
+                    sizeof(unsigned char) * prog_data->nr_pull_params);
 }
 
 static bool
@@ -120,12 +124,22 @@ read_blob_program_data(struct blob_reader *binary, struct gl_program *prog,
    blob_copy_bytes(binary, prog_data->param,
                    sizeof(uint32_t) * prog_data->nr_params);
 
+   prog_data->param_type = rzalloc_array(NULL, unsigned char,
+                                          prog_data->nr_params);
+   blob_copy_bytes(binary, prog_data->param_type,
+                   sizeof(unsigned char) * prog_data->nr_params);
+
    /* Read pull params */
    prog_data->pull_param = rzalloc_array(NULL, uint32_t,
                                          prog_data->nr_pull_params);
    blob_copy_bytes(binary, prog_data->pull_param,
                    sizeof(uint32_t) * prog_data->nr_pull_params);
 
+   prog_data->pull_param_type = rzalloc_array(NULL, unsigned char,
+                                              prog_data->nr_pull_params);
+   blob_copy_bytes(binary, prog_data->pull_param_type,
+                   sizeof(unsigned char) * prog_data->nr_pull_params);
+
    return (binary->current == binary->end && !binary->overrun);
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c
index f74ee178f2..98a3fbfe3f 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -142,7 +142,9 @@ brw_codegen_gs_prog(struct brw_context *brw,
 
    /* The param and pull_param arrays will be freed by the shader cache. */
    ralloc_steal(NULL, prog_data.base.base.param);
+   ralloc_steal(NULL, prog_data.base.base.param_type);
    ralloc_steal(NULL, prog_data.base.base.pull_param);
+   ralloc_steal(NULL, prog_data.base.base.pull_param_type);
    brw_upload_cache(&brw->cache, BRW_CACHE_GS_PROG,
                     key, sizeof(*key),
                     program, prog_data.base.base.program_size,
diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
index 9e135cbb1a..31c93125f8 100644
--- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
+++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp
@@ -158,18 +158,23 @@ brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var,
                                   storage->type->matrix_columns);
          unsigned vector_size = storage->type->vector_elements;
          unsigned max_vector_size = 4;
+         enum glsl_base_type component_type = storage->type->base_type;
          if (storage->type->base_type == GLSL_TYPE_DOUBLE ||
              storage->type->base_type == GLSL_TYPE_UINT64 ||
              storage->type->base_type == GLSL_TYPE_INT64) {
             vector_size *= 2;
             if (vector_size > 4)
                max_vector_size = 8;
+         } else if (var->type->get_scalar_type()->base_type ==
+                    GLSL_TYPE_FLOAT16) {
+            component_type = GLSL_TYPE_FLOAT16;
          }
 
          for (unsigned s = 0; s < vector_count; s++) {
             unsigned i;
             for (i = 0; i < vector_size; i++) {
                uint32_t idx = components - prog->sh.data->UniformDataSlots;
+               stage_prog_data->param_type[uniform_index] = component_type;
                stage_prog_data->param[uniform_index++] = BRW_PARAM_UNIFORM(idx);
                components++;
             }
@@ -177,6 +182,7 @@ brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var,
             if (!is_scalar) {
                /* Pad out with zeros if needed (only needed for vec4) */
                for (; i < max_vector_size; i++) {
+                  stage_prog_data->param_type[uniform_index] = component_type;
                   stage_prog_data->param[uniform_index++] =
                      BRW_PARAM_BUILTIN_ZERO;
                }
@@ -195,6 +201,8 @@ brw_nir_setup_glsl_uniforms(void *mem_ctx, nir_shader *shader,
    unsigned nr_params = shader->num_uniforms / 4;
    stage_prog_data->nr_params = nr_params;
    stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params);
+   stage_prog_data->param_type = rzalloc_array(mem_ctx, unsigned char,
+                                               nr_params);
 
    nir_foreach_variable(var, &shader->uniforms) {
       /* UBO's, atomics and samplers don't take up space in the
@@ -222,6 +230,8 @@ brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
    unsigned nr_params = plist->NumParameters * 4;
    stage_prog_data->nr_params = nr_params;
    stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params);
+   stage_prog_data->param_type = rzalloc_array(mem_ctx, unsigned char,
+                                               nr_params);
 
    /* For ARB programs, prog_to_nir generates a single "parameters" variable
     * for all uniform data.  nir_lower_wpos_ytransform may also create an
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 4573d9d303..11e2321077 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -682,7 +682,9 @@ brw_stage_prog_data_free(const void *p)
    struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p;
 
    ralloc_free(prog_data->param);
+   ralloc_free(prog_data->param_type);
    ralloc_free(prog_data->pull_param);
+   ralloc_free(prog_data->pull_param_type);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index ad508950f7..8a3c08fb71 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -240,6 +240,7 @@ brw_populate_constant_data(struct brw_context *brw,
                            const struct brw_stage_state *stage_state,
                            void *dst,
                            const uint32_t *param,
+                           const unsigned char *param_type,
                            unsigned nr_params);
 void
 brw_upload_pull_constants(struct brw_context *brw,
diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c
index 4424efea4f..8275b58cc6 100644
--- a/src/mesa/drivers/dri/i965/brw_tcs.c
+++ b/src/mesa/drivers/dri/i965/brw_tcs.c
@@ -263,7 +263,9 @@ brw_codegen_tcs_prog(struct brw_context *brw, struct brw_program *tcp,
 
    /* The param and pull_param arrays will be freed by the shader cache. */
    ralloc_steal(NULL, prog_data.base.base.param);
+   ralloc_steal(NULL, prog_data.base.base.param_type);
    ralloc_steal(NULL, prog_data.base.base.pull_param);
+   ralloc_steal(NULL, prog_data.base.base.pull_param_type);
    brw_upload_cache(&brw->cache, BRW_CACHE_TCS_PROG,
                     key, sizeof(*key),
                     program, prog_data.base.base.program_size,
diff --git a/src/mesa/drivers/dri/i965/brw_tes.c b/src/mesa/drivers/dri/i965/brw_tes.c
index 4222d4231f..73e7cd5e3f 100644
--- a/src/mesa/drivers/dri/i965/brw_tes.c
+++ b/src/mesa/drivers/dri/i965/brw_tes.c
@@ -133,7 +133,9 @@ brw_codegen_tes_prog(struct brw_context *brw,
 
    /* The param and pull_param arrays will be freed by the shader cache. */
    ralloc_steal(NULL, prog_data.base.base.param);
+   ralloc_steal(NULL, prog_data.base.base.param_type);
    ralloc_steal(NULL, prog_data.base.base.pull_param);
+   ralloc_steal(NULL, prog_data.base.base.pull_param_type);
    brw_upload_cache(&brw->cache, BRW_CACHE_TES_PROG,
                     key, sizeof(*key),
                     program, prog_data.base.base.program_size,
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index a56b256bc3..fe640fde44 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -252,7 +252,9 @@ brw_codegen_vs_prog(struct brw_context *brw,
 
    /* The param and pull_param arrays will be freed by the shader cache. */
    ralloc_steal(NULL, prog_data.base.base.param);
+   ralloc_steal(NULL, prog_data.base.base.param_type);
    ralloc_steal(NULL, prog_data.base.base.pull_param);
+   ralloc_steal(NULL, prog_data.base.base.pull_param_type);
    brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG,
                     key, sizeof(struct brw_vs_prog_key),
                     program, prog_data.base.base.program_size,
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 08bacebd57..849d31bca9 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -216,7 +216,9 @@ brw_codegen_wm_prog(struct brw_context *brw,
 
    /* The param and pull_param arrays will be freed by the shader cache. */
    ralloc_steal(NULL, prog_data.base.param);
+   ralloc_steal(NULL, prog_data.base.param_type);
    ralloc_steal(NULL, prog_data.base.pull_param);
+   ralloc_steal(NULL, prog_data.base.pull_param_type);
    brw_upload_cache(&brw->cache, BRW_CACHE_FS_PROG,
                     key, sizeof(struct brw_wm_prog_key),
                     program, prog_data.base.program_size,
diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c b/src/mesa/drivers/dri/i965/gen6_constant_state.c
index 89b1202dd6..ca7bd11244 100644
--- a/src/mesa/drivers/dri/i965/gen6_constant_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_constant_state.c
@@ -29,6 +29,7 @@
 #include "intel_buffer_objects.h"
 #include "program/prog_parameter.h"
 #include "main/shaderapi.h"
+#include "util/half_float.h"
 
 static uint32_t
 f_as_u32(float f)
@@ -37,6 +38,13 @@ f_as_u32(float f)
    return fi.ui;
 }
 
+static float
+u32_as_f(uint32_t ui)
+{
+   union fi fi = { .ui = ui };
+   return fi.f;
+}
+
 static uint32_t
 brw_param_value(struct brw_context *brw,
                 const struct gl_program *prog,
@@ -98,11 +106,16 @@ brw_populate_constant_data(struct brw_context *brw,
                            const struct brw_stage_state *stage_state,
                            void *void_dst,
                            const uint32_t *param,
+                           const unsigned char *param_type,
                            unsigned nr_params)
 {
    uint32_t *dst = void_dst;
-   for (unsigned i = 0; i < nr_params; i++)
+   for (unsigned i = 0; i < nr_params; i++) {
       dst[i] = brw_param_value(brw, prog, stage_state, param[i]);
+
+      if ((enum glsl_base_type)param_type[i] == GLSL_TYPE_FLOAT16)
+         dst[i] = _mesa_float_to_half(u32_as_f(dst[i]));
+   }
 }
 
 
@@ -169,6 +182,7 @@ gen6_upload_push_constants(struct brw_context *brw,
        */
       brw_populate_constant_data(brw, prog, stage_state, param,
                                  prog_data->param,
+                                 prog_data->param_type,
                                  prog_data->nr_params);
 
       if (0) {
@@ -256,6 +270,7 @@ brw_upload_pull_constants(struct brw_context *brw,
 
    brw_populate_constant_data(brw, prog, stage_state, constants,
                               prog_data->pull_param,
+                              prog_data->pull_param_type,
                               prog_data->nr_pull_params);
 
    if (0) {
-- 
2.11.0



More information about the mesa-dev mailing list