[Mesa-dev] [PATCH v4 054/129] nir,spirv: Rework function calls
Bas Nieuwenhuizen
bas at basnieuwenhuizen.nl
Fri Jun 1 12:48:34 UTC 2018
Actually something post inlining that actually shows the problem:
shader: MESA_SHADER_COMPUTE
local-size: 1, 1, 1
shared-size: 1
inputs: 0
outputs: 0
uniforms: 0
shared: 0
decl_var uniform INTERP_MODE_NONE sampler2D @0 (0, 0, 0)
decl_var uniform INTERP_MODE_NONE sampler @1 (0, 0, 1)
decl_var system INTERP_MODE_NONE uvec3 @2
decl_function (null) (2 params)
impl (null) {
block block_0:
/* preds: */
vec1 32 ssa_7 = load_const (0x00000008 /* 0.000000 */)
vec1 32 ssa_5 = load_const (0x00000008 /* 0.000000 */)
vec1 32 ssa_0 = intrinsic load_param () (0) /* param_idx=0 */
vec1 32 ssa_1 = deref_cast (uint *)ssa_0 (local uint)
vec1 32 ssa_2 = intrinsic load_deref (ssa_1) ()
vec1 32 ssa_3 = intrinsic load_param () (1) /* param_idx=1 */
vec1 32 ssa_4 = deref_cast (sampler2D *)ssa_3 (local sampler2D)
vec1 32 ssa_6 = umod ssa_2, ssa_5
vec1 32 ssa_8 = udiv ssa_2, ssa_7
vec2 32 ssa_9 = vec2 ssa_6, ssa_8
vec2 32 ssa_13 = imov ssa_9
vec4 32 ssa_14 = txf ssa_4 (texture_deref), ssa_13 (coord),
vec1 32 ssa_15 = load_const (0x00000000 /* 0.000000 */)
vec1 32 ssa_16 = intrinsic vulkan_resource_index (ssa_15) (0, 2) /*
desc-set=0 */ /* binding=2 */
vec1 32 ssa_17 = load_const (0x00000000 /* 0.000000 */)
vec1 32 ssa_18 = load_const (0x00000000 /* 0.000000 */)
vec1 32 ssa_19 = iadd ssa_17, ssa_18
vec1 32 ssa_20 = load_const (0x00000010 /* 0.000000 */)
vec1 32 ssa_21 = imul ssa_2, ssa_20
vec1 32 ssa_22 = iadd ssa_19, ssa_21
intrinsic store_ssbo (ssa_14, ssa_16, ssa_22) (15) /* wrmask=xyzw */
/* succs: block_0 */
block block_0:
}
decl_function main (0 params)
impl main {
decl_var INTERP_MODE_NONE uint @3
decl_var INTERP_MODE_NONE uint arg_tmp
block block_0:
/* preds: */
vec1 32 ssa_0 = deref_var &@2 (system uvec3)
vec3 32 ssa_1 = intrinsic load_deref (ssa_0) ()
vec1 32 ssa_2 = fmov ssa_1.x
vec1 32 ssa_3 = deref_var &@3 (local uint)
intrinsic store_deref (ssa_3, ssa_2) (1) /* wrmask=x */
vec1 32 ssa_4 = deref_var &@2 (system uvec3)
vec3 32 ssa_5 = intrinsic load_deref (ssa_4) ()
vec1 32 ssa_6 = fmov ssa_5.x
vec1 32 ssa_7 = deref_var &arg_tmp (local uint)
intrinsic store_deref (ssa_7, ssa_6) (1) /* wrmask=x */
vec1 32 ssa_8 = deref_var &@0 (uniform sampler2D)
vec1 32 ssa_9 = load_const (0x00000008 /* 0.000000 */)
vec1 32 ssa_10 = load_const (0x00000008 /* 0.000000 */)
vec1 32 ssa_11 = deref_cast (uint *)ssa_7 (local uint)
vec1 32 ssa_12 = intrinsic load_deref (ssa_11) ()
vec1 32 ssa_13 = deref_cast (sampler2D *)ssa_8 (local sampler2D)
vec1 32 ssa_14 = umod ssa_12, ssa_10
vec1 32 ssa_15 = udiv ssa_12, ssa_9
vec2 32 ssa_16 = vec2 ssa_14, ssa_15
vec2 32 ssa_17 = imov ssa_16
vec4 32 ssa_18 = txf ssa_13 (texture_deref), ssa_17 (coord),
error: var->data.mode == deref->mode
(../mesa/src/compiler/nir/nir_validate.c:585)
vec1 32 ssa_19 = load_const (0x00000000 /* 0.000000 */)
vec1 32 ssa_20 = intrinsic vulkan_resource_index (ssa_19) (0, 2) /*
desc-set=0 */ /* binding=2 */
vec1 32 ssa_21 = load_const (0x00000000 /* 0.000000 */)
vec1 32 ssa_22 = load_const (0x00000000 /* 0.000000 */)
vec1 32 ssa_23 = iadd ssa_21, ssa_22
vec1 32 ssa_24 = load_const (0x00000010 /* 0.000000 */)
vec1 32 ssa_25 = imul ssa_12, ssa_24
vec1 32 ssa_26 = iadd ssa_23, ssa_25
intrinsic store_ssbo (ssa_18, ssa_20, ssa_26) (15) /* wrmask=xyzw */
/* succs: block_0 */
block block_0:
}
(The validation check it errors on was only added locally)
So I'm wondering what the semantic of the cast should be I tihnk we
have two cases here that we want to handle completely different:
1) We have noop casts that we add for metadata purposes when the src
is not a deref.
2) We have casts that actually cast between address spaces specified
by the app, where the app essentially says: trust me, I know what I'm
doing.
So in the inline case we can certainly fix up all the downstream deref
and casts from the param. However, that gets into issues when an
application has casts from case (2). Especially if we have a select or
phi between a param and an actual local variable, we need to convert
to the generic address space which (a) not all drivers may support and
(b) throws away any info given by the cast for (2).
For Vulkan we could do the following:
1) special case pointers to Samplers, and don't allow casting to/from
them typewise (or arrays to etc.). If we have that we can either in a
fixup pass only work on sampler derefs, or we can in spirv_to_nir
directly use uniform var mode already.
2) Store the Sampler2D in a local variable. In case we can't lower the
variable there are drivers which can't support it, and getting some of
the layout metadata might be impossible (e.g. multiplane images?)
3) Split the casting instruction into two, one to "just bring the data
into a deref" and another actual cast.
I think solution 1) is the only one that is not going to need advanced
driver support (generic address space or storing Samplers in
variables). Though I'd be happy to hear if something has a better
idea.
On Fri, Jun 1, 2018 at 12:48 PM, Bas Nieuwenhuizen
<bas at basnieuwenhuizen.nl> wrote:
> The casts are not lowered away in some of the CTS tests for radv:
>
> shader: MESA_SHADER_COMPUTE
> local-size: 1, 1, 1
> shared-size: 1
> inputs: 0
> outputs: 0
> uniforms: 0
> shared: 0
> decl_var uniform INTERP_MODE_NONE sampler2D @0 (0, 0, 0)
> decl_var uniform INTERP_MODE_NONE sampler2D @1 (0, 0, 1)
> decl_var uniform INTERP_MODE_NONE sampler @2 (0, 0, 0)
> decl_var uniform INTERP_MODE_NONE sampler @3 (0, 0, 1)
> decl_var system INTERP_MODE_NONE uvec3 @4
> decl_function (null) (3 params)
>
> impl (null) {
> block block_0:
> /* preds: */
> vec1 32 ssa_9 = load_const (0x00000008 /* 0.000000 */)
> vec1 32 ssa_7 = load_const (0x00000008 /* 0.000000 */)
> vec1 32 ssa_0 = intrinsic load_param () (0) /* param_idx=0 */
> vec1 32 ssa_1 = deref_cast (uint *)ssa_0 (local uint)
> vec1 32 ssa_2 = intrinsic load_deref (ssa_1) ()
> vec1 32 ssa_3 = intrinsic load_param () (1) /* param_idx=1 */
> vec1 32 ssa_4 = deref_cast (sampler2D *)ssa_3 (local sampler2D)
> vec1 32 ssa_8 = umod ssa_2, ssa_7
> vec1 32 ssa_10 = udiv ssa_2, ssa_9
> vec2 32 ssa_11 = vec2 ssa_8, ssa_10
> vec2 32 ssa_15 = imov ssa_11
> vec4 32 ssa_16 = txf ssa_4 (texture_deref), ssa_15 (coord),
> vec1 32 ssa_17 = load_const (0x00000000 /* 0.000000 */)
> vec1 32 ssa_18 = intrinsic vulkan_resource_index (ssa_17) (0, 2) /*
> desc-set=0 */ /* binding=2 */
> vec1 32 ssa_19 = load_const (0x00000000 /* 0.000000 */)
> vec1 32 ssa_20 = load_const (0x00000000 /* 0.000000 */)
> vec1 32 ssa_21 = iadd ssa_19, ssa_20
> vec1 32 ssa_22 = load_const (0x00000010 /* 0.000000 */)
> vec1 32 ssa_23 = imul ssa_2, ssa_22
> vec1 32 ssa_24 = iadd ssa_21, ssa_23
> intrinsic store_ssbo (ssa_16, ssa_18, ssa_24) (15) /* wrmask=xyzw */
> return
> /* succs: block_0 */
> block block_0:
> }
>
> decl_function main (0 params)
>
> impl main {
> decl_var INTERP_MODE_NONE uint @5
> decl_var INTERP_MODE_NONE uint arg_tmp
> block block_0:
> /* preds: */
> vec1 32 ssa_0 = deref_var &@4 (system uvec3)
> vec3 32 ssa_3 = intrinsic load_deref (ssa_0) ()
> vec1 32 ssa_4 = fmov ssa_3.x
> vec1 32 ssa_5 = deref_var &@5 (local uint)
> intrinsic store_deref (ssa_5, ssa_4) (1) /* wrmask=x */
> vec1 32 ssa_6 = deref_var &@4 (system uvec3)
> vec3 32 ssa_9 = intrinsic load_deref (ssa_6) ()
> vec1 32 ssa_10 = fmov ssa_9.x
> vec1 32 ssa_11 = deref_var &arg_tmp (local uint)
> intrinsic store_deref (ssa_11, ssa_10) (1) /* wrmask=x */
> vec1 32 ssa_12 = deref_var &@1 (uniform sampler2D)
> vec1 32 ssa_13 = deref_var &@2 (uniform sampler)
> call (null) ssa_11, ssa_12, ssa_13
> return
> /* succs: block_0 */
> block block_0:
> }
>
> The cast of the sampler to the local address space is obviously
> invalid as the underlying storage is uniform.
>
> (Also besides, do we really want to support casts & things not easily
> derivable from a var for textures/samplers?)
>
> On Fri, Jun 1, 2018 at 7:03 AM, Jason Ekstrand <jason at jlekstrand.net> wrote:
>> This commit completely reworks function calls in NIR. Instead of having
>> a set of variables for the parameters and return value, nir_call_instr
>> now has simply has a number of sources which get mapped to load_param
>> intrinsics inside the functions. It's up to the client API to build an
>> ABI on top of that. In SPIR-V, out parameters are handled by passing
>> the result of a deref through as an SSA value and storing to it.
>>
>> This virtue of this approach can be seen by how much it allows us to
>> delete from core NIR. In particular, nir_inline_functions gets halved
>> and goes from a fairly difficult pass to understand in detail to almost
>> trivial. It also simplifies spirv_to_nir somewhat because NIR functions
>> never were a good fit for SPIR-V.
>>
>> Unfortunately, there is no good way to do this without a mega-commit.
>> Core NIR and SPIR-V have to be changed at the same time. This also
>> requires changes to anv and radv because nir_inline_functions couldn't
>> handle deref instructions before this change and can't work without them
>> after this change.
>> ---
>> src/amd/vulkan/radv_shader.c | 5 +-
>> src/compiler/glsl/glsl_to_nir.cpp | 1 -
>> src/compiler/nir/nir.c | 55 +++----
>> src/compiler/nir/nir.h | 35 ++---
>> src/compiler/nir/nir_builder.h | 16 ++
>> src/compiler/nir/nir_clone.c | 15 +-
>> src/compiler/nir/nir_inline_functions.c | 193 ++++-------------------
>> src/compiler/nir/nir_intrinsics.py | 4 +
>> src/compiler/nir/nir_print.c | 63 +-------
>> src/compiler/nir/nir_remove_dead_variables.c | 20 +--
>> src/compiler/nir/nir_serialize.c | 44 ++----
>> src/compiler/nir/nir_sweep.c | 4 -
>> src/compiler/nir/nir_validate.c | 38 ++---
>> src/compiler/spirv/spirv_to_nir.c | 64 ++++----
>> src/compiler/spirv/vtn_cfg.c | 221 +++++++++++++--------------
>> src/compiler/spirv/vtn_private.h | 3 -
>> src/compiler/spirv/vtn_variables.c | 9 --
>> src/intel/vulkan/anv_pipeline.c | 5 +-
>> 18 files changed, 257 insertions(+), 538 deletions(-)
>>
>> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
>> index 5c6c2a2..5897b2a 100644
>> --- a/src/amd/vulkan/radv_shader.c
>> +++ b/src/amd/vulkan/radv_shader.c
>> @@ -235,8 +235,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
>>
>> free(spec_entries);
>>
>> - NIR_PASS_V(nir, nir_lower_deref_instrs, ~0);
>> -
>> /* We have to lower away local constant initializers right before we
>> * inline functions. That way they get properly initialized at the top
>> * of the function and not at the top of its caller.
>> @@ -244,6 +242,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
>> NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local);
>> NIR_PASS_V(nir, nir_lower_returns);
>> NIR_PASS_V(nir, nir_inline_functions);
>> + NIR_PASS_V(nir, nir_copy_prop);
>>
>> /* Pick off the single entrypoint that we want */
>> foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
>> @@ -253,6 +252,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
>> assert(exec_list_length(&nir->functions) == 1);
>> entry_point->name = ralloc_strdup(entry_point, "main");
>>
>> + NIR_PASS_V(nir, nir_lower_deref_instrs, ~0);
>> +
>> /* Make sure we lower constant initializers on output variables so that
>> * nir_remove_dead_variables below sees the corresponding stores
>> */
>> diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp
>> index 79af4ce..b2602ea 100644
>> --- a/src/compiler/glsl/glsl_to_nir.cpp
>> +++ b/src/compiler/glsl/glsl_to_nir.cpp
>> @@ -516,7 +516,6 @@ nir_visitor::visit(ir_function_signature *ir)
>>
>> assert(strcmp(func->name, "main") == 0);
>> assert(ir->parameters.is_empty());
>> - assert(func->return_type == glsl_type::void_type);
>>
>> this->is_global = false;
>>
>> diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
>> index df8214d..801e569 100644
>> --- a/src/compiler/nir/nir.c
>> +++ b/src/compiler/nir/nir.c
>> @@ -127,10 +127,6 @@ nir_shader_add_variable(nir_shader *shader, nir_variable *var)
>> assert(!"nir_shader_add_variable cannot be used for local variables");
>> break;
>>
>> - case nir_var_param:
>> - assert(!"nir_shader_add_variable cannot be used for function parameters");
>> - break;
>> -
>> case nir_var_global:
>> exec_list_push_tail(&shader->globals, &var->node);
>> break;
>> @@ -207,7 +203,6 @@ nir_function_create(nir_shader *shader, const char *name)
>> func->shader = shader;
>> func->num_params = 0;
>> func->params = NULL;
>> - func->return_type = glsl_void_type();
>> func->impl = NULL;
>>
>> return func;
>> @@ -291,9 +286,6 @@ nir_function_impl_create_bare(nir_shader *shader)
>> exec_list_make_empty(&impl->body);
>> exec_list_make_empty(&impl->registers);
>> exec_list_make_empty(&impl->locals);
>> - impl->num_params = 0;
>> - impl->params = NULL;
>> - impl->return_var = NULL;
>> impl->reg_alloc = 0;
>> impl->ssa_alloc = 0;
>> impl->valid_metadata = nir_metadata_none;
>> @@ -322,26 +314,6 @@ nir_function_impl_create(nir_function *function)
>> function->impl = impl;
>> impl->function = function;
>>
>> - impl->num_params = function->num_params;
>> - impl->params = ralloc_array(function->shader,
>> - nir_variable *, impl->num_params);
>> -
>> - for (unsigned i = 0; i < impl->num_params; i++) {
>> - impl->params[i] = rzalloc(function->shader, nir_variable);
>> - impl->params[i]->type = function->params[i].type;
>> - impl->params[i]->data.mode = nir_var_param;
>> - impl->params[i]->data.location = i;
>> - }
>> -
>> - if (!glsl_type_is_void(function->return_type)) {
>> - impl->return_var = rzalloc(function->shader, nir_variable);
>> - impl->return_var->type = function->return_type;
>> - impl->return_var->data.mode = nir_var_param;
>> - impl->return_var->data.location = -1;
>> - } else {
>> - impl->return_var = NULL;
>> - }
>> -
>> return impl;
>> }
>>
>> @@ -539,13 +511,16 @@ nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
>> nir_call_instr *
>> nir_call_instr_create(nir_shader *shader, nir_function *callee)
>> {
>> - nir_call_instr *instr = ralloc(shader, nir_call_instr);
>> - instr_init(&instr->instr, nir_instr_type_call);
>> + const unsigned num_params = callee->num_params;
>> + nir_call_instr *instr =
>> + rzalloc_size(shader, sizeof(*instr) +
>> + num_params * sizeof(instr->params[0]));
>>
>> + instr_init(&instr->instr, nir_instr_type_call);
>> instr->callee = callee;
>> - instr->num_params = callee->num_params;
>> - instr->params = ralloc_array(instr, nir_deref_var *, instr->num_params);
>> - instr->return_deref = NULL;
>> + instr->num_params = num_params;
>> + for (unsigned i = 0; i < num_params; i++)
>> + src_init(&instr->params[i]);
>>
>> return instr;
>> }
>> @@ -1441,6 +1416,17 @@ visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb,
>> }
>>
>> static bool
>> +visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state)
>> +{
>> + for (unsigned i = 0; i < instr->num_params; i++) {
>> + if (!visit_src(&instr->params[i], cb, state))
>> + return false;
>> + }
>> +
>> + return true;
>> +}
>> +
>> +static bool
>> visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state)
>> {
>> nir_foreach_phi_src(src, instr) {
>> @@ -1500,7 +1486,8 @@ nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
>> return false;
>> break;
>> case nir_instr_type_call:
>> - /* Call instructions have no regular sources */
>> + if (!visit_call_src(nir_instr_as_call(instr), cb, state))
>> + return false;
>> break;
>> case nir_instr_type_load_const:
>> /* Constant load instructions have no regular sources */
>> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>> index c139dce..96c1a84 100644
>> --- a/src/compiler/nir/nir.h
>> +++ b/src/compiler/nir/nir.h
>> @@ -99,7 +99,6 @@ typedef enum {
>> nir_var_uniform = (1 << 4),
>> nir_var_shader_storage = (1 << 5),
>> nir_var_system_value = (1 << 6),
>> - nir_var_param = (1 << 7),
>> nir_var_shared = (1 << 8),
>> nir_var_all = ~0,
>> } nir_variable_mode;
>> @@ -387,7 +386,7 @@ typedef struct nir_variable {
>> static inline bool
>> nir_variable_is_global(const nir_variable *var)
>> {
>> - return var->data.mode != nir_var_local && var->data.mode != nir_var_param;
>> + return var->data.mode != nir_var_local;
>> }
>>
>> typedef struct nir_register {
>> @@ -1041,11 +1040,10 @@ nir_deref_instr_to_deref(nir_deref_instr *instr, void *mem_ctx);
>> typedef struct {
>> nir_instr instr;
>>
>> - unsigned num_params;
>> - nir_deref_var **params;
>> - nir_deref_var *return_deref;
>> -
>> struct nir_function *callee;
>> +
>> + unsigned num_params;
>> + nir_src params[];
>> } nir_call_instr;
>>
>> #include "nir_intrinsics.h"
>> @@ -1189,6 +1187,11 @@ typedef enum {
>> */
>> NIR_INTRINSIC_CLUSTER_SIZE = 11,
>>
>> + /**
>> + * Parameter index for a load_param intrinsic
>> + */
>> + NIR_INTRINSIC_PARAM_IDX = 12,
>> +
>> NIR_INTRINSIC_NUM_INDEX_FLAGS,
>>
>> } nir_intrinsic_index_flag;
>> @@ -1281,6 +1284,7 @@ INTRINSIC_IDX_ACCESSORS(component, COMPONENT, unsigned)
>> INTRINSIC_IDX_ACCESSORS(interp_mode, INTERP_MODE, unsigned)
>> INTRINSIC_IDX_ACCESSORS(reduction_op, REDUCTION_OP, unsigned)
>> INTRINSIC_IDX_ACCESSORS(cluster_size, CLUSTER_SIZE, unsigned)
>> +INTRINSIC_IDX_ACCESSORS(param_idx, PARAM_IDX, unsigned)
>>
>> /**
>> * \group texture information
>> @@ -1812,13 +1816,6 @@ typedef struct {
>> /** list for all local variables in the function */
>> struct exec_list locals;
>>
>> - /** array of variables used as parameters */
>> - unsigned num_params;
>> - nir_variable **params;
>> -
>> - /** variable used to hold the result of the function */
>> - nir_variable *return_var;
>> -
>> /** list of local registers in the function */
>> struct exec_list registers;
>>
>> @@ -1929,15 +1926,9 @@ nir_loop_last_block(nir_loop *loop)
>> return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node));
>> }
>>
>> -typedef enum {
>> - nir_parameter_in,
>> - nir_parameter_out,
>> - nir_parameter_inout,
>> -} nir_parameter_type;
>> -
>> typedef struct {
>> - nir_parameter_type param_type;
>> - const struct glsl_type *type;
>> + uint8_t num_components;
>> + uint8_t bit_size;
>> } nir_parameter;
>>
>> typedef struct nir_function {
>> @@ -1948,7 +1939,6 @@ typedef struct nir_function {
>>
>> unsigned num_params;
>> nir_parameter *params;
>> - const struct glsl_type *return_type;
>>
>> /** The implementation of this function.
>> *
>> @@ -2111,7 +2101,6 @@ nir_shader_get_entrypoint(nir_shader *shader)
>> assert(exec_list_length(&shader->functions) == 1);
>> struct exec_node *func_node = exec_list_get_head(&shader->functions);
>> nir_function *func = exec_node_data(nir_function, func_node, node);
>> - assert(func->return_type == glsl_void_type());
>> assert(func->num_params == 0);
>> assert(func->impl);
>> return func->impl;
>> diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
>> index da7a501..6a40e84 100644
>> --- a/src/compiler/nir/nir_builder.h
>> +++ b/src/compiler/nir/nir_builder.h
>> @@ -843,6 +843,22 @@ nir_copy_var(nir_builder *build, nir_variable *dest, nir_variable *src)
>> nir_builder_instr_insert(build, ©->instr);
>> }
>>
>> +static inline nir_ssa_def *
>> +nir_load_param(nir_builder *build, uint32_t param_idx)
>> +{
>> + assert(param_idx < build->impl->function->num_params);
>> + nir_parameter *param = &build->impl->function->params[param_idx];
>> +
>> + nir_intrinsic_instr *load =
>> + nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_param);
>> + nir_intrinsic_set_param_idx(load, param_idx);
>> + load->num_components = param->num_components;
>> + nir_ssa_dest_init(&load->instr, &load->dest,
>> + param->num_components, param->bit_size, NULL);
>> + nir_builder_instr_insert(build, &load->instr);
>> + return &load->dest.ssa;
>> +}
>> +
>> #include "nir_builder_opcodes.h"
>>
>> static inline nir_ssa_def *
>> diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
>> index 76121d0..4769fbd 100644
>> --- a/src/compiler/nir/nir_clone.c
>> +++ b/src/compiler/nir/nir_clone.c
>> @@ -536,10 +536,7 @@ clone_call(clone_state *state, const nir_call_instr *call)
>> nir_call_instr *ncall = nir_call_instr_create(state->ns, ncallee);
>>
>> for (unsigned i = 0; i < ncall->num_params; i++)
>> - ncall->params[i] = clone_deref_var(state, call->params[i], &ncall->instr);
>> -
>> - ncall->return_deref = clone_deref_var(state, call->return_deref,
>> - &ncall->instr);
>> + __clone_src(state, ncall, &ncall->params[i], &call->params[i]);
>>
>> return ncall;
>> }
>> @@ -721,14 +718,6 @@ clone_function_impl(clone_state *state, const nir_function_impl *fi)
>> clone_reg_list(state, &nfi->registers, &fi->registers);
>> nfi->reg_alloc = fi->reg_alloc;
>>
>> - nfi->num_params = fi->num_params;
>> - nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params);
>> - for (unsigned i = 0; i < fi->num_params; i++) {
>> - nfi->params[i] = clone_variable(state, fi->params[i]);
>> - }
>> - if (fi->return_var)
>> - nfi->return_var = clone_variable(state, fi->return_var);
>> -
>> assert(list_empty(&state->phi_srcs));
>>
>> clone_cf_list(state, &nfi->body, &fi->body);
>> @@ -770,8 +759,6 @@ clone_function(clone_state *state, const nir_function *fxn, nir_shader *ns)
>> nfxn->params = ralloc_array(state->ns, nir_parameter, fxn->num_params);
>> memcpy(nfxn->params, fxn->params, sizeof(nir_parameter) * fxn->num_params);
>>
>> - nfxn->return_type = fxn->return_type;
>> -
>> /* At first glance, it looks like we should clone the function_impl here.
>> * However, call instructions need to be able to reference at least the
>> * function and those will get processed as we clone the function_impls.
>> diff --git a/src/compiler/nir/nir_inline_functions.c b/src/compiler/nir/nir_inline_functions.c
>> index b91e7bc..06c90d9 100644
>> --- a/src/compiler/nir/nir_inline_functions.c
>> +++ b/src/compiler/nir/nir_inline_functions.c
>> @@ -24,126 +24,10 @@
>> #include "nir.h"
>> #include "nir_builder.h"
>> #include "nir_control_flow.h"
>> +#include "nir_vla.h"
>>
>> static bool inline_function_impl(nir_function_impl *impl, struct set *inlined);
>>
>> -static void
>> -convert_deref_to_param_deref(nir_instr *instr, nir_deref_var **deref,
>> - nir_call_instr *call)
>> -{
>> - /* This isn't a parameter, just return the deref */
>> - if ((*deref)->var->data.mode != nir_var_param)
>> - return;
>> -
>> - int param_idx = (*deref)->var->data.location;
>> -
>> - nir_deref_var *call_deref;
>> - if (param_idx >= 0) {
>> - assert(param_idx < call->callee->num_params);
>> - call_deref = call->params[param_idx];
>> - } else {
>> - call_deref = call->return_deref;
>> - }
>> - assert(call_deref);
>> -
>> - /* Now we make a new deref by concatenating the deref in the call's
>> - * parameter with the deref we were given.
>> - */
>> - nir_deref_var *new_deref = nir_deref_var_clone(call_deref, instr);
>> - nir_deref *new_tail = nir_deref_tail(&new_deref->deref);
>> - new_tail->child = (*deref)->deref.child;
>> - ralloc_steal(new_tail, new_tail->child);
>> - *deref = new_deref;
>> -}
>> -
>> -static void
>> -rewrite_param_derefs(nir_instr *instr, nir_call_instr *call)
>> -{
>> - switch (instr->type) {
>> - case nir_instr_type_intrinsic: {
>> - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
>> -
>> - for (unsigned i = 0;
>> - i < nir_intrinsic_infos[intrin->intrinsic].num_variables; i++) {
>> - convert_deref_to_param_deref(instr, &intrin->variables[i], call);
>> - }
>> - break;
>> - }
>> -
>> - case nir_instr_type_tex: {
>> - nir_tex_instr *tex = nir_instr_as_tex(instr);
>> - if (tex->texture)
>> - convert_deref_to_param_deref(&tex->instr, &tex->texture, call);
>> - if (tex->sampler)
>> - convert_deref_to_param_deref(&tex->instr, &tex->sampler, call);
>> - break;
>> - }
>> -
>> - default:
>> - break; /* Nothing else has derefs */
>> - }
>> -}
>> -
>> -static void
>> -lower_param_to_local(nir_variable *param, nir_function_impl *impl, bool write)
>> -{
>> - if (param->data.mode != nir_var_param)
>> - return;
>> -
>> - nir_parameter_type param_type;
>> - if (param->data.location >= 0) {
>> - assert(param->data.location < impl->num_params);
>> - param_type = impl->function->params[param->data.location].param_type;
>> - } else {
>> - /* Return variable */
>> - param_type = nir_parameter_out;
>> - }
>> -
>> - if ((write && param_type == nir_parameter_in) ||
>> - (!write && param_type == nir_parameter_out)) {
>> - /* In this case, we need a shadow copy. Turn it into a local */
>> - param->data.mode = nir_var_local;
>> - exec_list_push_tail(&impl->locals, ¶m->node);
>> - }
>> -}
>> -
>> -static bool
>> -lower_params_to_locals_block(nir_block *block, nir_function_impl *impl)
>> -{
>> - nir_foreach_instr(instr, block) {
>> - if (instr->type != nir_instr_type_intrinsic)
>> - continue;
>> -
>> - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
>> -
>> - switch (intrin->intrinsic) {
>> - case nir_intrinsic_store_var:
>> - lower_param_to_local(intrin->variables[0]->var, impl, true);
>> - break;
>> -
>> - case nir_intrinsic_copy_var:
>> - lower_param_to_local(intrin->variables[0]->var, impl, true);
>> - lower_param_to_local(intrin->variables[1]->var, impl, false);
>> - break;
>> -
>> - case nir_intrinsic_load_var:
>> - /* All other intrinsics which access variables (image_load_store)
>> - * do so in a read-only fasion.
>> - */
>> - for (unsigned i = 0;
>> - i < nir_intrinsic_infos[intrin->intrinsic].num_variables; i++) {
>> - lower_param_to_local(intrin->variables[i]->var, impl, false);
>> - }
>> - break;
>> -
>> - default:
>> - continue;
>> - }
>> - }
>> -
>> - return true;
>> -}
>> -
>> static bool
>> inline_functions_block(nir_block *block, nir_builder *b,
>> struct set *inlined)
>> @@ -171,42 +55,43 @@ inline_functions_block(nir_block *block, nir_builder *b,
>> nir_function_impl_clone(call->callee->impl);
>> callee_copy->function = call->callee;
>>
>> - /* Add copies of all in parameters */
>> - assert(call->num_params == callee_copy->num_params);
>> -
>> exec_list_append(&b->impl->locals, &callee_copy->locals);
>> exec_list_append(&b->impl->registers, &callee_copy->registers);
>>
>> b->cursor = nir_before_instr(&call->instr);
>>
>> - /* We now need to tie the two functions together using the
>> - * parameters. There are two ways we do this: One is to turn the
>> - * parameter into a local variable and do a shadow-copy. The other
>> - * is to treat the parameter as a "proxy" and rewrite derefs to use
>> - * the actual variable that comes from the call instruction. We
>> - * implement both schemes. The first is needed in the case where we
>> - * have an in parameter that we write or similar. The second case is
>> - * needed for handling things such as images and uniforms properly.
>> + /* Rewrite all of the uses of the callee's parameters to use the call
>> + * instructions sources. In order to ensure that the "load" happens
>> + * here and not later (for register sources), we make sure to convert it
>> + * to an SSA value first.
>> */
>> -
>> - /* Figure out when we need to lower to a shadow local */
>> - nir_foreach_block(block, callee_copy) {
>> - lower_params_to_locals_block(block, callee_copy);
>> - }
>> -
>> - for (unsigned i = 0; i < callee_copy->num_params; i++) {
>> - nir_variable *param = callee_copy->params[i];
>> -
>> - if (param->data.mode == nir_var_local &&
>> - call->callee->params[i].param_type != nir_parameter_out) {
>> - nir_copy_deref_var(b, nir_deref_var_create(b->shader, param),
>> - call->params[i]);
>> - }
>> + const unsigned num_params = call->num_params;
>> + NIR_VLA(nir_ssa_def *, params, num_params);
>> + for (unsigned i = 0; i < num_params; i++) {
>> + params[i] = nir_ssa_for_src(b, call->params[i],
>> + call->callee->params[i].num_components);
>> }
>>
>> nir_foreach_block(block, callee_copy) {
>> - nir_foreach_instr(instr, block)
>> - rewrite_param_derefs(instr, call);
>> + nir_foreach_instr_safe(instr, block) {
>> + if (instr->type != nir_instr_type_intrinsic)
>> + continue;
>> +
>> + nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
>> + if (load->intrinsic != nir_intrinsic_load_param)
>> + continue;
>> +
>> + unsigned param_idx = nir_intrinsic_param_idx(load);
>> + assert(param_idx < num_params);
>> + assert(load->dest.is_ssa);
>> + nir_ssa_def_rewrite_uses(&load->dest.ssa,
>> + nir_src_for_ssa(params[param_idx]));
>> +
>> + /* Remove any left-over load_param intrinsics because they're soon
>> + * to be in another function and therefore no longer valid.
>> + */
>> + nir_instr_remove(&load->instr);
>> + }
>> }
>>
>> /* Pluck the body out of the function and place it here */
>> @@ -214,26 +99,6 @@ inline_functions_block(nir_block *block, nir_builder *b,
>> nir_cf_list_extract(&body, &callee_copy->body);
>> nir_cf_reinsert(&body, b->cursor);
>>
>> - b->cursor = nir_before_instr(&call->instr);
>> -
>> - /* Add copies of all out parameters and the return */
>> - assert(call->num_params == callee_copy->num_params);
>> - for (unsigned i = 0; i < callee_copy->num_params; i++) {
>> - nir_variable *param = callee_copy->params[i];
>> -
>> - if (param->data.mode == nir_var_local &&
>> - call->callee->params[i].param_type != nir_parameter_in) {
>> - nir_copy_deref_var(b, call->params[i],
>> - nir_deref_var_create(b->shader, param));
>> - }
>> - }
>> - if (!glsl_type_is_void(call->callee->return_type) &&
>> - callee_copy->return_var->data.mode == nir_var_local) {
>> - nir_copy_deref_var(b, call->return_deref,
>> - nir_deref_var_create(b->shader,
>> - callee_copy->return_var));
>> - }
>> -
>> nir_instr_remove(&call->instr);
>> }
>>
>> diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
>> index 758bdf5..1dec88b 100644
>> --- a/src/compiler/nir/nir_intrinsics.py
>> +++ b/src/compiler/nir/nir_intrinsics.py
>> @@ -102,6 +102,8 @@ INTERP_MODE = "NIR_INTRINSIC_INTERP_MODE"
>> REDUCTION_OP = "NIR_INTRINSIC_REDUCTION_OP"
>> # Cluster size for reduction operations
>> CLUSTER_SIZE = "NIR_INTRINSIC_CLUSTER_SIZE"
>> +# Parameter index for a load_param intrinsic
>> +PARAM_IDX = "NIR_INTRINSIC_PARAM_IDX"
>>
>> #
>> # Possible flags:
>> @@ -120,6 +122,8 @@ def intrinsic(name, src_comp=[], dest_comp=-1, num_vars=0, indices=[],
>>
>> intrinsic("nop", flags=[CAN_ELIMINATE])
>>
>> +intrinsic("load_param", dest_comp=0, indices=[PARAM_IDX], flags=[CAN_ELIMINATE])
>> +
>> intrinsic("load_var", dest_comp=0, num_vars=1, flags=[CAN_ELIMINATE])
>> intrinsic("store_var", src_comp=[0], num_vars=1, indices=[WRMASK])
>> intrinsic("copy_var", num_vars=2)
>> diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
>> index dede585..bc39b63 100644
>> --- a/src/compiler/nir/nir_print.c
>> +++ b/src/compiler/nir/nir_print.c
>> @@ -410,7 +410,6 @@ get_variable_mode_str(nir_variable_mode mode, bool want_local_global_mode)
>> return "system";
>> case nir_var_shared:
>> return "shared";
>> - case nir_var_param:
>> case nir_var_global:
>> return want_local_global_mode ? "global" : "";
>> case nir_var_local:
>> @@ -651,14 +650,6 @@ print_var(nir_variable *var, print_state *state)
>> }
>>
>> static void
>> -print_arg(nir_variable *var, print_state *state)
>> -{
>> - FILE *fp = state->fp;
>> - fprintf(fp, "%s %s", glsl_get_type_name(var->type),
>> - get_var_name(var, state));
>> -}
>> -
>> -static void
>> print_deref_var(nir_deref_var *deref, print_state *state)
>> {
>> print_var(deref->var, state);
>> @@ -779,6 +770,7 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
>> [NIR_INTRINSIC_INTERP_MODE] = "interp_mode",
>> [NIR_INTRINSIC_REDUCTION_OP] = "reduction_op",
>> [NIR_INTRINSIC_CLUSTER_SIZE] = "cluster_size",
>> + [NIR_INTRINSIC_PARAM_IDX] = "param_idx",
>> };
>> for (unsigned idx = 1; idx < NIR_INTRINSIC_NUM_INDEX_FLAGS; idx++) {
>> if (!info->index_map[idx])
>> @@ -978,14 +970,7 @@ print_call_instr(nir_call_instr *instr, print_state *state)
>> if (i != 0)
>> fprintf(fp, ", ");
>>
>> - print_deref(instr->params[i], state);
>> - }
>> -
>> - if (instr->return_deref != NULL) {
>> - if (instr->num_params != 0)
>> - fprintf(fp, ", ");
>> - fprintf(fp, "returning ");
>> - print_deref(instr->return_deref, state);
>> + print_src(&instr->params[i], state);
>> }
>> }
>>
>> @@ -1260,20 +1245,6 @@ print_function_impl(nir_function_impl *impl, print_state *state)
>>
>> fprintf(fp, "\nimpl %s ", impl->function->name);
>>
>> - for (unsigned i = 0; i < impl->num_params; i++) {
>> - if (i != 0)
>> - fprintf(fp, ", ");
>> -
>> - print_arg(impl->params[i], state);
>> - }
>> -
>> - if (impl->return_var != NULL) {
>> - if (impl->num_params != 0)
>> - fprintf(fp, ", ");
>> - fprintf(fp, "returning ");
>> - print_arg(impl->return_var, state);
>> - }
>> -
>> fprintf(fp, "{\n");
>>
>> nir_foreach_variable(var, &impl->locals) {
>> @@ -1300,34 +1271,8 @@ print_function(nir_function *function, print_state *state)
>> {
>> FILE *fp = state->fp;
>>
>> - fprintf(fp, "decl_function %s ", function->name);
>> -
>> - for (unsigned i = 0; i < function->num_params; i++) {
>> - if (i != 0)
>> - fprintf(fp, ", ");
>> -
>> - switch (function->params[i].param_type) {
>> - case nir_parameter_in:
>> - fprintf(fp, "in ");
>> - break;
>> - case nir_parameter_out:
>> - fprintf(fp, "out ");
>> - break;
>> - case nir_parameter_inout:
>> - fprintf(fp, "inout ");
>> - break;
>> - default:
>> - unreachable("Invalid parameter type");
>> - }
>> -
>> - fprintf(fp, "%s", glsl_get_type_name(function->params[i].type));
>> - }
>> -
>> - if (function->return_type != NULL) {
>> - if (function->num_params != 0)
>> - fprintf(fp, ", ");
>> - fprintf(fp, "returning %s", glsl_get_type_name(function->return_type));
>> - }
>> + fprintf(fp, "decl_function %s (%d params)", function->name,
>> + function->num_params);
>>
>> fprintf(fp, "\n");
>>
>> diff --git a/src/compiler/nir/nir_remove_dead_variables.c b/src/compiler/nir/nir_remove_dead_variables.c
>> index 89e544f..41dddd9 100644
>> --- a/src/compiler/nir/nir_remove_dead_variables.c
>> +++ b/src/compiler/nir/nir_remove_dead_variables.c
>> @@ -52,7 +52,7 @@ deref_used_for_not_store(nir_deref_instr *deref)
>>
>> default:
>> /* If it's used by any other instruction type (most likely a texture
>> - * instruction), consider it used.
>> + * or call instruction), consider it used.
>> */
>> return true;
>> }
>> @@ -114,20 +114,6 @@ add_var_use_intrinsic(nir_intrinsic_instr *instr, struct set *live,
>> }
>>
>> static void
>> -add_var_use_call(nir_call_instr *instr, struct set *live)
>> -{
>> - if (instr->return_deref != NULL) {
>> - nir_variable *var = instr->return_deref->var;
>> - _mesa_set_add(live, var);
>> - }
>> -
>> - for (unsigned i = 0; i < instr->num_params; i++) {
>> - nir_variable *var = instr->params[i]->var;
>> - _mesa_set_add(live, var);
>> - }
>> -}
>> -
>> -static void
>> add_var_use_tex(nir_tex_instr *instr, struct set *live)
>> {
>> if (instr->texture != NULL) {
>> @@ -158,10 +144,6 @@ add_var_use_shader(nir_shader *shader, struct set *live, nir_variable_mode modes
>> modes);
>> break;
>>
>> - case nir_instr_type_call:
>> - add_var_use_call(nir_instr_as_call(instr), live);
>> - break;
>> -
>> case nir_instr_type_tex:
>> add_var_use_tex(nir_instr_as_tex(instr), live);
>> break;
>> diff --git a/src/compiler/nir/nir_serialize.c b/src/compiler/nir/nir_serialize.c
>> index 39f6d82..3769910 100644
>> --- a/src/compiler/nir/nir_serialize.c
>> +++ b/src/compiler/nir/nir_serialize.c
>> @@ -863,9 +863,7 @@ write_call(write_ctx *ctx, const nir_call_instr *call)
>> blob_write_intptr(ctx->blob, write_lookup_object(ctx, call->callee));
>>
>> for (unsigned i = 0; i < call->num_params; i++)
>> - write_deref_chain(ctx, call->params[i]);
>> -
>> - write_deref_chain(ctx, call->return_deref);
>> + write_src(ctx, &call->params[i]);
>> }
>>
>> static nir_call_instr *
>> @@ -875,9 +873,7 @@ read_call(read_ctx *ctx)
>> nir_call_instr *call = nir_call_instr_create(ctx->nir, callee);
>>
>> for (unsigned i = 0; i < call->num_params; i++)
>> - call->params[i] = read_deref_chain(ctx, &call->instr);
>> -
>> - call->return_deref = read_deref_chain(ctx, &call->instr);
>> + read_src(ctx, &call->params[i], call);
>>
>> return call;
>> }
>> @@ -1102,15 +1098,6 @@ write_function_impl(write_ctx *ctx, const nir_function_impl *fi)
>> write_reg_list(ctx, &fi->registers);
>> blob_write_uint32(ctx->blob, fi->reg_alloc);
>>
>> - blob_write_uint32(ctx->blob, fi->num_params);
>> - for (unsigned i = 0; i < fi->num_params; i++) {
>> - write_variable(ctx, fi->params[i]);
>> - }
>> -
>> - blob_write_uint32(ctx->blob, !!(fi->return_var));
>> - if (fi->return_var)
>> - write_variable(ctx, fi->return_var);
>> -
>> write_cf_list(ctx, &fi->body);
>> write_fixup_phis(ctx);
>> }
>> @@ -1125,17 +1112,6 @@ read_function_impl(read_ctx *ctx, nir_function *fxn)
>> read_reg_list(ctx, &fi->registers);
>> fi->reg_alloc = blob_read_uint32(ctx->blob);
>>
>> - fi->num_params = blob_read_uint32(ctx->blob);
>> - for (unsigned i = 0; i < fi->num_params; i++) {
>> - fi->params[i] = read_variable(ctx);
>> - }
>> -
>> - bool has_return = blob_read_uint32(ctx->blob);
>> - if (has_return)
>> - fi->return_var = read_variable(ctx);
>> - else
>> - fi->return_var = NULL;
>> -
>> read_cf_list(ctx, &fi->body);
>> read_fixup_phis(ctx);
>>
>> @@ -1155,12 +1131,12 @@ write_function(write_ctx *ctx, const nir_function *fxn)
>>
>> blob_write_uint32(ctx->blob, fxn->num_params);
>> for (unsigned i = 0; i < fxn->num_params; i++) {
>> - blob_write_uint32(ctx->blob, fxn->params[i].param_type);
>> - encode_type_to_blob(ctx->blob, fxn->params[i].type);
>> + uint32_t val =
>> + ((uint32_t)fxn->params[i].num_components) |
>> + ((uint32_t)fxn->params[i].bit_size) << 8;
>> + blob_write_uint32(ctx->blob, val);
>> }
>>
>> - encode_type_to_blob(ctx->blob, fxn->return_type);
>> -
>> /* At first glance, it looks like we should write the function_impl here.
>> * However, call instructions need to be able to reference at least the
>> * function and those will get processed as we write the function_impls.
>> @@ -1179,12 +1155,12 @@ read_function(read_ctx *ctx)
>> read_add_object(ctx, fxn);
>>
>> fxn->num_params = blob_read_uint32(ctx->blob);
>> + fxn->params = ralloc_array(fxn, nir_parameter, fxn->num_params);
>> for (unsigned i = 0; i < fxn->num_params; i++) {
>> - fxn->params[i].param_type = blob_read_uint32(ctx->blob);
>> - fxn->params[i].type = decode_type_from_blob(ctx->blob);
>> + uint32_t val = blob_read_uint32(ctx->blob);
>> + fxn->params[i].num_components = val & 0xff;
>> + fxn->params[i].bit_size = (val >> 8) & 0xff;
>> }
>> -
>> - fxn->return_type = decode_type_from_blob(ctx->blob);
>> }
>>
>> void
>> diff --git a/src/compiler/nir/nir_sweep.c b/src/compiler/nir/nir_sweep.c
>> index 0f1debc..b14bf13 100644
>> --- a/src/compiler/nir/nir_sweep.c
>> +++ b/src/compiler/nir/nir_sweep.c
>> @@ -118,10 +118,6 @@ sweep_impl(nir_shader *nir, nir_function_impl *impl)
>> {
>> ralloc_steal(nir, impl);
>>
>> - ralloc_steal(nir, impl->params);
>> - for (unsigned i = 0; i < impl->num_params; i++)
>> - ralloc_steal(nir, impl->params[i]);
>> - ralloc_steal(nir, impl->return_var);
>> steal_list(nir, nir_variable, &impl->locals);
>> steal_list(nir, nir_register, &impl->registers);
>>
>> diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c
>> index 224a316..51f78e9 100644
>> --- a/src/compiler/nir/nir_validate.c
>> +++ b/src/compiler/nir/nir_validate.c
>> @@ -555,6 +555,15 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
>> unsigned dest_bit_size = 0;
>> unsigned src_bit_sizes[NIR_INTRINSIC_MAX_INPUTS] = { 0, };
>> switch (instr->intrinsic) {
>> + case nir_intrinsic_load_param: {
>> + unsigned param_idx = nir_intrinsic_param_idx(instr);
>> + validate_assert(state, param_idx < state->impl->function->num_params);
>> + nir_parameter *param = &state->impl->function->params[param_idx];
>> + validate_assert(state, instr->num_components == param->num_components);
>> + dest_bit_size = param->bit_size;
>> + break;
>> + }
>> +
>> case nir_intrinsic_load_deref: {
>> nir_deref_instr *src = nir_src_as_deref(instr->src[0]);
>> validate_assert(state, glsl_type_is_vector_or_scalar(src->type) ||
>> @@ -677,18 +686,12 @@ validate_tex_instr(nir_tex_instr *instr, validate_state *state)
>> static void
>> validate_call_instr(nir_call_instr *instr, validate_state *state)
>> {
>> - if (instr->return_deref == NULL) {
>> - validate_assert(state, glsl_type_is_void(instr->callee->return_type));
>> - } else {
>> - validate_assert(state, instr->return_deref->deref.type == instr->callee->return_type);
>> - validate_deref_var(instr, instr->return_deref, state);
>> - }
>> -
>> validate_assert(state, instr->num_params == instr->callee->num_params);
>>
>> for (unsigned i = 0; i < instr->num_params; i++) {
>> - validate_assert(state, instr->callee->params[i].type == instr->params[i]->deref.type);
>> - validate_deref_var(instr, instr->params[i], state);
>> + validate_src(&instr->params[i], state,
>> + instr->callee->params[i].bit_size,
>> + instr->callee->params[i].num_components);
>> }
>> }
>>
>> @@ -1175,23 +1178,6 @@ validate_function_impl(nir_function_impl *impl, validate_state *state)
>> validate_assert(state, impl->function->impl == impl);
>> validate_assert(state, impl->cf_node.parent == NULL);
>>
>> - validate_assert(state, impl->num_params == impl->function->num_params);
>> - for (unsigned i = 0; i < impl->num_params; i++) {
>> - validate_assert(state, impl->params[i]->type == impl->function->params[i].type);
>> - validate_assert(state, impl->params[i]->data.mode == nir_var_param);
>> - validate_assert(state, impl->params[i]->data.location == i);
>> - validate_var_decl(impl->params[i], false, state);
>> - }
>> -
>> - if (glsl_type_is_void(impl->function->return_type)) {
>> - validate_assert(state, impl->return_var == NULL);
>> - } else {
>> - validate_assert(state, impl->return_var->type == impl->function->return_type);
>> - validate_assert(state, impl->return_var->data.mode == nir_var_param);
>> - validate_assert(state, impl->return_var->data.location == -1);
>> - validate_var_decl(impl->return_var, false, state);
>> - }
>> -
>> validate_assert(state, exec_list_is_empty(&impl->end_block->instr_list));
>> validate_assert(state, impl->end_block->successors[0] == NULL);
>> validate_assert(state, impl->end_block->successors[1] == NULL);
>> diff --git a/src/compiler/spirv/spirv_to_nir.c b/src/compiler/spirv/spirv_to_nir.c
>> index 135c967..c424b61 100644
>> --- a/src/compiler/spirv/spirv_to_nir.c
>> +++ b/src/compiler/spirv/spirv_to_nir.c
>> @@ -1783,42 +1783,54 @@ vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode,
>> vtn_callee->referenced = true;
>>
>> nir_call_instr *call = nir_call_instr_create(b->nb.shader, callee);
>> - for (unsigned i = 0; i < call->num_params; i++) {
>> - unsigned arg_id = w[4 + i];
>> - struct vtn_value *arg = vtn_untyped_value(b, arg_id);
>> - if (arg->value_type == vtn_value_type_pointer &&
>> - arg->pointer->ptr_type->type == NULL) {
>> - nir_deref_var *d = vtn_pointer_to_deref_var(b, arg->pointer);
>> - call->params[i] = nir_deref_var_clone(d, call);
>> - } else {
>> - struct vtn_ssa_value *arg_ssa = vtn_ssa_value(b, arg_id);
>>
>> - /* Make a temporary to store the argument in */
>> - nir_variable *tmp =
>> - nir_local_variable_create(b->nb.impl, arg_ssa->type, "arg_tmp");
>> - call->params[i] = nir_deref_var_create(call, tmp);
>> + unsigned param_idx = 0;
>>
>> - vtn_local_store(b, arg_ssa,
>> - nir_build_deref_for_chain(&b->nb, call->params[i]));
>> - }
>> + nir_deref_instr *ret_deref = NULL;
>> + struct vtn_type *ret_type = vtn_callee->type->return_type;
>> + if (ret_type->base_type != vtn_base_type_void) {
>> + nir_variable *ret_tmp =
>> + nir_local_variable_create(b->nb.impl, ret_type->type, "return_tmp");
>> + ret_deref = nir_build_deref_var(&b->nb, ret_tmp);
>> + call->params[param_idx++] = nir_src_for_ssa(&ret_deref->dest.ssa);
>> }
>>
>> - nir_variable *out_tmp = NULL;
>> - vtn_assert(res_type->type == callee->return_type);
>> - if (!glsl_type_is_void(callee->return_type)) {
>> - out_tmp = nir_local_variable_create(b->nb.impl, callee->return_type,
>> - "out_tmp");
>> - call->return_deref = nir_deref_var_create(call, out_tmp);
>> + for (unsigned i = 0; i < vtn_callee->type->length; i++) {
>> + struct vtn_type *arg_type = vtn_callee->type->params[i];
>> + unsigned arg_id = w[4 + i];
>> +
>> + if (arg_type->base_type == vtn_base_type_sampled_image) {
>> + struct vtn_sampled_image *sampled_image =
>> + vtn_value(b, arg_id, vtn_value_type_sampled_image)->sampled_image;
>> +
>> + call->params[param_idx++] =
>> + nir_src_for_ssa(&sampled_image->image->deref->dest.ssa);
>> + call->params[param_idx++] =
>> + nir_src_for_ssa(&sampled_image->sampler->deref->dest.ssa);
>> + } else if (arg_type->base_type == vtn_base_type_pointer ||
>> + arg_type->base_type == vtn_base_type_image ||
>> + arg_type->base_type == vtn_base_type_sampler) {
>> + struct vtn_pointer *pointer =
>> + vtn_value(b, arg_id, vtn_value_type_pointer)->pointer;
>> + call->params[param_idx++] =
>> + nir_src_for_ssa(vtn_pointer_to_ssa(b, pointer));
>> + } else {
>> + /* This is a regular SSA value and we need a temporary */
>> + nir_variable *tmp =
>> + nir_local_variable_create(b->nb.impl, arg_type->type, "arg_tmp");
>> + nir_deref_instr *tmp_deref = nir_build_deref_var(&b->nb, tmp);
>> + vtn_local_store(b, vtn_ssa_value(b, arg_id), tmp_deref);
>> + call->params[param_idx++] = nir_src_for_ssa(&tmp_deref->dest.ssa);
>> + }
>> }
>> + assert(param_idx == call->num_params);
>>
>> nir_builder_instr_insert(&b->nb, &call->instr);
>>
>> - if (glsl_type_is_void(callee->return_type)) {
>> + if (ret_type->base_type == vtn_base_type_void) {
>> vtn_push_value(b, w[2], vtn_value_type_undef);
>> } else {
>> - nir_deref_instr *return_deref =
>> - nir_build_deref_for_chain(&b->nb, call->return_deref);
>> - vtn_push_ssa(b, w[2], res_type, vtn_local_load(b, return_deref));
>> + vtn_push_ssa(b, w[2], res_type, vtn_local_load(b, ret_deref));
>> }
>> }
>>
>> diff --git a/src/compiler/spirv/vtn_cfg.c b/src/compiler/spirv/vtn_cfg.c
>> index 6c1fccf..6c411c8 100644
>> --- a/src/compiler/spirv/vtn_cfg.c
>> +++ b/src/compiler/spirv/vtn_cfg.c
>> @@ -25,18 +25,21 @@
>> #include "nir/nir_vla.h"
>>
>> static struct vtn_pointer *
>> -vtn_pointer_for_image_or_sampler_variable(struct vtn_builder *b,
>> - struct vtn_variable *var)
>> +vtn_load_param_pointer(struct vtn_builder *b,
>> + struct vtn_type *param_type,
>> + uint32_t param_idx)
>> {
>> - assert(var->type->base_type == vtn_base_type_image ||
>> - var->type->base_type == vtn_base_type_sampler);
>> -
>> - struct vtn_type *ptr_type = rzalloc(b, struct vtn_type);
>> - ptr_type->base_type = vtn_base_type_pointer;
>> - ptr_type->storage_class = SpvStorageClassUniformConstant;
>> - ptr_type->deref = var->type;
>> + struct vtn_type *ptr_type = param_type;
>> + if (param_type->base_type != vtn_base_type_pointer) {
>> + assert(param_type->base_type == vtn_base_type_image ||
>> + param_type->base_type == vtn_base_type_sampler);
>> + ptr_type = rzalloc(b, struct vtn_type);
>> + ptr_type->base_type = vtn_base_type_pointer;
>> + ptr_type->deref = param_type;
>> + ptr_type->storage_class = SpvStorageClassFunction;
>> + }
>>
>> - return vtn_pointer_for_variable(b, var, ptr_type);
>> + return vtn_pointer_from_ssa(b, nir_load_param(&b->nb, param_idx), ptr_type);
>> }
>>
>> static bool
>> @@ -64,41 +67,64 @@ vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode,
>> nir_function *func =
>> nir_function_create(b->shader, ralloc_strdup(b->shader, val->name));
>>
>> - func->num_params = func_type->length;
>> - func->params = ralloc_array(b->shader, nir_parameter, func->num_params);
>> - unsigned np = 0;
>> + unsigned num_params = func_type->length;
>> + for (unsigned i = 0; i < func_type->length; i++) {
>> + /* Sampled images are actually two parameters */
>> + if (func_type->params[i]->base_type == vtn_base_type_sampled_image)
>> + num_params++;
>> + }
>> +
>> + /* Add one parameter for the function return value */
>> + if (func_type->return_type->base_type != vtn_base_type_void)
>> + num_params++;
>> +
>> + func->num_params = num_params;
>> + func->params = ralloc_array(b->shader, nir_parameter, num_params);
>> +
>> + unsigned idx = 0;
>> + if (func_type->return_type->base_type != vtn_base_type_void) {
>> + /* The return value is a regular pointer */
>> + func->params[idx++] = (nir_parameter) {
>> + .num_components = 1, .bit_size = 32,
>> + };
>> + }
>> +
>> for (unsigned i = 0; i < func_type->length; i++) {
>> - if (func_type->params[i]->base_type == vtn_base_type_pointer &&
>> - func_type->params[i]->type == NULL) {
>> - func->params[np].type = func_type->params[i]->deref->type;
>> - func->params[np].param_type = nir_parameter_inout;
>> - np++;
>> - } else if (func_type->params[i]->base_type ==
>> - vtn_base_type_sampled_image) {
>> - /* Sampled images are actually two parameters */
>> - func->params = reralloc(b->shader, func->params,
>> - nir_parameter, func->num_params++);
>> - func->params[np].type = func_type->params[i]->type;
>> - func->params[np].param_type = nir_parameter_in;
>> - np++;
>> - func->params[np].type = glsl_bare_sampler_type();
>> - func->params[np].param_type = nir_parameter_in;
>> - np++;
>> + if (func_type->params[i]->base_type == vtn_base_type_sampled_image) {
>> + /* Sampled images are two pointer parameters */
>> + func->params[idx++] = (nir_parameter) {
>> + .num_components = 1, .bit_size = 32,
>> + };
>> + func->params[idx++] = (nir_parameter) {
>> + .num_components = 1, .bit_size = 32,
>> + };
>> + } else if (func_type->params[i]->base_type == vtn_base_type_pointer &&
>> + func_type->params[i]->type != NULL) {
>> + /* Pointers with as storage class get passed by-value */
>> + assert(glsl_type_is_vector_or_scalar(func_type->params[i]->type));
>> + func->params[idx++] = (nir_parameter) {
>> + .num_components =
>> + glsl_get_vector_elements(func_type->params[i]->type),
>> + .bit_size = glsl_get_bit_size(func_type->params[i]->type),
>> + };
>> } else {
>> - func->params[np].type = func_type->params[i]->type;
>> - func->params[np].param_type = nir_parameter_in;
>> - np++;
>> + /* Everything else is a regular pointer */
>> + func->params[idx++] = (nir_parameter) {
>> + .num_components = 1, .bit_size = 32,
>> + };
>> }
>> }
>> - assert(np == func->num_params);
>> -
>> - func->return_type = func_type->return_type->type;
>> + assert(idx == num_params);
>>
>> b->func->impl = nir_function_impl_create(func);
>> nir_builder_init(&b->nb, func->impl);
>> b->nb.cursor = nir_before_cf_list(&b->func->impl->body);
>>
>> b->func_param_idx = 0;
>> +
>> + /* The return value is the first parameter */
>> + if (func_type->return_type->base_type != vtn_base_type_void)
>> + b->func_param_idx++;
>> break;
>> }
>>
>> @@ -110,92 +136,46 @@ vtn_cfg_handle_prepass_instruction(struct vtn_builder *b, SpvOp opcode,
>> case SpvOpFunctionParameter: {
>> struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
>>
>> - vtn_assert(b->func_param_idx < b->func->impl->num_params);
>> - nir_variable *param = b->func->impl->params[b->func_param_idx++];
>> -
>> - if (type->base_type == vtn_base_type_pointer && type->type == NULL) {
>> - struct vtn_variable *vtn_var = rzalloc(b, struct vtn_variable);
>> - vtn_var->type = type->deref;
>> - vtn_var->var = param;
>> -
>> - vtn_assert(vtn_var->type->type == param->type);
>> -
>> - struct vtn_type *without_array = vtn_var->type;
>> - while(glsl_type_is_array(without_array->type))
>> - without_array = without_array->array_element;
>> -
>> - if (glsl_type_is_image(without_array->type)) {
>> - vtn_var->mode = vtn_variable_mode_image;
>> - param->interface_type = without_array->type;
>> - } else if (glsl_type_is_sampler(without_array->type)) {
>> - vtn_var->mode = vtn_variable_mode_sampler;
>> - param->interface_type = without_array->type;
>> - } else {
>> - vtn_var->mode = vtn_variable_mode_param;
>> - }
>> + vtn_assert(b->func_param_idx < b->func->impl->function->num_params);
>>
>> + if (type->base_type == vtn_base_type_sampled_image) {
>> + /* Sampled images are actually two parameters. The first is the
>> + * image and the second is the sampler.
>> + */
>> + struct vtn_value *val =
>> + vtn_push_value(b, w[2], vtn_value_type_sampled_image);
>> +
>> + val->sampled_image = ralloc(b, struct vtn_sampled_image);
>> + val->sampled_image->type = type;
>> +
>> + struct vtn_type *sampler_type = rzalloc(b, struct vtn_type);
>> + sampler_type->base_type = vtn_base_type_sampler;
>> + sampler_type->type = glsl_bare_sampler_type();
>> +
>> + val->sampled_image->image =
>> + vtn_load_param_pointer(b, type, b->func_param_idx++);
>> + val->sampled_image->sampler =
>> + vtn_load_param_pointer(b, sampler_type, b->func_param_idx++);
>> + } else if (type->base_type == vtn_base_type_pointer &&
>> + type->type != NULL) {
>> + /* This is a pointer with an actual storage type */
>> struct vtn_value *val =
>> vtn_push_value(b, w[2], vtn_value_type_pointer);
>> -
>> - /* Name the parameter so it shows up nicely in NIR */
>> - param->name = ralloc_strdup(param, val->name);
>> -
>> - val->pointer = vtn_pointer_for_variable(b, vtn_var, type);
>> - } else if (type->base_type == vtn_base_type_image ||
>> - type->base_type == vtn_base_type_sampler ||
>> - type->base_type == vtn_base_type_sampled_image) {
>> - struct vtn_variable *vtn_var = rzalloc(b, struct vtn_variable);
>> - vtn_var->type = type;
>> - vtn_var->var = param;
>> - param->interface_type = param->type;
>> -
>> - if (type->base_type == vtn_base_type_sampled_image) {
>> - /* Sampled images are actually two parameters. The first is the
>> - * image and the second is the sampler.
>> - */
>> - struct vtn_value *val =
>> - vtn_push_value(b, w[2], vtn_value_type_sampled_image);
>> -
>> - /* Name the parameter so it shows up nicely in NIR */
>> - param->name = ralloc_strdup(param, val->name);
>> -
>> - /* Adjust the type of the image variable to the image type */
>> - vtn_var->type = type->image;
>> -
>> - /* Now get the sampler parameter and set up its variable */
>> - param = b->func->impl->params[b->func_param_idx++];
>> - struct vtn_variable *sampler_var = rzalloc(b, struct vtn_variable);
>> - sampler_var->type = rzalloc(b, struct vtn_type);
>> - sampler_var->type->base_type = vtn_base_type_sampler;
>> - sampler_var->type->type = glsl_bare_sampler_type();
>> - sampler_var->var = param;
>> - param->interface_type = param->type;
>> - param->name = ralloc_strdup(param, val->name);
>> -
>> - val->sampled_image = ralloc(b, struct vtn_sampled_image);
>> - val->sampled_image->type = type;
>> - val->sampled_image->image =
>> - vtn_pointer_for_image_or_sampler_variable(b, vtn_var);
>> - val->sampled_image->sampler =
>> - vtn_pointer_for_image_or_sampler_variable(b, sampler_var);
>> - } else {
>> - struct vtn_value *val =
>> - vtn_push_value(b, w[2], vtn_value_type_pointer);
>> -
>> - /* Name the parameter so it shows up nicely in NIR */
>> - param->name = ralloc_strdup(param, val->name);
>> -
>> - val->pointer =
>> - vtn_pointer_for_image_or_sampler_variable(b, vtn_var);
>> - }
>> + nir_ssa_def *ssa_ptr = nir_load_param(&b->nb, b->func_param_idx++);
>> + val->pointer = vtn_pointer_from_ssa(b, ssa_ptr, type);
>> + } else if (type->base_type == vtn_base_type_pointer ||
>> + type->base_type == vtn_base_type_image ||
>> + type->base_type == vtn_base_type_sampler) {
>> + struct vtn_value *val =
>> + vtn_push_value(b, w[2], vtn_value_type_pointer);
>> + val->pointer =
>> + vtn_load_param_pointer(b, type, b->func_param_idx++);
>> } else {
>> /* We're a regular SSA value. */
>> - struct vtn_ssa_value *param_ssa =
>> - vtn_local_load(b, nir_build_deref_var(&b->nb, param));
>> - struct vtn_value *val = vtn_push_ssa(b, w[2], type, param_ssa);
>> -
>> - /* Name the parameter so it shows up nicely in NIR */
>> - param->name = ralloc_strdup(param, val->name);
>> + nir_ssa_def *param_val = nir_load_param(&b->nb, b->func_param_idx++);
>> + nir_deref_instr *deref =
>> + nir_build_deref_cast(&b->nb, param_val, nir_var_local, type->type);
>> + vtn_push_ssa(b, w[2], type, vtn_local_load(b, deref));
>> }
>> break;
>> }
>> @@ -729,9 +709,14 @@ vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list,
>> nir_builder_instr_insert(&b->nb, &block->end_nop->instr);
>>
>> if ((*block->branch & SpvOpCodeMask) == SpvOpReturnValue) {
>> + vtn_fail_if(b->func->type->return_type->base_type ==
>> + vtn_base_type_void,
>> + "Return with a value from a function returning void");
>> struct vtn_ssa_value *src = vtn_ssa_value(b, block->branch[1]);
>> - vtn_local_store(b, src,
>> - nir_build_deref_var(&b->nb, b->nb.impl->return_var));
>> + nir_deref_instr *ret_deref =
>> + nir_build_deref_cast(&b->nb, nir_load_param(&b->nb, 0),
>> + nir_var_local, src->type);
>> + vtn_local_store(b, src, ret_deref);
>> }
>>
>> if (block->branch_type != vtn_branch_type_none) {
>> diff --git a/src/compiler/spirv/vtn_private.h b/src/compiler/spirv/vtn_private.h
>> index 6f2fcc7..42b1b30 100644
>> --- a/src/compiler/spirv/vtn_private.h
>> +++ b/src/compiler/spirv/vtn_private.h
>> @@ -403,7 +403,6 @@ struct vtn_access_chain {
>> enum vtn_variable_mode {
>> vtn_variable_mode_local,
>> vtn_variable_mode_global,
>> - vtn_variable_mode_param,
>> vtn_variable_mode_ubo,
>> vtn_variable_mode_ssbo,
>> vtn_variable_mode_push_constant,
>> @@ -678,8 +677,6 @@ struct vtn_pointer *vtn_pointer_for_variable(struct vtn_builder *b,
>> struct vtn_variable *var,
>> struct vtn_type *ptr_type);
>>
>> -nir_deref_var *vtn_pointer_to_deref_var(struct vtn_builder *b,
>> - struct vtn_pointer *ptr);
>> nir_deref_instr *vtn_pointer_to_deref(struct vtn_builder *b,
>> struct vtn_pointer *ptr);
>> nir_ssa_def *
>> diff --git a/src/compiler/spirv/vtn_variables.c b/src/compiler/spirv/vtn_variables.c
>> index b15a4cf..4489d76 100644
>> --- a/src/compiler/spirv/vtn_variables.c
>> +++ b/src/compiler/spirv/vtn_variables.c
>> @@ -416,12 +416,6 @@ vtn_pointer_to_deref(struct vtn_builder *b, struct vtn_pointer *ptr)
>> return tail;
>> }
>>
>> -nir_deref_var *
>> -vtn_pointer_to_deref_var(struct vtn_builder *b, struct vtn_pointer *ptr)
>> -{
>> - return nir_deref_instr_to_deref(vtn_pointer_to_deref(b, ptr), b);
>> -}
>> -
>> static void
>> _vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_instr *deref,
>> struct vtn_ssa_value *inout)
>> @@ -1743,9 +1737,6 @@ vtn_create_variable(struct vtn_builder *b, struct vtn_value *val,
>> break;
>> }
>>
>> - case vtn_variable_mode_param:
>> - vtn_fail("Not created through OpVariable");
>> -
>> case vtn_variable_mode_ubo:
>> case vtn_variable_mode_ssbo:
>> case vtn_variable_mode_push_constant:
>> diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
>> index 20b0804..ee576f3 100644
>> --- a/src/intel/vulkan/anv_pipeline.c
>> +++ b/src/intel/vulkan/anv_pipeline.c
>> @@ -172,8 +172,6 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
>> nir_print_shader(nir, stderr);
>> }
>>
>> - NIR_PASS_V(nir, nir_lower_deref_instrs, ~0);
>> -
>> /* We have to lower away local constant initializers right before we
>> * inline functions. That way they get properly initialized at the top
>> * of the function and not at the top of its caller.
>> @@ -181,6 +179,7 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
>> NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local);
>> NIR_PASS_V(nir, nir_lower_returns);
>> NIR_PASS_V(nir, nir_inline_functions);
>> + NIR_PASS_V(nir, nir_copy_prop);
>>
>> /* Pick off the single entrypoint that we want */
>> foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
>> @@ -190,6 +189,8 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
>> assert(exec_list_length(&nir->functions) == 1);
>> entry_point->name = ralloc_strdup(entry_point, "main");
>>
>> + NIR_PASS_V(nir, nir_lower_deref_instrs, ~0);
>> +
>> /* Now that we've deleted all but the main function, we can go ahead and
>> * lower the rest of the constant initializers. We do this here so that
>> * nir_remove_dead_variables and split_per_member_structs below see the
>> --
>> 2.5.0.400.gff86faf
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list