[Mesa-dev] [PATCH] gallivm: Compile flag to debug TGSI execution through printfs.
Roland Scheidegger
sroland at vmware.com
Wed Nov 13 10:27:39 PST 2013
Looks great to me. Just some minor nitpicks.
On 11/13/2013 05:37 PM, jfonseca at vmware.com wrote:
> From: José Fonseca <jfonseca at vmware.com>
>
> It is similar to tgsi_exec.c's DEBUG_EXECUTION compile flag.
>
> I had prototyped this for a while while debugging an issue, but finally
> cleaned this up and added a few more bells and whistles.
>
> Here is a sample output.
>
> CONST[0]:
> X: 0.006250 0.006250 0.006250 0.006250
> Y: -0.007143 -0.007143 -0.007143 -0.007143
> Z: -1.000000 -1.000000 -1.000000 -1.000000
> W: 1.000000 1.000000 1.000000 1.000000
> IN[0]:
> X: 143.500000 175.500000 175.500000 143.500000
> Y: 123.500000 123.500000 155.500000 155.500000
> Z: 0.000000 0.000000 0.000000 0.000000
> W: 1.000000 1.000000 1.000000 1.000000
>> 1: RCP TEMP[0].w, IN[0].wwww
> TEMP[0].w = 1 1 1 1
>> 2: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw
> TEMP[0].x = -0.103124976 0.0968750715 0.0968750715 -0.103124976
> TEMP[0].y = 0.117857158 0.117857158 -0.110714316 -0.110714316
>> 3: MUL OUT[0].xy, TEMP[0], TEMP[0].wwww
> OUT[0].x = -0.103124976 0.0968750715 0.0968750715 -0.103124976
> OUT[0].y = 0.117857158 0.117857158 -0.110714316 -0.110714316
>> 4: MUL OUT[0].z, IN[0].zzzz, TEMP[0].wwww
> OUT[0].z = 0 0 0 0
>> 5: MOV OUT[0].w, TEMP[0]
> OUT[0].w = 1 1 1 1
> ---
> src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 158 +++++++++++++++++++-----
> src/gallium/auxiliary/tgsi/tgsi_dump.c | 23 ++++
> src/gallium/auxiliary/tgsi/tgsi_dump.h | 7 ++
> 3 files changed, 159 insertions(+), 29 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> index 5f81066..917826d 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
> @@ -47,6 +47,7 @@
> #include "tgsi/tgsi_parse.h"
> #include "tgsi/tgsi_util.h"
> #include "tgsi/tgsi_scan.h"
> +#include "tgsi/tgsi_strings.h"
> #include "lp_bld_tgsi_action.h"
> #include "lp_bld_type.h"
> #include "lp_bld_const.h"
> @@ -67,6 +68,17 @@
>
> #define DUMP_GS_EMITS 0
>
> +/*
> + * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
> + * instruction.
> + *
> + * TODO:
> + * - take execution masks in consideration
> + * - debug control-flow instructions
> + */
> +#define DEBUG_EXECUTION 0
> +
> +
> static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
> {
> LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context);
> @@ -664,6 +676,43 @@ static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
> }
>
>
> +static LLVMValueRef
> +get_file_ptr(struct lp_build_tgsi_soa_context *bld,
> + unsigned file,
> + unsigned index,
> + unsigned chan)
> +{
> + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
> + LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
> + LLVMValueRef var_of_array;
> +
> + switch (file) {
> + case TGSI_FILE_TEMPORARY:
> + array_of_vars = bld->temps;
> + var_of_array = bld->temps_array;
> + break;
> + case TGSI_FILE_OUTPUT:
> + array_of_vars = bld->outputs;
> + var_of_array = bld->outputs_array;
> + break;
> + default:
> + assert(0);
> + return NULL;
> + }
> +
> + assert(chan < 4);
> +
> + if (bld->indirect_files & (1 << file)) {
> + LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
> + return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
> + }
> + else {
> + assert(index <= bld->bld_base.info->file_max[file]);
> + return array_of_vars[index][chan];
> + }
> +}
> +
> +
> /**
> * Return pointer to a temporary register channel (src or dest).
> * Note that indirect addressing cannot be handled here.
> @@ -675,15 +724,7 @@ lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
> unsigned index,
> unsigned chan)
> {
> - LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
> - assert(chan < 4);
> - if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
> - LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
> - return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
> - }
> - else {
> - return bld->temps[index][chan];
> - }
> + return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
> }
It doesn't look to me like it's really worth keeping the lp_get_temp_ptr_soa /
lp_get_output_ptr helpers around, as you could just as easily call
get_file_ptr directly. If you think they are worth keeping, though, that's fine.
>
> /**
> @@ -697,16 +738,7 @@ lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
> unsigned index,
> unsigned chan)
> {
> - LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
> - assert(chan < 4);
> - if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
> - LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
> - index * 4 + chan);
> - return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
> - }
> - else {
> - return bld->outputs[index][chan];
> - }
> + return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
> }
>
> /*
> @@ -1415,6 +1447,21 @@ emit_store_chan(
> bld_base->info->file_max[reg->Register.File]);
> }
>
> + if (DEBUG_EXECUTION) {
> + /*
> + * Dump the value.
> + */
> +
> + char buf[512];
> +
> + util_snprintf(buf, sizeof buf, " %s[%u].%c = ",
> + tgsi_file_name(reg->Register.File),
> + reg->Register.Index, "xyzw"[chan_index]);
> +
> + lp_build_print_value(gallivm, buf, value);
> + }
> +
> +
> switch( reg->Register.File ) {
> case TGSI_FILE_OUTPUT:
> /* Outputs are always stored as floats */
> @@ -1505,6 +1552,18 @@ emit_store(
> unsigned chan_index;
> struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
>
> + if (DEBUG_EXECUTION) {
> + /*
> + * Dump the TGSI instruction.
> + */
> +
> + char buf[512];
> + buf[0] = '>';
> + buf[1] = ' ';
> + tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
> + lp_build_printf(bld_base->base.gallivm, buf);
> + }
> +
> if(info->num_dst) {
> LLVMValueRef pred[TGSI_NUM_CHANNELS];
>
> @@ -2253,28 +2312,61 @@ emit_kill(struct lp_build_tgsi_soa_context *bld,
> * to stdout.
> */
> static void
> -emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
> +emit_dump_file(struct lp_build_tgsi_soa_context *bld,
> + unsigned file)
> {
> struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
> LLVMBuilderRef builder = gallivm->builder;
> - LLVMValueRef temp_ptr;
> + LLVMValueRef reg_ptr;
> LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
> LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
> LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
> LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
> - int index;
> - int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];
> + unsigned index;
> + unsigned n = bld->bld_base.info->file_max[file];
> +
> + n = MIN2(n, 16);
Is this just an artificial limitation so only the first 17 regs get dumped?
>
> - for (index = 0; index < n; index++) {
> + for (index = 0; index <= n; index++) {
> LLVMValueRef idx = lp_build_const_int32(gallivm, index);
> LLVMValueRef v[4][4], res;
> int chan;
>
> - lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
> + lp_build_printf(gallivm, "%s[%d]:\n",
> + lp_build_const_string(gallivm, tgsi_file_name(file)),
> + idx);
>
> for (chan = 0; chan < 4; chan++) {
> - temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
> - res = LLVMBuildLoad(builder, temp_ptr, "");
> + if (file == TGSI_FILE_CONSTANT) {
> + struct tgsi_full_src_register reg;
> + memset(&reg, 0, sizeof reg);
> + reg.Register.File = file;
> + reg.Register.Index = index;
> + reg.Register.SwizzleX = 0;
> + reg.Register.SwizzleY = 1;
> + reg.Register.SwizzleZ = 2;
> + reg.Register.SwizzleW = 3;
> +
> + res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
> + if (!res) {
> + continue;
> + }
> + } else if (file == TGSI_FILE_INPUT) {
> + res = bld->inputs[index][chan];
> + if (!res) {
> + continue;
> + }
> + } else if (file == TGSI_FILE_TEMPORARY) {
> + reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
> + assert(reg_ptr);
> + res = LLVMBuildLoad(builder, reg_ptr, "");
> + } else if (file == TGSI_FILE_OUTPUT) {
> + reg_ptr = lp_get_output_ptr(bld, index, chan);
> + res = LLVMBuildLoad(builder, reg_ptr, "");
> + assert(reg_ptr);
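The assert here comes after the crash: if reg_ptr were NULL, the LLVMBuildLoad above would already have failed. It should go before the load, as in the TGSI_FILE_TEMPORARY case.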
> + } else {
> + assert(0);
> + }
> v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
> v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
> v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
> @@ -3174,6 +3266,11 @@ static void emit_prologue(struct lp_build_tgsi_context * bld_base)
> LLVMBuildStore(gallivm->builder, uint_bld->zero,
> bld->total_emitted_vertices_vec_ptr);
> }
> +
> + if (DEBUG_EXECUTION) {
> + emit_dump_file(bld, TGSI_FILE_CONSTANT);
> + emit_dump_file(bld, TGSI_FILE_INPUT);
> + }
> }
>
> static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
> @@ -3181,9 +3278,12 @@ static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
> struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
> LLVMBuilderRef builder = bld_base->base.gallivm->builder;
>
> - if (0) {
> + if (DEBUG_EXECUTION) {
> /* for debugging */
> - emit_dump_temps(bld);
> + if (0) {
> + emit_dump_file(bld, TGSI_FILE_TEMPORARY);
> + }
> + emit_dump_file(bld, TGSI_FILE_OUTPUT);
> }
>
> /* If we have indirect addressing in outputs we need to copy our alloca array
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
> index 7f6a3d8..77bca62 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
> @@ -721,3 +721,26 @@ tgsi_dump_str(
>
> tgsi_iterate_shader( tokens, &ctx.base.iter );
> }
> +
> +void
> +tgsi_dump_instruction_str(
> + const struct tgsi_full_instruction *inst,
> + uint instno,
> + char *str,
> + size_t size)
> +{
> + struct str_dump_ctx ctx;
> +
> + ctx.base.instno = instno;
> + ctx.base.immno = instno;
> + ctx.base.indent = 0;
> + ctx.base.dump_printf = &str_dump_ctx_printf;
> + ctx.base.indentation = 0;
> +
> + ctx.str = str;
> + ctx.str[0] = 0;
> + ctx.ptr = str;
> + ctx.left = (int)size;
> +
> + iter_instruction( &ctx.base.iter, (struct tgsi_full_instruction *)inst );
> +}
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h
> index adaef9d..9820bb1 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_dump.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h
> @@ -58,6 +58,13 @@ tgsi_dump_immediate(
> const struct tgsi_full_immediate *imm );
>
> void
> +tgsi_dump_instruction_str(
> + const struct tgsi_full_instruction *inst,
> + uint instno,
> + char *str,
> + size_t size);
> +
> +void
> tgsi_dump_instruction(
> const struct tgsi_full_instruction *inst,
> uint instno );
>
Roland
More information about the mesa-dev
mailing list