[Mesa-dev] [PATCH 1/6] translate_generic: use memcpy if possible
Keith Whitwell
keithw at vmware.com
Fri Aug 13 02:42:03 PDT 2010
Luca,
In this change you've got an int value (copy_size) which has some
special meaning when negative -- can you add comments explaining what
the meaning of a negative size is? Is there a way to use some more
explicit flag value to indicate this condition?
Keith
On Thu, 2010-08-12 at 10:08 -0700, Luca Barbieri wrote:
> When used in GPU drivers, translate can be used to simultaneously
> perform a gather operation, and convert away from unsupported formats.
>
> In this use case, input and output formats will often be identical: clearly
> it would make sense to use a memcpy in this case.
>
> Instead, translate insists on converting to and from 32-bit floating-point
> numbers.
>
> This is not only extremely expensive, but it also loses precision for
> 32/64-bit integers and 64-bit floating point numbers.
>
> This patch changes translate_generic to just use memcpy if the formats are
> identical and non-blocked and have an integral number of bytes per pixel (note
> that all sensible vertex formats are like this).
> ---
> .../auxiliary/translate/translate_generic.c | 93 +++++++++++++------
> 1 files changed, 63 insertions(+), 30 deletions(-)
>
> diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
> index 42cfd76..57a42b7 100644
> --- a/src/gallium/auxiliary/translate/translate_generic.c
> +++ b/src/gallium/auxiliary/translate/translate_generic.c
> @@ -63,6 +63,7 @@ struct translate_generic {
> const uint8_t *input_ptr;
> unsigned input_stride;
> unsigned max_index;
> + int copy_size;
>
> } attrib[PIPE_MAX_ATTRIBS];
>
> @@ -380,9 +381,10 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
> float data[4];
> char *dst = vert + tg->attrib[attr].output_offset;
>
> - if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
> + if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
> const uint8_t *src;
> unsigned index;
> + int copy_size;
>
> if (tg->attrib[attr].instance_divisor) {
> index = instance_id / tg->attrib[attr].instance_divisor;
> @@ -396,27 +398,34 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
> src = tg->attrib[attr].input_ptr +
> tg->attrib[attr].input_stride * index;
>
> - tg->attrib[attr].fetch( data, src, 0, 0 );
> -
> - if (0)
> - debug_printf("Fetch elt attr %d from %p stride %d div %u max %u index %d: "
> - " %f, %f, %f, %f \n",
> - attr,
> - tg->attrib[attr].input_ptr,
> - tg->attrib[attr].input_stride,
> - tg->attrib[attr].instance_divisor,
> - tg->attrib[attr].max_index,
> - index,
> - data[0], data[1],data[2], data[3]);
> + copy_size = tg->attrib[attr].copy_size;
> + if(likely(copy_size >= 0))
> + memcpy(dst, src, tg->attrib[attr].copy_size);
> + else
> + {
> + tg->attrib[attr].fetch( data, src, 0, 0 );
> +
> + if (0)
> + debug_printf("Fetch elt attr %d from %p stride %d div %u max %u index %d: "
> + " %f, %f, %f, %f \n",
> + attr,
> + tg->attrib[attr].input_ptr,
> + tg->attrib[attr].input_stride,
> + tg->attrib[attr].instance_divisor,
> + tg->attrib[attr].max_index,
> + index,
> + data[0], data[1],data[2], data[3]);
> + tg->attrib[attr].emit( data, dst );
> + }
> } else {
> - data[0] = (float)instance_id;
> + if(likely(tg->attrib[attr].copy_size >= 0))
> + memcpy(data, &instance_id, 4);
> + else
> + {
> + data[0] = (float)instance_id;
> + tg->attrib[attr].emit( data, dst );
> + }
> }
> -
> - if (0)
> - debug_printf("vert %d/%d attr %d: %f %f %f %f\n",
> - i, elt, attr, data[0], data[1], data[2], data[3]);
> -
> - tg->attrib[attr].emit( data, dst );
> }
> vert += tg->translate.key.output_stride;
> }
> @@ -448,6 +457,7 @@ static void PIPE_CDECL generic_run( struct translate *translate,
> if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
> const uint8_t *src;
> unsigned index;
> + int copy_size;
>
> if (tg->attrib[attr].instance_divisor) {
> index = instance_id / tg->attrib[attr].instance_divisor;
> @@ -462,25 +472,33 @@ static void PIPE_CDECL generic_run( struct translate *translate,
> src = tg->attrib[attr].input_ptr +
> tg->attrib[attr].input_stride * index;
>
> - tg->attrib[attr].fetch( data, src, 0, 0 );
> + copy_size = tg->attrib[attr].copy_size;
> + if(likely(copy_size >= 0))
> + memcpy(dst, src, tg->attrib[attr].copy_size);
> + else
> + {
> + tg->attrib[attr].fetch( data, src, 0, 0 );
>
> - if (0)
> - debug_printf("Fetch linear attr %d from %p stride %d index %d: "
> + if (0)
> + debug_printf("Fetch linear attr %d from %p stride %d index %d: "
> " %f, %f, %f, %f \n",
> attr,
> tg->attrib[attr].input_ptr,
> tg->attrib[attr].input_stride,
> index,
> data[0], data[1],data[2], data[3]);
> +
> + tg->attrib[attr].emit( data, dst );
> + }
> } else {
> - data[0] = (float)instance_id;
> + if(likely(tg->attrib[attr].copy_size >= 0))
> + memcpy(data, &instance_id, 4);
> + else
> + {
> + data[0] = (float)instance_id;
> + tg->attrib[attr].emit( data, dst );
> + }
> }
> -
> - if (0)
> - debug_printf("vert %d attr %d: %f %f %f %f\n",
> - i, attr, data[0], data[1], data[2], data[3]);
> -
> - tg->attrib[attr].emit( data, dst );
> }
>
> vert += tg->translate.key.output_stride;
> @@ -547,6 +565,21 @@ struct translate *translate_generic_create( const struct translate_key *key )
> tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
> tg->attrib[i].output_offset = key->element[i].output_offset;
>
> + tg->attrib[i].copy_size = -1;
> + if (tg->attrib[i].type == TRANSLATE_ELEMENT_INSTANCE_ID)
> + {
> + if(key->element[i].output_format == PIPE_FORMAT_R32_USCALED
> + || key->element[i].output_format == PIPE_FORMAT_R32_SSCALED)
> + tg->attrib[i].copy_size = 4;
> + }
> + else
> + {
> + if(key->element[i].input_format == key->element[i].output_format
> + && format_desc->block.width == 1
> + && format_desc->block.height == 1
> + && !(format_desc->block.bits & 7))
> + tg->attrib[i].copy_size = format_desc->block.bits >> 3;
> + }
> }
>
> tg->nr_attrib = key->nr_elements;
More information about the mesa-dev
mailing list