[Mesa-dev] [PATCH 1/6] translate_generic: use memcpy if possible
Luca Barbieri
luca at luca-barbieri.com
Thu Aug 12 10:08:59 PDT 2010
When used in GPU drivers, translate can be used to simultaneously
perform a gather operation, and convert away from unsupported formats.
In this use case, input and output formats will often be identical, so
it clearly makes sense to use a plain memcpy.
Instead, translate insists on converting to and from 32-bit floating-point
numbers.
This is not only extremely expensive, but it also loses precision for
32/64-bit integers and 64-bit floating point numbers.
This patch changes translate_generic to just use memcpy if the formats are
identical, non-blocked, and with an integral number of bytes per pixel (note
that all sensible vertex formats are like this).
---
.../auxiliary/translate/translate_generic.c | 93 +++++++++++++------
1 files changed, 63 insertions(+), 30 deletions(-)
diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
index 42cfd76..57a42b7 100644
--- a/src/gallium/auxiliary/translate/translate_generic.c
+++ b/src/gallium/auxiliary/translate/translate_generic.c
@@ -63,6 +63,7 @@ struct translate_generic {
const uint8_t *input_ptr;
unsigned input_stride;
unsigned max_index;
+ int copy_size;
} attrib[PIPE_MAX_ATTRIBS];
@@ -380,9 +381,10 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
float data[4];
char *dst = vert + tg->attrib[attr].output_offset;
- if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
+ if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
const uint8_t *src;
unsigned index;
+ int copy_size;
if (tg->attrib[attr].instance_divisor) {
index = instance_id / tg->attrib[attr].instance_divisor;
@@ -396,27 +398,34 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
src = tg->attrib[attr].input_ptr +
tg->attrib[attr].input_stride * index;
- tg->attrib[attr].fetch( data, src, 0, 0 );
-
- if (0)
- debug_printf("Fetch elt attr %d from %p stride %d div %u max %u index %d: "
- " %f, %f, %f, %f \n",
- attr,
- tg->attrib[attr].input_ptr,
- tg->attrib[attr].input_stride,
- tg->attrib[attr].instance_divisor,
- tg->attrib[attr].max_index,
- index,
- data[0], data[1],data[2], data[3]);
+ copy_size = tg->attrib[attr].copy_size;
+ if(likely(copy_size >= 0))
+ memcpy(dst, src, tg->attrib[attr].copy_size);
+ else
+ {
+ tg->attrib[attr].fetch( data, src, 0, 0 );
+
+ if (0)
+ debug_printf("Fetch elt attr %d from %p stride %d div %u max %u index %d: "
+ " %f, %f, %f, %f \n",
+ attr,
+ tg->attrib[attr].input_ptr,
+ tg->attrib[attr].input_stride,
+ tg->attrib[attr].instance_divisor,
+ tg->attrib[attr].max_index,
+ index,
+ data[0], data[1],data[2], data[3]);
+ tg->attrib[attr].emit( data, dst );
+ }
} else {
- data[0] = (float)instance_id;
+ if(likely(tg->attrib[attr].copy_size >= 0))
+ memcpy(data, &instance_id, 4);
+ else
+ {
+ data[0] = (float)instance_id;
+ tg->attrib[attr].emit( data, dst );
+ }
}
-
- if (0)
- debug_printf("vert %d/%d attr %d: %f %f %f %f\n",
- i, elt, attr, data[0], data[1], data[2], data[3]);
-
- tg->attrib[attr].emit( data, dst );
}
vert += tg->translate.key.output_stride;
}
@@ -448,6 +457,7 @@ static void PIPE_CDECL generic_run( struct translate *translate,
if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
const uint8_t *src;
unsigned index;
+ int copy_size;
if (tg->attrib[attr].instance_divisor) {
index = instance_id / tg->attrib[attr].instance_divisor;
@@ -462,25 +472,33 @@ static void PIPE_CDECL generic_run( struct translate *translate,
src = tg->attrib[attr].input_ptr +
tg->attrib[attr].input_stride * index;
- tg->attrib[attr].fetch( data, src, 0, 0 );
+ copy_size = tg->attrib[attr].copy_size;
+ if(likely(copy_size >= 0))
+ memcpy(dst, src, tg->attrib[attr].copy_size);
+ else
+ {
+ tg->attrib[attr].fetch( data, src, 0, 0 );
- if (0)
- debug_printf("Fetch linear attr %d from %p stride %d index %d: "
+ if (0)
+ debug_printf("Fetch linear attr %d from %p stride %d index %d: "
" %f, %f, %f, %f \n",
attr,
tg->attrib[attr].input_ptr,
tg->attrib[attr].input_stride,
index,
data[0], data[1],data[2], data[3]);
+
+ tg->attrib[attr].emit( data, dst );
+ }
} else {
- data[0] = (float)instance_id;
+ if(likely(tg->attrib[attr].copy_size >= 0))
+ memcpy(data, &instance_id, 4);
+ else
+ {
+ data[0] = (float)instance_id;
+ tg->attrib[attr].emit( data, dst );
+ }
}
-
- if (0)
- debug_printf("vert %d attr %d: %f %f %f %f\n",
- i, attr, data[0], data[1], data[2], data[3]);
-
- tg->attrib[attr].emit( data, dst );
}
vert += tg->translate.key.output_stride;
@@ -547,6 +565,21 @@ struct translate *translate_generic_create( const struct translate_key *key )
tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
tg->attrib[i].output_offset = key->element[i].output_offset;
+ tg->attrib[i].copy_size = -1;
+ if (tg->attrib[i].type == TRANSLATE_ELEMENT_INSTANCE_ID)
+ {
+ if(key->element[i].output_format == PIPE_FORMAT_R32_USCALED
+ || key->element[i].output_format == PIPE_FORMAT_R32_SSCALED)
+ tg->attrib[i].copy_size = 4;
+ }
+ else
+ {
+ if(key->element[i].input_format == key->element[i].output_format
+ && format_desc->block.width == 1
+ && format_desc->block.height == 1
+ && !(format_desc->block.bits & 7))
+ tg->attrib[i].copy_size = format_desc->block.bits >> 3;
+ }
}
tg->nr_attrib = key->nr_elements;
--
1.7.0.4
More information about the mesa-dev
mailing list