[Pixman] [cairo] Cairo/pixman behavior with large translations
Siarhei Siamashka
siarhei.siamashka at gmail.com
Thu Dec 6 06:09:40 PST 2012
On Thu, 06 Dec 2012 14:02:11 +0100
sandmann at cs.au.dk (Søren Sandmann) wrote:
> Søren Sandmann <sandmann at cs.au.dk> writes:
>
> > Intermediate results from matrix and vector multiplications fit in 64
> > bits, and
>
> That is, as long as the final result doesn't overflow the 31.16 type,
> the intermediate values fit in 64 bits.
We are fine if we don't mind dealing with two intermediate 64-bit
variables for doing multiplication and accumulation ('lo' and 'hi'
variables in the example below):
/*******************************************************************/
#include <stdint.h>
#include <stdlib.h>
#include <assert.h>
#include <math.h>
/* a reference implementation (needs 128-bit int types) */
static uint64_t
ref_mul_16_16_by_31_16_to_48_16 (int32_t fix_16_16, int64_t fix_31_16)
{
return ((__int128_t)fix_16_16 * fix_31_16 + 0x8000) >> 16;
}
/* this is a reference floating point implementation for comparison */
static __float128
float_mul_16_16_by_31_16_to_48_16 (int32_t fix_16_16, int64_t fix_31_16)
{
__float128 a = (__float128)fix_16_16 / 65536.;
__float128 b = (__float128)fix_31_16 / 65536.;
return a * b;
}
/* this implementation does not need 128-bit data types */
static uint64_t
mul_16_16_by_31_16_to_48_16 (int32_t fix_16_16, int64_t fix_31_16)
{
int64_t hi = (int64_t)fix_16_16 * (fix_31_16 >> 16);
int64_t lo = (int64_t)fix_16_16 * (fix_31_16 & 0xFFFF);
return hi + ((lo + 0x8000) >> 16);
}
void fillrand (void *p, int size)
{
uint8_t *bytep = (uint8_t *)p;
while (size--)
*bytep++ = rand() & 0xFF;
}
int main (void)
{
int i;
for (i = 0; i < 100000000; i++)
{
int32_t fix_16_16;
int64_t fix_31_16;
int64_t ref_res, res;
__float128 float_res; /* 64-bit double data type is not enough */
fillrand (&fix_16_16, sizeof(fix_16_16));
fillrand (&fix_31_16, sizeof(fix_31_16));
/* in fact this will be 33.16 random number after this shift,
* but in the matrix by vector multiplication code we want to
* be able to add together 3 multiplication results without
* overflow, hence the fixed 31.16 data type is interesting
* for the extra 2 bits headroom */
fix_31_16 >>= 15;
/* test the correctness of mul_16_16_by_31_16_to_48_16 by comparing
* it with the other implementations */
ref_res = ref_mul_16_16_by_31_16_to_48_16 (fix_16_16, fix_31_16);
float_res = float_mul_16_16_by_31_16_to_48_16 (fix_16_16, fix_31_16);
res = mul_16_16_by_31_16_to_48_16 (fix_16_16, fix_31_16);
assert(ref_res == res);
assert(fabs(float_res * 65536. - (__float128)res) < 0.51);
}
return 0;
}
/*******************************************************************/
--
Best regards,
Siarhei Siamashka
More information about the Pixman
mailing list