[Mesa-dev] [PATCH V2] mesa: add SSE optimisation for glDrawElements
Timothy Arceri
t_arceri at yahoo.com.au
Fri Oct 24 05:56:04 PDT 2014
On Fri, 2014-10-24 at 23:47 +1100, Timothy Arceri wrote:
> +#ifdef __SSE4_1__
> +#include "main/glheader.h"
I just noticed this extra header (the "main/glheader.h" include quoted directly above) after sending out the patch. It is fixed now.
> +#include "main/sse_minmax.h"
> +#include <smmintrin.h>
> +
> +void
> +_mesa_uint_array_min_max(const unsigned *ui_indices, unsigned *min_index,
> + unsigned *max_index, const unsigned count)
> +{
> + unsigned i = 0;
> + unsigned max_ui = 0;
> + unsigned min_ui = ~0U;
> +
> + if (count >= 4) {
> + unsigned max_arr[4] __attribute__ ((aligned (16)));
> + unsigned min_arr[4] __attribute__ ((aligned (16)));
> + unsigned vec_count;
> + __m128i max_ui4 = _mm_setzero_si128();
> + __m128i min_ui4 = _mm_set1_epi32(~0U);
> + __m128i ui_indices4;
> + __m128i *ui_indices_ptr;
> +
> + vec_count = count & ~0x3;
> + ui_indices_ptr = (__m128i*)ui_indices;
> + for (i = 0; i < vec_count / 4; i++) {
> + ui_indices4 = _mm_loadu_si128(&ui_indices_ptr[i]);
> + max_ui4 = _mm_max_epu32(ui_indices4, max_ui4);
> + min_ui4 = _mm_min_epu32(ui_indices4, min_ui4);
> + }
> +
> + _mm_store_si128((__m128i*)max_arr, max_ui4);
> + _mm_store_si128((__m128i*)min_arr, min_ui4);
> +
> + for (i = 0; i < 4; i++) {
> + if (max_arr[i] > max_ui)
> + max_ui = max_arr[i];
> + if (min_arr[i] < min_ui)
> + min_ui = min_arr[i];
> + }
> + i = vec_count;
> + }
> +
> + for (; i < count; i++) {
> + if (ui_indices[i] > max_ui)
> + max_ui = ui_indices[i];
> + if (ui_indices[i] < min_ui)
> + min_ui = ui_indices[i];
> + }
> +
> + *min_index = min_ui;
> + *max_index = max_ui;
> +}
> +
> +#endif
More information about the mesa-dev mailing list