[Mesa-dev] [PATCH V2] mesa: add SSE optimisation for glDrawElements

Timothy Arceri t_arceri at yahoo.com.au
Fri Oct 24 05:56:04 PDT 2014


On Fri, 2014-10-24 at 23:47 +1100, Timothy Arceri wrote:

> +#ifdef __SSE4_1__
> +#include "main/glheader.h"

I just noticed this extra include of main/glheader.h after sending out the patch. It's fixed now.

> +#include "main/sse_minmax.h"
> +#include <smmintrin.h>
> +
> +void
> +_mesa_uint_array_min_max(const unsigned *ui_indices, unsigned *min_index,
> +                         unsigned *max_index, const unsigned count)
> +{
> +   unsigned i = 0;
> +   unsigned max_ui = 0;
> +   unsigned min_ui = ~0U;
> +
> +   if (count >= 4) {
> +      unsigned max_arr[4] __attribute__ ((aligned (16)));
> +      unsigned min_arr[4] __attribute__ ((aligned (16)));
> +      unsigned vec_count;
> +      __m128i max_ui4 = _mm_setzero_si128();
> +      __m128i min_ui4 = _mm_set1_epi32(~0U);
> +      __m128i ui_indices4;
> +      __m128i *ui_indices_ptr;
> +
> +      vec_count = count & ~0x3;
> +      ui_indices_ptr = (__m128i*)ui_indices;
> +      for (i = 0; i < vec_count / 4; i++) {
> +         ui_indices4 = _mm_loadu_si128(&ui_indices_ptr[i]);
> +         max_ui4 = _mm_max_epu32(ui_indices4, max_ui4);
> +         min_ui4 = _mm_min_epu32(ui_indices4, min_ui4);
> +      }
> +
> +      _mm_store_si128((__m128i*)max_arr, max_ui4);
> +      _mm_store_si128((__m128i*)min_arr, min_ui4);
> +
> +      for (i = 0; i < 4; i++) {
> +         if (max_arr[i] > max_ui)
> +            max_ui = max_arr[i];
> +         if (min_arr[i] < min_ui)
> +            min_ui = min_arr[i];
> +      }
> +      i = vec_count;
> +   }
> +
> +   for (; i < count; i++) {
> +      if (ui_indices[i] > max_ui)
> +         max_ui = ui_indices[i];
> +      if (ui_indices[i] < min_ui)
> +         min_ui = ui_indices[i];
> +   }
> +
> +   *min_index = min_ui;
> +   *max_index = max_ui;
> +}
> +
> +#endif





More information about the mesa-dev mailing list