[Mesa-dev] [PATCH 4/4] mesa: Reduce memory usage for reg alloc with many graph nodes (part 2).

Kenneth Graunke kenneth at whitecape.org
Sun Mar 3 00:47:19 PST 2013


On 02/19/2013 06:03 PM, Eric Anholt wrote:
> After the previous fix that almost removes an allocation of 4*n^2
> bytes, we can use a bitset to reduce another allocation from n^2 bytes
> to n^2/8 bytes.
>
> Between the previous commit and this one, the peak heap size for an
> oglconform ARB_fragment_program max instructions test on i965 goes from
> 4GB to 255MB.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=55825
> ---
>   src/mesa/program/register_allocate.c |   12 ++++++++----
>   1 file changed, 8 insertions(+), 4 deletions(-)
>
> diff --git a/src/mesa/program/register_allocate.c b/src/mesa/program/register_allocate.c
> index 5862c78..a9064c3 100644
> --- a/src/mesa/program/register_allocate.c
> +++ b/src/mesa/program/register_allocate.c
> @@ -75,6 +75,7 @@
>   #include "main/imports.h"
>   #include "main/macros.h"
>   #include "main/mtypes.h"
> +#include "main/bitset.h"
>   #include "register_allocate.h"
>
>   #define NO_REG ~0
> @@ -118,7 +119,7 @@ struct ra_node {
>       * List of which nodes this node interferes with.  This should be
>       * symmetric with the other node.
>       */
> -   GLboolean *adjacency;
> +   BITSET_WORD *adjacency;
>      unsigned int *adjacency_list;
>      unsigned int adjacency_list_size;
>      unsigned int adjacency_count;
> @@ -307,7 +308,7 @@ ra_set_finalize(struct ra_regs *regs, unsigned int **q_values)
>   static void
>   ra_add_node_adjacency(struct ra_graph *g, unsigned int n1, unsigned int n2)
>   {
> -   g->nodes[n1].adjacency[n2] = GL_TRUE;
> +   BITSET_SET(g->nodes[n1].adjacency, n2);
>
>      if (g->nodes[n1].adjacency_count >=
>          g->nodes[n1].adjacency_list_size) {
> @@ -335,11 +336,14 @@ ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count)
>      g->stack = rzalloc_array(g, unsigned int, count);
>
>      for (i = 0; i < count; i++) {
> -      g->nodes[i].adjacency = rzalloc_array(g, GLboolean, count);
> +      int bitset_count = ALIGN(count, BITSET_WORDBITS) / BITSET_WORDBITS;
> +      g->nodes[i].adjacency = rzalloc_array(g, BITSET_WORD, bitset_count);
> +
>         g->nodes[i].adjacency_list_size = 4;
>         g->nodes[i].adjacency_list =
>            ralloc_array(g, unsigned int, g->nodes[i].adjacency_list_size);
>         g->nodes[i].adjacency_count = 0;
> +
>         ra_add_node_adjacency(g, i, i);
>         g->nodes[i].reg = NO_REG;
>      }
> @@ -358,7 +362,7 @@ void
>   ra_add_node_interference(struct ra_graph *g,
>   			 unsigned int n1, unsigned int n2)
>   {
> -   if (!g->nodes[n1].adjacency[n2]) {
> +   if (!BITSET_TEST(g->nodes[n1].adjacency, n2)) {
>         ra_add_node_adjacency(g, n1, n2);
>         ra_add_node_adjacency(g, n2, n1);
>      }

We could also make a C++ bitset using operator[] to provide an identical 
interface that still saves the memory.

But this seems fine too.

For patches 2-4:
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>


More information about the mesa-dev mailing list