[Cogl] [PATCH] Re-design the matrix stack using a graph of ops

Robert Bragg robert at sixbynine.org
Thu Apr 19 03:46:52 PDT 2012


From: Robert Bragg <robert at linux.intel.com>

This re-designs the matrix stack so we now keep track of each separate
operation such as rotating, scaling, translating and multiplying as
immutable, ref-counted nodes in a graph.

Being a "graph" here means that different transformations composed of
a sequence of linked operation nodes may share nodes.

The first node in a matrix-stack is always a LOAD_IDENTITY operation.

As an example, consider if an application were to draw three rectangles
A, B and C, something like this:

cogl_framebuffer_scale (fb, 2, 2, 2);
cogl_framebuffer_push_matrix(fb);

  cogl_framebuffer_translate (fb, 10, 0, 0);

  cogl_framebuffer_push_matrix(fb);

    cogl_framebuffer_rotate (fb, 45, 0, 0, 1);
    cogl_framebuffer_draw_rectangle (...); /* A */

  cogl_framebuffer_pop_matrix(fb);

  cogl_framebuffer_draw_rectangle (...); /* B */

cogl_framebuffer_pop_matrix(fb);

cogl_framebuffer_push_matrix(fb);
  cogl_framebuffer_set_modelview_matrix (fb, &mv);
  cogl_framebuffer_draw_rectangle (...); /* C */
cogl_framebuffer_pop_matrix(fb);

That would result in a graph of nodes like this:

LOAD_IDENTITY
      |
    SCALE
    /     \
SAVE       LOAD
  |           |
TRANSLATE    RECTANGLE(C)
  |     \
SAVE    RECTANGLE(B)
  |
ROTATE
  |
RECTANGLE(A)

Each push adds a SAVE operation which serves as a marker to rewind to
when a corresponding pop is issued. Each SAVE node may also store a
cached matrix representing the composition of all its ancestor nodes.
This means that if we repeatedly need to resolve a real CoglMatrix for
a given node then we don't need to repeat the composition.

Some advantages of this design are:
- A single pointer to any node in the graph can now represent a
  complete, immutable transformation that can be logged for example
  into a journal. Previously we were storing a full CoglMatrix in
  each journal entry which is 16 floats for the matrix itself as well
  as space for flags and another 16 floats for possibly storing a
  cache of the inverse. This means that we significantly reduce
  the size of the journal when drawing lots of primitives and we also
  avoid copying over 128 bytes per entry.
- It becomes much cheaper to check for equality. In cases where some
  (unlikely) false negatives are allowed, simply comparing the pointers
  of two matrix stack graph entries is enough. Previously we would use
  memcmp() to compare matrices.
- It becomes easier to do comparisons of transformations. By looking
  for the common ancestry between nodes we can determine the operations
  that differentiate the transforms and use those to gain a high level
  understanding of the differences. For example we use this in the
  journal to be able to efficiently determine when two rectangle
  transforms only differ by some translation so that we can perform
  software clipping.
---
 cogl/cogl-attribute.c               |    5 +-
 cogl/cogl-clip-stack.c              |  296 ++++++----
 cogl/cogl-clip-stack.h              |   37 +-
 cogl/cogl-context-private.h         |   25 +-
 cogl/cogl-context.c                 |   46 +-
 cogl/cogl-debug-options.h           |    5 +
 cogl/cogl-debug.c                   |    3 +-
 cogl/cogl-debug.h                   |    1 +
 cogl/cogl-framebuffer.c             |   97 +++-
 cogl/cogl-journal-private.h         |    5 +-
 cogl/cogl-journal.c                 |  188 ++-----
 cogl/cogl-matrix-private.h          |    7 +
 cogl/cogl-matrix-stack.c            | 1114 +++++++++++++++++++++++++----------
 cogl/cogl-matrix-stack.h            |  212 ++++++--
 cogl/cogl-matrix.c                  |   46 +-
 cogl/cogl-pipeline-opengl-private.h |    1 +
 cogl/cogl-pipeline-opengl.c         |   12 +-
 cogl/cogl-pipeline-private.h        |    6 +-
 cogl/cogl-pipeline-progend-fixed.c  |   20 +-
 cogl/cogl-pipeline-progend-glsl.c   |   71 ++--
 cogl/cogl-pipeline-vertend-fixed.c  |   12 +-
 cogl/cogl-pipeline-vertend-glsl.c   |   10 +-
 cogl/cogl.c                         |    1 +
 tests/micro-perf/test-journal.c     |    4 +-
 24 files changed, 1450 insertions(+), 774 deletions(-)

diff --git a/cogl/cogl-attribute.c b/cogl/cogl-attribute.c
index 3a6244f..3c4cc33 100644
--- a/cogl/cogl-attribute.c
+++ b/cogl/cogl-attribute.c
@@ -661,7 +661,10 @@ _cogl_flush_attributes_state (CoglFramebuffer *framebuffer,
        */
     }
 
-  _cogl_pipeline_flush_gl_state (pipeline, skip_gl_color, n_tex_coord_attribs);
+  _cogl_pipeline_flush_gl_state (pipeline,
+                                 framebuffer,
+                                 skip_gl_color,
+                                 n_tex_coord_attribs);
 
   _cogl_bitmask_clear_all (&ctx->enable_builtin_attributes_tmp);
   _cogl_bitmask_clear_all (&ctx->enable_texcoord_attributes_tmp);
diff --git a/cogl/cogl-clip-stack.c b/cogl/cogl-clip-stack.c
index 1de2960..8a1d9de 100644
--- a/cogl/cogl-clip-stack.c
+++ b/cogl/cogl-clip-stack.c
@@ -46,6 +46,7 @@
 #include "cogl-primitive-private.h"
 #include "cogl1-context.h"
 #include "cogl-offscreen.h"
+#include "cogl-matrix-stack.h"
 
 #ifndef GL_CLIP_PLANE0
 #define GL_CLIP_PLANE0 0x3000
@@ -113,9 +114,10 @@ set_clip_plane (CoglFramebuffer *framebuffer,
   /* Clip planes can only be used when a fixed function backend is in
      use so we know we can directly push this matrix to the builtin
      state */
-  _cogl_matrix_stack_flush_to_gl_builtins (ctx,
-                                           modelview_stack,
+  _cogl_matrix_entry_flush_to_gl_builtins (ctx,
+                                           modelview_stack->last_entry,
                                            COGL_MATRIX_MODELVIEW,
+                                           framebuffer,
                                            FALSE /* don't disable flip */);
 
   planef[0] = 0;
@@ -147,13 +149,12 @@ set_clip_plane (CoglFramebuffer *framebuffer,
 
 static void
 set_clip_planes (CoglFramebuffer *framebuffer,
+                 CoglMatrixEntry *modelview_entry,
                  float x_1,
-		 float y_1,
-		 float x_2,
-		 float y_2)
+                 float y_1,
+                 float x_2,
+                 float y_2)
 {
-  CoglMatrixStack *modelview_stack =
-    _cogl_framebuffer_get_modelview_stack (framebuffer);
   CoglMatrix modelview_matrix;
   CoglMatrixStack *projection_stack =
     _cogl_framebuffer_get_projection_stack (framebuffer);
@@ -167,7 +168,7 @@ set_clip_planes (CoglFramebuffer *framebuffer,
   float vertex_br[4] = { x_2, y_2, 0, 1.0 };
 
   _cogl_matrix_stack_get (projection_stack, &projection_matrix);
-  _cogl_matrix_stack_get (modelview_stack, &modelview_matrix);
+  _cogl_matrix_entry_get (modelview_entry, &modelview_matrix);
 
   cogl_matrix_multiply (&modelview_projection,
                         &projection_matrix,
@@ -207,23 +208,24 @@ set_clip_planes (CoglFramebuffer *framebuffer,
 
 static void
 add_stencil_clip_rectangle (CoglFramebuffer *framebuffer,
+                            CoglMatrixEntry *modelview_entry,
                             float x_1,
                             float y_1,
                             float x_2,
                             float y_2,
                             CoglBool first)
 {
-  CoglMatrixStack *modelview_stack =
-    _cogl_framebuffer_get_modelview_stack (framebuffer);
   CoglMatrixStack *projection_stack =
     _cogl_framebuffer_get_projection_stack (framebuffer);
   CoglContext *ctx = cogl_framebuffer_get_context (framebuffer);
 
-  /* This can be called from the journal code which doesn't flush
-     the matrix stacks between calls so we need to ensure they're
-     flushed now */
-  _cogl_context_set_current_projection (ctx, projection_stack);
-  _cogl_context_set_current_modelview (ctx, modelview_stack);
+  /* NB: This can be called while flushing the journal so we need
+   * to be very conservative with what state we change.
+   */
+
+  _cogl_context_set_current_projection_entry (ctx,
+                                              projection_stack->last_entry);
+  _cogl_context_set_current_modelview_entry (ctx, modelview_entry);
 
   if (first)
     {
@@ -256,21 +258,12 @@ add_stencil_clip_rectangle (CoglFramebuffer *framebuffer,
 	 rectangle are set will be valid */
       GE( ctx, glStencilOp (GL_DECR, GL_DECR, GL_DECR) );
 
-      _cogl_matrix_stack_push (projection_stack);
-      _cogl_matrix_stack_load_identity (projection_stack);
-
-      _cogl_matrix_stack_push (modelview_stack);
-      _cogl_matrix_stack_load_identity (modelview_stack);
-
-      _cogl_context_set_current_projection (ctx, projection_stack);
-      _cogl_context_set_current_modelview (ctx, modelview_stack);
+      _cogl_context_set_current_projection_entry (ctx, &ctx->identity_entry);
+      _cogl_context_set_current_modelview_entry (ctx, &ctx->identity_entry);
 
       _cogl_rectangle_immediate (framebuffer,
                                  ctx->stencil_pipeline,
                                  -1.0, -1.0, 1.0, 1.0);
-
-      _cogl_matrix_stack_pop (modelview_stack);
-      _cogl_matrix_stack_pop (projection_stack);
     }
 
   /* Restore the stencil mode */
@@ -285,6 +278,7 @@ typedef void (*SilhouettePaintCallback) (CoglFramebuffer *framebuffer,
 static void
 add_stencil_clip_silhouette (CoglFramebuffer *framebuffer,
                              SilhouettePaintCallback silhouette_callback,
+                             CoglMatrixEntry *modelview_entry,
                              float bounds_x1,
                              float bounds_y1,
                              float bounds_x2,
@@ -293,19 +287,19 @@ add_stencil_clip_silhouette (CoglFramebuffer *framebuffer,
                              CoglBool need_clear,
                              void *user_data)
 {
-  CoglMatrixStack *modelview_stack =
-    _cogl_framebuffer_get_modelview_stack (framebuffer);
   CoglMatrixStack *projection_stack =
     _cogl_framebuffer_get_projection_stack (framebuffer);
   CoglContext *ctx = cogl_framebuffer_get_context (framebuffer);
 
-  /* This can be called from the clip stack code which doesn't flush
-     the matrix stacks between calls so we need to ensure they're
-     flushed now */
-  _cogl_context_set_current_projection (ctx, projection_stack);
-  _cogl_context_set_current_modelview (ctx, modelview_stack);
+  /* NB: This can be called while flushing the journal so we need
+   * to be very conservative with what state we change.
+   */
+
+  _cogl_context_set_current_projection_entry (ctx,
+                                              projection_stack->last_entry);
+  _cogl_context_set_current_modelview_entry (ctx, modelview_entry);
 
-  _cogl_pipeline_flush_gl_state (ctx->stencil_pipeline, FALSE, 0);
+  _cogl_pipeline_flush_gl_state (ctx->stencil_pipeline, framebuffer, FALSE, 0);
 
   GE( ctx, glEnable (GL_STENCIL_TEST) );
 
@@ -360,19 +354,13 @@ add_stencil_clip_silhouette (CoglFramebuffer *framebuffer,
       /* Decrement all of the bits twice so that only pixels where the
          value is 3 will remain */
 
-      _cogl_matrix_stack_push (projection_stack);
-      _cogl_matrix_stack_load_identity (projection_stack);
-
-      _cogl_matrix_stack_push (modelview_stack);
-      _cogl_matrix_stack_load_identity (modelview_stack);
+      _cogl_context_set_current_projection_entry (ctx, &ctx->identity_entry);
+      _cogl_context_set_current_modelview_entry (ctx, &ctx->identity_entry);
 
       _cogl_rectangle_immediate (framebuffer, ctx->stencil_pipeline,
                                  -1.0, -1.0, 1.0, 1.0);
       _cogl_rectangle_immediate (framebuffer, ctx->stencil_pipeline,
                                  -1.0, -1.0, 1.0, 1.0);
-
-      _cogl_matrix_stack_pop (modelview_stack);
-      _cogl_matrix_stack_pop (projection_stack);
     }
 
   GE (ctx, glStencilMask (~(GLuint) 0));
@@ -400,6 +388,7 @@ paint_path_silhouette (CoglFramebuffer *framebuffer,
 
 static void
 add_stencil_clip_path (CoglFramebuffer *framebuffer,
+                       CoglMatrixEntry *modelview_entry,
                        CoglPath *path,
                        CoglBool merge,
                        CoglBool need_clear)
@@ -407,6 +396,7 @@ add_stencil_clip_path (CoglFramebuffer *framebuffer,
   CoglPathData *data = path->data;
   add_stencil_clip_silhouette (framebuffer,
                                paint_path_silhouette,
+                               modelview_entry,
                                data->path_nodes_min.x,
                                data->path_nodes_min.y,
                                data->path_nodes_max.x,
@@ -431,6 +421,7 @@ paint_primitive_silhouette (CoglFramebuffer *framebuffer,
 
 static void
 add_stencil_clip_primitive (CoglFramebuffer *framebuffer,
+                            CoglMatrixEntry *modelview_entry,
                             CoglPrimitive *primitive,
                             float bounds_x1,
                             float bounds_y1,
@@ -441,6 +432,7 @@ add_stencil_clip_primitive (CoglFramebuffer *framebuffer,
 {
   add_stencil_clip_silhouette (framebuffer,
                                paint_primitive_silhouette,
+                               modelview_entry,
                                bounds_x1,
                                bounds_y1,
                                bounds_x2,
@@ -499,32 +491,49 @@ _cogl_clip_stack_push_entry (CoglClipStack *clip_stack,
   return entry;
 }
 
+static void
+get_transformed_corners (float x_1,
+                         float y_1,
+                         float x_2,
+                         float y_2,
+                         CoglMatrix *modelview,
+                         CoglMatrix *projection,
+                         const float *viewport,
+                         float *transformed_corners)
+{
+  int i;
+
+  transformed_corners[0] = x_1;
+  transformed_corners[1] = y_1;
+  transformed_corners[2] = x_2;
+  transformed_corners[3] = y_1;
+  transformed_corners[4] = x_2;
+  transformed_corners[5] = y_2;
+  transformed_corners[6] = x_1;
+  transformed_corners[7] = y_2;
+
+
+  /* Project the coordinates to window space coordinates */
+  for (i = 0; i < 4; i++)
+    {
+      float *v = transformed_corners + i * 2;
+      _cogl_transform_point (modelview, projection, viewport, v, v + 1);
+    }
+}
+
 /* Sets the window-space bounds of the entry based on the projected
    coordinates of the given rectangle */
 static void
 _cogl_clip_stack_entry_set_bounds (CoglClipStack *entry,
-                                   float x_1,
-                                   float y_1,
-                                   float x_2,
-                                   float y_2,
-                                   const CoglMatrix *modelview)
+                                   float *transformed_corners)
 {
-  CoglMatrix projection;
-  float viewport[4];
-  float verts[4 * 2] = { x_1, y_1, x_2, y_1, x_2, y_2, x_1, y_2 };
   float min_x = G_MAXFLOAT, min_y = G_MAXFLOAT;
   float max_x = -G_MAXFLOAT, max_y = -G_MAXFLOAT;
   int i;
 
-  cogl_get_projection_matrix (&projection);
-  cogl_get_viewport (viewport);
-
   for (i = 0; i < 4; i++)
     {
-      float *v = verts + i * 2;
-
-      /* Project the coordinates to window space coordinates */
-      _cogl_transform_point (modelview, &projection, viewport, v, v + 1);
+      float *v = transformed_corners + i * 2;
 
       if (v[0] > max_x)
         max_x = v[0];
@@ -569,11 +578,28 @@ _cogl_clip_stack_push_rectangle (CoglClipStack *stack,
                                  float y_1,
                                  float x_2,
                                  float y_2,
-                                 const CoglMatrix *modelview_matrix)
+                                 CoglMatrixEntry *modelview_entry,
+                                 CoglMatrixEntry *projection_entry,
+                                 const float *viewport)
 {
   CoglClipStackRect *entry;
-  CoglMatrix matrix_p;
-  float v[4];
+  CoglMatrix modelview;
+  CoglMatrix projection;
+  CoglMatrix modelview_projection;
+
+  /* Corners of the given rectangle in a clockwise order:
+   *  (0, 1)     (2, 3)
+   *
+   *
+   *
+   *  (6, 7)     (4, 5)
+   */
+  float rect[] = {
+    x_1, y_1,
+    x_2, y_1,
+    x_2, y_2,
+    x_1, y_2
+  };
 
   /* Make a new entry */
   entry = _cogl_clip_stack_push_entry (stack,
@@ -585,34 +611,46 @@ _cogl_clip_stack_push_rectangle (CoglClipStack *stack,
   entry->x1 = x_2;
   entry->y1 = y_2;
 
-  entry->matrix = *modelview_matrix;
+  entry->matrix_entry = _cogl_matrix_entry_ref (modelview_entry);
+
+  _cogl_matrix_entry_get (modelview_entry, &modelview);
+  _cogl_matrix_entry_get (projection_entry, &projection);
 
-  /* If the modelview meets these constraints then a transformed rectangle
-   * should still be a rectangle when it reaches screen coordinates.
+  cogl_matrix_multiply (&modelview_projection,
+                        &projection,
+                        &modelview);
+
+  /* Technically we could avoid the viewport transform at this point
+   * if we want to make this a bit faster. */
+  _cogl_transform_point (&modelview, &projection, viewport, &rect[0], &rect[1]);
+  _cogl_transform_point (&modelview, &projection, viewport, &rect[2], &rect[3]);
+  _cogl_transform_point (&modelview, &projection, viewport, &rect[4], &rect[5]);
+  _cogl_transform_point (&modelview, &projection, viewport, &rect[6], &rect[7]);
+
+  /* If the fully transformed rectangle isn't still axis aligned we
+   * can't handle it using a scissor.
    *
-   * FIXME: we are are making certain assumptions about the projection
-   * matrix a.t.m and should really be looking at the combined modelview
-   * and projection matrix.
-   * FIXME: we don't consider rotations that are a multiple of 90 degrees
-   * which could be quite common.
+   * We don't use an epsilon here since we only really aim to catch
+   * simple cases where the transform doesn't leave the rectangle screen
+   * aligned and don't mind some false positives.
    */
-  if (modelview_matrix->xy != 0 || modelview_matrix->xz != 0 ||
-      modelview_matrix->yx != 0 || modelview_matrix->yz != 0 ||
-      modelview_matrix->zx != 0 || modelview_matrix->zy != 0)
+  if (rect[0] != rect[6] ||
+      rect[1] != rect[3] ||
+      rect[2] != rect[4] ||
+      rect[7] != rect[5])
     {
       entry->can_be_scissor = FALSE;
+
       _cogl_clip_stack_entry_set_bounds ((CoglClipStack *) entry,
-                                         x_1, y_1, x_2, y_2, modelview_matrix);
+                                         rect);
     }
   else
     {
       CoglClipStack *base_entry = (CoglClipStack *) entry;
-
-      cogl_get_projection_matrix (&matrix_p);
-      cogl_get_viewport (v);
-
-      _cogl_transform_point (modelview_matrix, &matrix_p, v, &x_1, &y_1);
-      _cogl_transform_point (modelview_matrix, &matrix_p, v, &x_2, &y_2);
+      x_1 = rect[0];
+      y_1 = rect[1];
+      x_2 = rect[4];
+      y_2 = rect[5];
 
       /* Consider that the modelview matrix may flip the rectangle
        * along the x or y axis... */
@@ -636,7 +674,9 @@ _cogl_clip_stack_push_rectangle (CoglClipStack *stack,
 CoglClipStack *
 _cogl_clip_stack_push_from_path (CoglClipStack *stack,
                                  CoglPath *path,
-                                 const CoglMatrix *modelview_matrix)
+                                 CoglMatrixEntry *modelview_entry,
+                                 CoglMatrixEntry *projection_entry,
+                                 const float *viewport)
 {
   float x_1, y_1, x_2, y_2;
 
@@ -649,10 +689,15 @@ _cogl_clip_stack_push_from_path (CoglClipStack *stack,
     return _cogl_clip_stack_push_rectangle (stack,
                                             x_1, y_1,
                                             x_2, y_2,
-                                            modelview_matrix);
+                                            modelview_entry,
+                                            projection_entry,
+                                            viewport);
   else
     {
       CoglClipStackPath *entry;
+      CoglMatrix modelview;
+      CoglMatrix projection;
+      float transformed_corners[8];
 
       entry = _cogl_clip_stack_push_entry (stack,
                                            sizeof (CoglClipStackPath),
@@ -660,10 +705,18 @@ _cogl_clip_stack_push_from_path (CoglClipStack *stack,
 
       entry->path = cogl_path_copy (path);
 
-      entry->matrix = *modelview_matrix;
+      entry->matrix_entry = _cogl_matrix_entry_ref (modelview_entry);
 
+      _cogl_matrix_entry_get (modelview_entry, &modelview);
+      _cogl_matrix_entry_get (projection_entry, &projection);
+
+      get_transformed_corners (x_1, y_1, x_2, y_2,
+                               &modelview,
+                               &projection,
+                               viewport,
+                               transformed_corners);
       _cogl_clip_stack_entry_set_bounds ((CoglClipStack *) entry,
-                                         x_1, y_1, x_2, y_2, modelview_matrix);
+                                         transformed_corners);
 
       return (CoglClipStack *) entry;
     }
@@ -676,9 +729,14 @@ _cogl_clip_stack_push_primitive (CoglClipStack *stack,
                                  float bounds_y1,
                                  float bounds_x2,
                                  float bounds_y2,
-                                 const CoglMatrix *modelview_matrix)
+                                 CoglMatrixEntry *modelview_entry,
+                                 CoglMatrixEntry *projection_entry,
+                                 const float *viewport)
 {
   CoglClipStackPrimitive *entry;
+  CoglMatrix modelview;
+  CoglMatrix projection;
+  float transformed_corners[8];
 
   entry = _cogl_clip_stack_push_entry (stack,
                                        sizeof (CoglClipStackPrimitive),
@@ -686,18 +744,26 @@ _cogl_clip_stack_push_primitive (CoglClipStack *stack,
 
   entry->primitive = cogl_object_ref (primitive);
 
-  entry->matrix = *modelview_matrix;
+  entry->matrix_entry = _cogl_matrix_entry_ref (modelview_entry);
 
   entry->bounds_x1 = bounds_x1;
   entry->bounds_y1 = bounds_y1;
   entry->bounds_x2 = bounds_x2;
   entry->bounds_y2 = bounds_y2;
 
+  _cogl_matrix_entry_get (modelview_entry, &modelview);
+  _cogl_matrix_entry_get (projection_entry, &projection);
+
+  get_transformed_corners (bounds_x1, bounds_y1, bounds_x2, bounds_y2,
+                           &modelview,
+                           &projection,
+                           viewport,
+                           transformed_corners);
+
   /* NB: this is referring to the bounds in window coordinates as opposed
    * to the bounds above in primitive local coordinates. */
   _cogl_clip_stack_entry_set_bounds ((CoglClipStack *) entry,
-                                     bounds_x1, bounds_y1, bounds_x2, bounds_y2,
-                                     modelview_matrix);
+                                     transformed_corners);
 
   return (CoglClipStack *) entry;
 }
@@ -725,23 +791,33 @@ _cogl_clip_stack_unref (CoglClipStack *entry)
       switch (entry->type)
         {
         case COGL_CLIP_STACK_RECT:
-          g_slice_free1 (sizeof (CoglClipStackRect), entry);
-          break;
-
+          {
+            CoglClipStackRect *rect = (CoglClipStackRect *) entry;
+            _cogl_matrix_entry_unref (rect->matrix_entry);
+            g_slice_free1 (sizeof (CoglClipStackRect), entry);
+            break;
+          }
         case COGL_CLIP_STACK_WINDOW_RECT:
           g_slice_free1 (sizeof (CoglClipStackWindowRect), entry);
           break;
 
         case COGL_CLIP_STACK_PATH:
-          cogl_object_unref (((CoglClipStackPath *) entry)->path);
-          g_slice_free1 (sizeof (CoglClipStackPath), entry);
-          break;
-
+          {
+            CoglClipStackPath *path_entry = (CoglClipStackPath *) entry;
+            _cogl_matrix_entry_unref (path_entry->matrix_entry);
+            cogl_object_unref (path_entry->path);
+            g_slice_free1 (sizeof (CoglClipStackPath), entry);
+            break;
+          }
         case COGL_CLIP_STACK_PRIMITIVE:
-          cogl_object_unref (((CoglClipStackPrimitive *) entry)->primitive);
-          g_slice_free1 (sizeof (CoglClipStackPrimitive), entry);
-          break;
-
+          {
+            CoglClipStackPrimitive *primitive_entry =
+              (CoglClipStackPrimitive *) entry;
+            _cogl_matrix_entry_unref (primitive_entry->matrix_entry);
+            cogl_object_unref (primitive_entry->primitive);
+            g_slice_free1 (sizeof (CoglClipStackPrimitive), entry);
+            break;
+          }
         default:
           g_assert_not_reached ();
         }
@@ -803,6 +879,7 @@ void
 _cogl_clip_stack_flush (CoglClipStack *stack,
                         CoglFramebuffer *framebuffer)
 {
+  CoglContext *ctx = framebuffer->context;
   int has_clip_planes;
   CoglBool using_clip_planes = FALSE;
   CoglBool using_stencil_buffer = FALSE;
@@ -810,12 +887,9 @@ _cogl_clip_stack_flush (CoglClipStack *stack,
   int scissor_y0;
   int scissor_x1;
   int scissor_y1;
-  CoglMatrixStack *modelview_stack;
   CoglClipStack *entry;
   int scissor_y_start;
 
-  _COGL_GET_CONTEXT (ctx, NO_RETVAL);
-
   /* If we have already flushed this state then we don't need to do
      anything */
   if (ctx->current_clip_stack_valid)
@@ -829,9 +903,6 @@ _cogl_clip_stack_flush (CoglClipStack *stack,
   ctx->current_clip_stack_valid = TRUE;
   ctx->current_clip_stack = _cogl_clip_stack_ref (stack);
 
-  modelview_stack =
-    _cogl_framebuffer_get_modelview_stack (framebuffer);
-
   has_clip_planes =
     ctx->private_feature_flags & COGL_PRIVATE_FEATURE_FOUR_CLIP_PLANES;
 
@@ -904,16 +975,12 @@ _cogl_clip_stack_flush (CoglClipStack *stack,
 
               COGL_NOTE (CLIPPING, "Adding stencil clip for path");
 
-              _cogl_matrix_stack_push (modelview_stack);
-              _cogl_matrix_stack_set (modelview_stack, &path_entry->matrix);
-
               add_stencil_clip_path (framebuffer,
+                                     path_entry->matrix_entry,
                                      path_entry->path,
                                      using_stencil_buffer,
                                      TRUE);
 
-              _cogl_matrix_stack_pop (modelview_stack);
-
               using_stencil_buffer = TRUE;
               break;
             }
@@ -924,10 +991,8 @@ _cogl_clip_stack_flush (CoglClipStack *stack,
 
               COGL_NOTE (CLIPPING, "Adding stencil clip for primitive");
 
-              _cogl_matrix_stack_push (modelview_stack);
-              _cogl_matrix_stack_set (modelview_stack, &primitive_entry->matrix);
-
               add_stencil_clip_primitive (framebuffer,
+                                          primitive_entry->matrix_entry,
                                           primitive_entry->primitive,
                                           primitive_entry->bounds_x1,
                                           primitive_entry->bounds_y1,
@@ -936,8 +1001,6 @@ _cogl_clip_stack_flush (CoglClipStack *stack,
                                           using_stencil_buffer,
                                           TRUE);
 
-              _cogl_matrix_stack_pop (modelview_stack);
-
               using_stencil_buffer = TRUE;
               break;
             }
@@ -949,9 +1012,6 @@ _cogl_clip_stack_flush (CoglClipStack *stack,
                  rectangle was entirely described by its scissor bounds */
               if (!rect->can_be_scissor)
                 {
-                  _cogl_matrix_stack_push (modelview_stack);
-                  _cogl_matrix_stack_set (modelview_stack, &rect->matrix);
-
                   /* If we support clip planes and we haven't already used
                      them then use that instead */
                   if (has_clip_planes)
@@ -960,6 +1020,7 @@ _cogl_clip_stack_flush (CoglClipStack *stack,
                                  "Adding clip planes clip for rectangle");
 
                       set_clip_planes (framebuffer,
+                                       rect->matrix_entry,
                                        rect->x0,
                                        rect->y0,
                                        rect->x1,
@@ -973,6 +1034,7 @@ _cogl_clip_stack_flush (CoglClipStack *stack,
                       COGL_NOTE (CLIPPING, "Adding stencil clip for rectangle");
 
                       add_stencil_clip_rectangle (framebuffer,
+                                                  rect->matrix_entry,
                                                   rect->x0,
                                                   rect->y0,
                                                   rect->x1,
@@ -980,8 +1042,6 @@ _cogl_clip_stack_flush (CoglClipStack *stack,
                                                   !using_stencil_buffer);
                       using_stencil_buffer = TRUE;
                     }
-
-                  _cogl_matrix_stack_pop (modelview_stack);
                 }
               break;
             }
diff --git a/cogl/cogl-clip-stack.h b/cogl/cogl-clip-stack.h
index 04d8c30..48ab800 100644
--- a/cogl/cogl-clip-stack.h
+++ b/cogl/cogl-clip-stack.h
@@ -28,6 +28,7 @@
 #include "cogl-matrix.h"
 #include "cogl-primitive.h"
 #include "cogl-framebuffer.h"
+#include "cogl-matrix-stack.h"
 
 /* The clip stack works like a GSList where only a pointer to the top
    of the stack is stored. The empty clip stack is represented simply
@@ -112,13 +113,13 @@ struct _CoglClipStack
 
 struct _CoglClipStackRect
 {
-  CoglClipStack     _parent_data;
+  CoglClipStack _parent_data;
 
   /* The rectangle for this clip */
-  float                  x0;
-  float                  y0;
-  float                  x1;
-  float                  y1;
+  float x0;
+  float y0;
+  float x1;
+  float y1;
 
   /* If this is true then the clip for this rectangle is entirely
      described by the scissor bounds. This implies that the rectangle
@@ -128,15 +129,15 @@ struct _CoglClipStackRect
      modelview matrix is that same as when a rectangle is added to the
      journal. In that case we can use the original clip coordinates
      and modify the rectangle instead. */
-  CoglBool               can_be_scissor;
+  CoglBool can_be_scissor;
 
   /* The matrix that was current when the clip was set */
-  CoglMatrix             matrix;
+  CoglMatrixEntry *matrix_entry;
 };
 
 struct _CoglClipStackWindowRect
 {
-  CoglClipStack     _parent_data;
+  CoglClipStack _parent_data;
 
   /* The window rect clip doesn't need any specific data because it
      just adds to the scissor clip */
@@ -144,12 +145,12 @@ struct _CoglClipStackWindowRect
 
 struct _CoglClipStackPath
 {
-  CoglClipStack     _parent_data;
+  CoglClipStack _parent_data;
 
   /* The matrix that was current when the clip was set */
-  CoglMatrix             matrix;
+  CoglMatrixEntry *matrix_entry;
 
-  CoglPath              *path;
+  CoglPath *path;
 };
 
 struct _CoglClipStackPrimitive
@@ -157,7 +158,7 @@ struct _CoglClipStackPrimitive
   CoglClipStack _parent_data;
 
   /* The matrix that was current when the clip was set */
-  CoglMatrix matrix;
+  CoglMatrixEntry *matrix_entry;
 
   CoglPrimitive *primitive;
 
@@ -180,12 +181,16 @@ _cogl_clip_stack_push_rectangle (CoglClipStack *stack,
                                  float y_1,
                                  float x_2,
                                  float y_2,
-                                 const CoglMatrix *modelview_matrix);
+                                 CoglMatrixEntry *modelview_entry,
+                                 CoglMatrixEntry *projection_entry,
+                                 const float *viewport);
 
 CoglClipStack *
 _cogl_clip_stack_push_from_path (CoglClipStack *stack,
                                  CoglPath *path,
-                                 const CoglMatrix *modelview_matrix);
+                                 CoglMatrixEntry *modelview_entry,
+                                 CoglMatrixEntry *projection_entry,
+                                 const float *viewport);
 
 CoglClipStack *
 _cogl_clip_stack_push_primitive (CoglClipStack *stack,
@@ -194,7 +199,9 @@ _cogl_clip_stack_push_primitive (CoglClipStack *stack,
                                  float bounds_y1,
                                  float bounds_x2,
                                  float bounds_y2,
-                                 const CoglMatrix *modelview_matrix);
+                                 CoglMatrixEntry *modelview_entry,
+                                 CoglMatrixEntry *projection_entry,
+                                 const float *viewport);
 
 CoglClipStack *
 _cogl_clip_stack_pop (CoglClipStack *stack);
diff --git a/cogl/cogl-context-private.h b/cogl/cogl-context-private.h
index b94eabf..a1776dc 100644
--- a/cogl/cogl-context-private.h
+++ b/cogl/cogl-context-private.h
@@ -108,14 +108,17 @@ struct _CoglContext
      calling it multiple times */
   CoglMatrixMode    flushed_matrix_mode;
 
-  /* The matrix stack that should be used for the next render */
-  CoglMatrixStack  *current_projection_stack;
-  CoglMatrixStack  *current_modelview_stack;
+  /* The matrix stack entries that should be flushed during the next
+   * pipeline state flush */
+  CoglMatrixEntry *current_projection_entry;
+  CoglMatrixEntry *current_modelview_entry;
 
-  /* The last matrix stack with age that was flushed to the GL matrix
-     builtins */
-  CoglMatrixStackCache builtin_flushed_projection;
-  CoglMatrixStackCache builtin_flushed_modelview;
+  CoglMatrixEntry identity_entry;
+
+  /* A cache of the last (immutable) matrix stack entries that were
+   * flushed to the GL matrix builtins */
+  CoglMatrixEntryCache builtin_flushed_projection;
+  CoglMatrixEntryCache builtin_flushed_modelview;
 
   GArray           *texture_units;
   int               active_texture_unit;
@@ -324,11 +327,11 @@ if (ctxvar == NULL) return retval;
 #define NO_RETVAL
 
 void
-_cogl_context_set_current_projection (CoglContext *context,
-                                      CoglMatrixStack *stack);
+_cogl_context_set_current_projection_entry (CoglContext *context,
+                                            CoglMatrixEntry *entry);
 
 void
-_cogl_context_set_current_modelview (CoglContext *context,
-                                     CoglMatrixStack *stack);
+_cogl_context_set_current_modelview_entry (CoglContext *context,
+                                           CoglMatrixEntry *entry);
 
 #endif /* __COGL_CONTEXT_PRIVATE_H */
diff --git a/cogl/cogl-context.c b/cogl/cogl-context.c
index 45ed8e0..b989a8a 100644
--- a/cogl/cogl-context.c
+++ b/cogl/cogl-context.c
@@ -386,10 +386,11 @@ cogl_context_new (CoglDisplay *display,
     GE (context, glEnable (GL_ALPHA_TEST));
 #endif
 
-  _context->current_modelview_stack = NULL;
-  _context->current_projection_stack = NULL;
-  _cogl_matrix_stack_init_cache (&_context->builtin_flushed_projection);
-  _cogl_matrix_stack_init_cache (&_context->builtin_flushed_modelview);
+  _context->current_modelview_entry = NULL;
+  _context->current_projection_entry = NULL;
+  _cogl_matrix_entry_identity_init (&_context->identity_entry);
+  _cogl_matrix_entry_cache_init (&_context->builtin_flushed_projection);
+  _cogl_matrix_entry_cache_init (&_context->builtin_flushed_modelview);
 
   default_texture_bitmap =
     cogl_bitmap_new_for_data (_context,
@@ -420,7 +421,6 @@ cogl_context_new (CoglDisplay *display,
   cogl_object_unref (default_texture_bitmap);
 
   cogl_push_source (context->opaque_color_pipeline);
-  _cogl_pipeline_flush_gl_state (context->opaque_color_pipeline, FALSE, 0);
 
   context->atlases = NULL;
   g_hook_list_init (&context->atlas_reorganize_callbacks, sizeof (GHook));
@@ -513,12 +513,12 @@ _cogl_context_free (CoglContext *context)
   g_slist_free (context->texture_types);
   g_slist_free (context->buffer_types);
 
-  if (_context->current_modelview_stack)
-    cogl_object_unref (_context->current_modelview_stack);
-  if (_context->current_projection_stack)
-    cogl_object_unref (_context->current_projection_stack);
-  _cogl_matrix_stack_destroy_cache (&context->builtin_flushed_projection);
-  _cogl_matrix_stack_destroy_cache (&context->builtin_flushed_modelview);
+  if (_context->current_modelview_entry)
+    _cogl_matrix_entry_unref (_context->current_modelview_entry);
+  if (_context->current_projection_entry)
+    _cogl_matrix_entry_unref (_context->current_projection_entry);
+  _cogl_matrix_entry_cache_destroy (&context->builtin_flushed_projection);
+  _cogl_matrix_entry_cache_destroy (&context->builtin_flushed_modelview);
 
   cogl_pipeline_cache_free (context->pipeline_cache);
 
@@ -585,21 +585,21 @@ _cogl_context_update_features (CoglContext *context,
 }
 
 void
-_cogl_context_set_current_projection (CoglContext *context,
-                                      CoglMatrixStack *stack)
+_cogl_context_set_current_projection_entry (CoglContext *context,
+                                            CoglMatrixEntry *entry)
 {
-  cogl_object_ref (stack);
-  if (context->current_projection_stack)
-    cogl_object_unref (context->current_projection_stack);
-  context->current_projection_stack = stack;
+  _cogl_matrix_entry_ref (entry);
+  if (context->current_projection_entry)
+    _cogl_matrix_entry_unref (context->current_projection_entry);
+  context->current_projection_entry = entry;
 }
 
 void
-_cogl_context_set_current_modelview (CoglContext *context,
-                                     CoglMatrixStack *stack)
+_cogl_context_set_current_modelview_entry (CoglContext *context,
+                                           CoglMatrixEntry *entry)
 {
-  cogl_object_ref (stack);
-  if (context->current_modelview_stack)
-    cogl_object_unref (context->current_modelview_stack);
-  context->current_modelview_stack = stack;
+  _cogl_matrix_entry_ref (entry);
+  if (context->current_modelview_entry)
+    _cogl_matrix_entry_unref (context->current_modelview_entry);
+  context->current_modelview_entry = entry;
 }
diff --git a/cogl/cogl-debug-options.h b/cogl/cogl-debug-options.h
index 4b177ae..76d7232 100644
--- a/cogl/cogl-debug-options.h
+++ b/cogl/cogl-debug-options.h
@@ -185,3 +185,8 @@ OPT (CLIPPING,
      "clipping",
      N_("Trace clipping"),
      N_("Logs information about how Cogl is implementing clipping"))
+OPT (PERFORMANCE,
+     N_("Cogl Tracing"),
+     "performance",
+     N_("Trace performance concerns"),
+     N_("Tries to highlight sub-optimal Cogl usage."))
diff --git a/cogl/cogl-debug.c b/cogl/cogl-debug.c
index 20161d4..9836133 100644
--- a/cogl/cogl-debug.c
+++ b/cogl/cogl-debug.c
@@ -56,7 +56,8 @@ static const GDebugKey cogl_log_debug_keys[] = {
   { "texture-pixmap", COGL_DEBUG_TEXTURE_PIXMAP },
   { "bitmap", COGL_DEBUG_BITMAP },
   { "clipping", COGL_DEBUG_CLIPPING },
-  { "winsys", COGL_DEBUG_WINSYS }
+  { "winsys", COGL_DEBUG_WINSYS },
+  { "performance", COGL_DEBUG_PERFORMANCE }
 };
 static const int n_cogl_log_debug_keys =
   G_N_ELEMENTS (cogl_log_debug_keys);
diff --git a/cogl/cogl-debug.h b/cogl/cogl-debug.h
index aa34ea8..efb60e1 100644
--- a/cogl/cogl-debug.h
+++ b/cogl/cogl-debug.h
@@ -67,6 +67,7 @@ typedef enum {
   COGL_DEBUG_DISABLE_FAST_READ_PIXEL,
   COGL_DEBUG_CLIPPING,
   COGL_DEBUG_WINSYS,
+  COGL_DEBUG_PERFORMANCE,
 
   COGL_DEBUG_N_FLAGS
 } CoglDebugFlags;
diff --git a/cogl/cogl-framebuffer.c b/cogl/cogl-framebuffer.c
index c099506..45af11b 100644
--- a/cogl/cogl-framebuffer.c
+++ b/cogl/cogl-framebuffer.c
@@ -1517,18 +1517,38 @@ _cogl_framebuffer_flush_dither_state (CoglFramebuffer *framebuffer)
     }
 }
 
+static CoglMatrixEntry *
+_cogl_framebuffer_get_modelview_entry (CoglFramebuffer *framebuffer)
+{
+  CoglMatrixStack *modelview_stack =
+    _cogl_framebuffer_get_modelview_stack (framebuffer);
+  return modelview_stack->last_entry;
+}
+
 static void
 _cogl_framebuffer_flush_modelview_state (CoglFramebuffer *framebuffer)
 {
-  _cogl_context_set_current_modelview (framebuffer->context,
-                                       framebuffer->modelview_stack);
+  CoglMatrixEntry *modelview_entry =
+    _cogl_framebuffer_get_modelview_entry (framebuffer);
+  _cogl_context_set_current_modelview_entry (framebuffer->context,
+                                             modelview_entry);
+}
+
+static CoglMatrixEntry *
+_cogl_framebuffer_get_projection_entry (CoglFramebuffer *framebuffer)
+{
+  CoglMatrixStack *projection_stack =
+    _cogl_framebuffer_get_projection_stack (framebuffer);
+  return projection_stack->last_entry;
 }
 
 static void
 _cogl_framebuffer_flush_projection_state (CoglFramebuffer *framebuffer)
 {
-  _cogl_context_set_current_projection (framebuffer->context,
-                                        framebuffer->projection_stack);
+  CoglMatrixEntry *projection_entry =
+    _cogl_framebuffer_get_projection_entry (framebuffer);
+  _cogl_context_set_current_projection_entry (framebuffer->context,
+                                             projection_entry);
 }
 
 static void
@@ -2631,9 +2651,9 @@ void
 cogl_framebuffer_get_modelview_matrix (CoglFramebuffer *framebuffer,
                                        CoglMatrix *matrix)
 {
-  CoglMatrixStack *modelview_stack =
-    _cogl_framebuffer_get_modelview_stack (framebuffer);
-  _cogl_matrix_stack_get (modelview_stack, matrix);
+  CoglMatrixEntry *modelview_entry =
+    _cogl_framebuffer_get_modelview_entry (framebuffer);
+  _cogl_matrix_entry_get (modelview_entry, matrix);
   _COGL_MATRIX_DEBUG_PRINT (matrix);
 }
 
@@ -2656,9 +2676,9 @@ void
 cogl_framebuffer_get_projection_matrix (CoglFramebuffer *framebuffer,
                                         CoglMatrix *matrix)
 {
-  CoglMatrixStack *projection_stack =
-    _cogl_framebuffer_get_projection_stack (framebuffer);
-  _cogl_matrix_stack_get (projection_stack, matrix);
+  CoglMatrixEntry *projection_entry =
+    _cogl_framebuffer_get_projection_entry (framebuffer);
+  _cogl_matrix_entry_get (projection_entry, matrix);
   _COGL_MATRIX_DEBUG_PRINT (matrix);
 }
 
@@ -2708,14 +2728,25 @@ cogl_framebuffer_push_rectangle_clip (CoglFramebuffer *framebuffer,
                                       float y_2)
 {
   CoglClipState *clip_state = _cogl_framebuffer_get_clip_state (framebuffer);
-  CoglMatrix modelview_matrix;
-
-  cogl_framebuffer_get_modelview_matrix (framebuffer, &modelview_matrix);
+  CoglMatrixEntry *modelview_entry =
+    _cogl_framebuffer_get_modelview_entry (framebuffer);
+  CoglMatrixEntry *projection_entry =
+    _cogl_framebuffer_get_projection_entry (framebuffer);
+  /* XXX: It would be nicer if we stored the private viewport as a
+   * vec4 so we could avoid this redundant copy. */
+  float viewport[] = {
+      framebuffer->viewport_x,
+      framebuffer->viewport_y,
+      framebuffer->viewport_width,
+      framebuffer->viewport_height
+  };
 
   clip_state->stacks->data =
     _cogl_clip_stack_push_rectangle (clip_state->stacks->data,
                                      x_1, y_1, x_2, y_2,
-                                     &modelview_matrix);
+                                     modelview_entry,
+                                     projection_entry,
+                                     viewport);
 
   if (framebuffer->context->current_draw_buffer == framebuffer)
     framebuffer->context->current_draw_buffer_changes |=
@@ -2727,14 +2758,25 @@ cogl_framebuffer_push_path_clip (CoglFramebuffer *framebuffer,
                                  CoglPath *path)
 {
   CoglClipState *clip_state = _cogl_framebuffer_get_clip_state (framebuffer);
-  CoglMatrix modelview_matrix;
-
-  cogl_framebuffer_get_modelview_matrix (framebuffer, &modelview_matrix);
+  CoglMatrixEntry *modelview_entry =
+    _cogl_framebuffer_get_modelview_entry (framebuffer);
+  CoglMatrixEntry *projection_entry =
+    _cogl_framebuffer_get_projection_entry (framebuffer);
+  /* XXX: It would be nicer if we stored the private viewport as a
+   * vec4 so we could avoid this redundant copy. */
+  float viewport[] = {
+      framebuffer->viewport_x,
+      framebuffer->viewport_y,
+      framebuffer->viewport_width,
+      framebuffer->viewport_height
+  };
 
   clip_state->stacks->data =
     _cogl_clip_stack_push_from_path (clip_state->stacks->data,
                                      path,
-                                     &modelview_matrix);
+                                     modelview_entry,
+                                     projection_entry,
+                                     viewport);
 
   if (framebuffer->context->current_draw_buffer == framebuffer)
     framebuffer->context->current_draw_buffer_changes |=
@@ -2750,16 +2792,27 @@ cogl_framebuffer_push_primitive_clip (CoglFramebuffer *framebuffer,
                                       float bounds_y2)
 {
   CoglClipState *clip_state = _cogl_framebuffer_get_clip_state (framebuffer);
-  CoglMatrix modelview_matrix;
-
-  cogl_get_modelview_matrix (&modelview_matrix);
+  CoglMatrixEntry *modelview_entry =
+    _cogl_framebuffer_get_modelview_entry (framebuffer);
+  CoglMatrixEntry *projection_entry =
+    _cogl_framebuffer_get_projection_entry (framebuffer);
+  /* XXX: It would be nicer if we stored the private viewport as a
+   * vec4 so we could avoid this redundant copy. */
+  float viewport[] = {
+      framebuffer->viewport_x,
+      framebuffer->viewport_y,
+      framebuffer->viewport_width,
+      framebuffer->viewport_height
+  };
 
   clip_state->stacks->data =
     _cogl_clip_stack_push_primitive (clip_state->stacks->data,
                                      primitive,
                                      bounds_x1, bounds_y1,
                                      bounds_x2, bounds_y2,
-                                     &modelview_matrix);
+                                     modelview_entry,
+                                     projection_entry,
+                                     viewport);
 
   if (framebuffer->context->current_draw_buffer == framebuffer)
     framebuffer->context->current_draw_buffer_changes |=
diff --git a/cogl/cogl-journal-private.h b/cogl/cogl-journal-private.h
index e6e2015..f5190f6 100644
--- a/cogl/cogl-journal-private.h
+++ b/cogl/cogl-journal-private.h
@@ -67,13 +67,10 @@ typedef struct _CoglJournalEntry
 {
   CoglPipeline            *pipeline;
   int                      n_layers;
-  CoglMatrix               model_view;
+  CoglMatrixEntry         *modelview_entry;
   CoglClipStack           *clip_stack;
   /* Offset into ctx->logged_vertices */
   size_t                   array_offset;
-  /* XXX: These entries are pretty big now considering the padding in
-   * CoglPipelineFlushOptions and CoglMatrix, so we might need to optimize this
-   * later. */
 } CoglJournalEntry;
 
 CoglJournal *
diff --git a/cogl/cogl-journal.c b/cogl/cogl-journal.c
index 8f42f6c..d177ca1 100644
--- a/cogl/cogl-journal.c
+++ b/cogl/cogl-journal.c
@@ -107,9 +107,6 @@ typedef struct _CoglJournalFlushState
   CoglIndices *indices;
   size_t indices_type_size;
 
-  CoglMatrixStack *modelview_stack;
-  CoglMatrixStack *projection_stack;
-
   CoglPipeline *pipeline;
 } CoglJournalFlushState;
 
@@ -296,11 +293,8 @@ _cogl_journal_flush_modelview_and_entries (CoglJournalEntry *batch_start,
     g_print ("BATCHING:     modelview batch len = %d\n", batch_len);
 
   if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM)))
-    {
-      _cogl_matrix_stack_set (state->modelview_stack,
-                              &batch_start->model_view);
-      _cogl_context_set_current_modelview (ctx, state->modelview_stack);
-    }
+    _cogl_context_set_current_modelview_entry (ctx,
+                                               batch_start->modelview_entry);
 
   attributes = (CoglAttribute **)state->attributes->data;
 
@@ -412,21 +406,7 @@ compare_entry_modelviews (CoglJournalEntry *entry0,
                           CoglJournalEntry *entry1)
 {
   /* Batch together quads with the same model view matrix */
-
-  /* FIXME: this is nasty, there are much nicer ways to track this
-   * (at the add_quad_vertices level) without resorting to a memcmp!
-   *
-   * E.g. If the cogl-current-matrix code maintained an "age" for
-   * the modelview matrix we could simply check in add_quad_vertices
-   * if the age has increased, and if so record the change as a
-   * boolean in the journal.
-   */
-
-  if (memcmp (&entry0->model_view, &entry1->model_view,
-              sizeof (GLfloat) * 16) == 0)
-    return TRUE;
-  else
-    return FALSE;
+  return entry0->modelview_entry == entry1->modelview_entry;
 }
 
 /* At this point we have a run of quads that we know have compatible
@@ -701,6 +681,9 @@ _cogl_journal_flush_clip_stacks_and_entries (CoglJournalEntry *batch_start,
                                              void             *data)
 {
   CoglJournalFlushState *state = data;
+  CoglFramebuffer *framebuffer = state->journal->framebuffer;
+  CoglContext *ctx = framebuffer->context;
+  CoglMatrixStack *projection_stack;
 
   COGL_STATIC_TIMER (time_flush_clip_stack_pipeline_entries,
                      "Journal Flush", /* parent */
@@ -709,15 +692,13 @@ _cogl_journal_flush_clip_stacks_and_entries (CoglJournalEntry *batch_start,
                      "pipeline + entries",
                      0 /* no application private data */);
 
-  _COGL_GET_CONTEXT (ctx, NO_RETVAL);
-
   COGL_TIMER_START (_cogl_uprof_context,
                     time_flush_clip_stack_pipeline_entries);
 
   if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_BATCHING)))
     g_print ("BATCHING:  clip stack batch len = %d\n", batch_len);
 
-  _cogl_clip_stack_flush (batch_start->clip_stack, state->journal->framebuffer);
+  _cogl_clip_stack_flush (batch_start->clip_stack, framebuffer);
 
   /* XXX: Because we are manually flushing clip state here we need to
    * make sure that the clip state gets updated the next time we flush
@@ -725,22 +706,21 @@ _cogl_journal_flush_clip_stacks_and_entries (CoglJournalEntry *batch_start,
    * as changed. */
   ctx->current_draw_buffer_changes |= COGL_FRAMEBUFFER_STATE_CLIP;
 
-  _cogl_matrix_stack_push (state->modelview_stack);
-
   /* If we have transformed all our quads at log time then we ensure
    * no further model transform is applied by loading the identity
    * matrix here. We need to do this after flushing the clip stack
-   * because the clip stack flushing code can modify the matrix */
+   * because the clip stack flushing code can modify the current
+   * modelview matrix entry */
   if (G_LIKELY (!(COGL_DEBUG_ENABLED (COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM))))
-    {
-      _cogl_matrix_stack_load_identity (state->modelview_stack);
-      _cogl_context_set_current_modelview (ctx, state->modelview_stack);
-    }
+    _cogl_context_set_current_modelview_entry (ctx, &ctx->identity_entry);
 
-  /* Setting up the clip state can sometimes also flush the projection
-     matrix so we should flush it again. This will be a no-op if the
-     clip code didn't modify the projection */
-  _cogl_context_set_current_projection (ctx, state->projection_stack);
+  /* Setting up the clip state can sometimes also update the current
+   * projection matrix entry so we should update it again. This will have
+   * no effect if the clip code didn't modify the projection */
+  projection_stack =
+    _cogl_framebuffer_get_projection_stack (framebuffer);
+  _cogl_context_set_current_projection_entry (ctx,
+                                              projection_stack->last_entry);
 
   batch_and_call (batch_start,
                   batch_len,
@@ -748,97 +728,10 @@ _cogl_journal_flush_clip_stacks_and_entries (CoglJournalEntry *batch_start,
                   _cogl_journal_flush_vbo_offsets_and_entries, /* callback */
                   data);
 
-  _cogl_matrix_stack_pop (state->modelview_stack);
-
   COGL_TIMER_STOP (_cogl_uprof_context,
                    time_flush_clip_stack_pipeline_entries);
 }
 
-static CoglBool
-calculate_translation (const CoglMatrix *a,
-                       const CoglMatrix *b,
-                       float *tx_p,
-                       float *ty_p)
-{
-  float tx, ty;
-  int x, y;
-
-  /* Assuming we had the original matrix in this form:
-   *
-   *      [ a₁₁, a₁₂, a₁₃, a₁₄ ]
-   *      [ a₂₁, a₂₂, a₂₃, a₂₄ ]
-   *  a = [ a₃₁, a₃₂, a₃₃, a₃₄ ]
-   *      [ a₄₁, a₄₂, a₄₃, a₄₄ ]
-   *
-   * then a translation of that matrix would be a multiplication by a
-   * matrix of this form:
-   *
-   *      [ 1, 0, 0, x ]
-   *      [ 0, 1, 0, y ]
-   *  t = [ 0, 0, 1, 0 ]
-   *      [ 0, 0, 0, 1 ]
-   *
-   * That would give us a matrix of this form.
-   *
-   *              [ a₁₁, a₁₂, a₁₃, a₁₁ x + a₁₂ y + a₁₄ ]
-   *              [ a₂₁, a₂₂, a₂₃, a₂₁ x + a₂₂ y + a₂₄ ]
-   *  b = a ⋅ t = [ a₃₁, a₃₂, a₃₃, a₃₁ x + a₃₂ y + a₃₄ ]
-   *              [ a₄₁, a₄₂, a₄₃, a₄₁ x + a₄₂ y + a₄₄ ]
-   *
-   * We can use the two equations from the top left of the matrix to
-   * work out the x and y translation given the two matrices:
-   *
-   *  b₁₄ = a₁₁x + a₁₂y + a₁₄
-   *  b₂₄ = a₂₁x + a₂₂y + a₂₄
-   *
-   * Rearranging gives us:
-   *
-   *        a₁₂ b₂₄ - a₂₄ a₁₂
-   *        -----------------  +  a₁₄ - b₁₄
-   *              a₂₂
-   *  x =  ---------------------------------
-   *                a₁₂ a₂₁
-   *                -------  -  a₁₁
-   *                  a₂₂
-   *
-   *      b₂₄ - a₂₁x - a₂₄
-   *  y = ----------------
-   *            a₂₂
-   *
-   * Once we've worked out what x and y would be if this was a valid
-   * translation then we can simply verify that the rest of the matrix
-   * matches up.
-   */
-
-  /* The leftmost 3x4 part of the matrix shouldn't change by a
-     translation so we can just compare it directly */
-  for (y = 0; y < 4; y++)
-    for (x = 0; x < 3; x++)
-      if ((&a->xx)[x * 4 + y] != (&b->xx)[x * 4 + y])
-        return FALSE;
-
-  tx = (((a->xy * b->yw - a->yw * a->xy) / a->yy + a->xw - b->xw) /
-        ((a->xy * a->yx) / a->yy - a->xx));
-  ty = (b->yw - a->yx * tx - a->yw) / a->yy;
-
-#define APPROX_EQUAL(a, b) (fabsf ((a) - (b)) < 1e-6f)
-
-  /* Check whether the 4th column of the matrices match up to the
-     calculation */
-  if (!APPROX_EQUAL (b->xw, a->xx * tx + a->xy * ty + a->xw) ||
-      !APPROX_EQUAL (b->yw, a->yx * tx + a->yy * ty + a->yw) ||
-      !APPROX_EQUAL (b->zw, a->zx * tx + a->zy * ty + a->zw) ||
-      !APPROX_EQUAL (b->ww, a->wx * tx + a->wy * ty + a->ww))
-    return FALSE;
-
-#undef APPROX_EQUAL
-
-  *tx_p = tx;
-  *ty_p = ty;
-
-  return TRUE;
-}
-
 typedef struct
 {
   float x_1, y_1;
@@ -878,13 +771,15 @@ can_software_clip_entry (CoglJournalEntry *journal_entry,
     {
       float rect_x1, rect_y1, rect_x2, rect_y2;
       CoglClipStackRect *clip_rect;
-      float tx, ty;
+      float tx, ty, tz;
+      CoglMatrixEntry *modelview_entry;
 
       clip_rect = (CoglClipStackRect *) clip_entry;
 
-      if (!calculate_translation (&clip_rect->matrix,
-                                  &journal_entry->model_view,
-                                  &tx, &ty))
+      modelview_entry = journal_entry->modelview_entry;
+      if (!_cogl_matrix_entry_calculate_translation (clip_rect->matrix_entry,
+                                                     modelview_entry,
+                                                     &tx, &ty, &tz))
         return FALSE;
 
       if (clip_rect->x0 < clip_rect->x1)
@@ -1203,6 +1098,7 @@ upload_vertices (CoglJournal *journal,
       else
         {
           float v[8];
+          CoglMatrix modelview;
 
           v[0] = vin[0];
           v[1] = vin[1];
@@ -1213,7 +1109,8 @@ upload_vertices (CoglJournal *journal,
           v[6] = vin[array_stride];
           v[7] = vin[1];
 
-          cogl_matrix_transform_points (&entry->model_view,
+          _cogl_matrix_entry_get (entry->modelview_entry, &modelview);
+          cogl_matrix_transform_points (&modelview,
                                         2, /* n_components */
                                         sizeof (float) * 2, /* stride_in */
                                         v, /* points_in */
@@ -1260,6 +1157,7 @@ _cogl_journal_discard (CoglJournal *journal)
       CoglJournalEntry *entry =
         &g_array_index (journal->entries, CoglJournalEntry, i);
       _cogl_pipeline_journal_unref (entry->pipeline);
+      _cogl_matrix_entry_unref (entry->modelview_entry);
       _cogl_clip_stack_unref (entry->clip_stack);
     }
 
@@ -1348,25 +1246,27 @@ _cogl_journal_all_entries_within_bounds (CoglJournal *journal,
 void
 _cogl_journal_flush (CoglJournal *journal)
 {
+  CoglFramebuffer *framebuffer;
+  CoglContext *ctx;
   CoglJournalFlushState state;
-  int                   i;
-  CoglMatrixStack      *modelview_stack;
+  int i;
   COGL_STATIC_TIMER (flush_timer,
                      "Mainloop", /* parent */
                      "Journal Flush",
                      "The time spent flushing the Cogl journal",
                      0 /* no application private data */);
 
-  _COGL_GET_CONTEXT (ctx, NO_RETVAL);
-
   if (journal->entries->len == 0)
     return;
 
+  framebuffer = journal->framebuffer;
+  ctx = framebuffer->context;
+
   /* The entries in this journal may depend on images in other
    * framebuffers which may require that we flush the journals
    * associated with those framebuffers before we can flush
    * this journal... */
-  _cogl_framebuffer_flush_dependency_journals (journal->framebuffer);
+  _cogl_framebuffer_flush_dependency_journals (framebuffer);
 
   /* Note: we start the timer after flushing dependency journals so
    * that the timer isn't started recursively. */
@@ -1377,8 +1277,8 @@ _cogl_journal_flush (CoglJournal *journal)
 
   /* NB: the journal deals with flushing the modelview stack and clip
      state manually */
-  _cogl_framebuffer_flush_state (journal->framebuffer,
-                                 journal->framebuffer,
+  _cogl_framebuffer_flush_state (framebuffer,
+                                 framebuffer,
                                  COGL_FRAMEBUFFER_STATE_ALL &
                                  ~(COGL_FRAMEBUFFER_STATE_MODELVIEW |
                                    COGL_FRAMEBUFFER_STATE_CLIP));
@@ -1387,12 +1287,6 @@ _cogl_journal_flush (CoglJournal *journal)
 
   state.attributes = ctx->journal_flush_attributes_array;
 
-  modelview_stack =
-    _cogl_framebuffer_get_modelview_stack (journal->framebuffer);
-  state.modelview_stack = modelview_stack;
-  state.projection_stack =
-    _cogl_framebuffer_get_projection_stack (journal->framebuffer);
-
   if (G_UNLIKELY ((COGL_DEBUG_ENABLED (COGL_DEBUG_DISABLE_SOFTWARE_CLIP)) == 0))
     {
       /* We do an initial walk of the journal to analyse the clip stack
@@ -1493,6 +1387,7 @@ _cogl_journal_log_quad (CoglJournal  *journal,
   CoglPipeline *final_pipeline;
   CoglClipStack *clip_stack;
   CoglPipelineFlushOptions flush_options;
+  CoglMatrixStack *modelview_stack;
   COGL_STATIC_TIMER (log_timer,
                      "Mainloop", /* parent */
                      "Journal Log",
@@ -1590,8 +1485,9 @@ _cogl_journal_log_quad (CoglJournal  *journal,
   if (G_UNLIKELY (final_pipeline != pipeline))
     cogl_object_unref (final_pipeline);
 
-  cogl_framebuffer_get_modelview_matrix (framebuffer,
-                                         &entry->model_view);
+  modelview_stack =
+    _cogl_framebuffer_get_modelview_stack (framebuffer);
+  entry->modelview_entry = _cogl_matrix_entry_ref (modelview_stack->last_entry);
 
   _cogl_pipeline_foreach_layer_internal (pipeline,
                                          add_framebuffer_deps_cb,
@@ -1613,6 +1509,7 @@ entry_to_screen_polygon (CoglFramebuffer *framebuffer,
     GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS (entry->n_layers);
   CoglMatrixStack *projection_stack;
   CoglMatrix projection;
+  CoglMatrix modelview;
   int i;
   float viewport[4];
 
@@ -1640,7 +1537,8 @@ entry_to_screen_polygon (CoglFramebuffer *framebuffer,
    * _cogl_transform_points utility...
    */
 
-  cogl_matrix_transform_points (&entry->model_view,
+  _cogl_matrix_entry_get (entry->modelview_entry, &modelview);
+  cogl_matrix_transform_points (&modelview,
                                 2, /* n_components */
                                 sizeof (float) * 4, /* stride_in */
                                 poly, /* points_in */
diff --git a/cogl/cogl-matrix-private.h b/cogl/cogl-matrix-private.h
index 1bdea7b..ef39e84 100644
--- a/cogl/cogl-matrix-private.h
+++ b/cogl/cogl-matrix-private.h
@@ -41,6 +41,13 @@ G_BEGIN_DECLS
 void
 _cogl_matrix_print (const CoglMatrix *matrix);
 
+void
+_cogl_matrix_prefix_print (const char *prefix, const CoglMatrix *matrix);
+
+void
+_cogl_matrix_init_from_matrix_without_inverse (CoglMatrix *matrix,
+                                               const CoglMatrix *src);
+
 G_END_DECLS
 
 #endif /* __COGL_MATRIX_PRIVATE_H */
diff --git a/cogl/cogl-matrix-stack.c b/cogl/cogl-matrix-stack.c
index 64ff803..abbd007 100644
--- a/cogl/cogl-matrix-stack.c
+++ b/cogl/cogl-matrix-stack.c
@@ -3,7 +3,7 @@
  *
  * An object oriented GL/GLES Abstraction/Utility Layer
  *
- * Copyright (C) 2009,2010 Intel Corporation.
+ * Copyright (C) 2009,2010,2012 Intel Corporation.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -16,13 +16,13 @@
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- *
+ * License along with this library. If not, see
+ * <http://www.gnu.org/licenses/>.
  *
  * Authors:
  *   Havoc Pennington <hp at pobox.com> for litl
  *   Robert Bragg <robert at linux.intel.com>
+ *   Neil Roberts <neil at linux.intel.com>
  */
 
 #ifdef HAVE_CONFIG_H
@@ -35,341 +35,674 @@
 #include "cogl-framebuffer-private.h"
 #include "cogl-object-private.h"
 #include "cogl-offscreen.h"
+#include "cogl-matrix-private.h"
+#include "cogl-magazine-private.h"
 
-typedef struct {
-  CoglMatrix matrix;
-  CoglBool is_identity;
-  /* count of pushes with no changes; when a change is
-   * requested, we create a new state and decrement this
-   */
-  int push_count;
-} CoglMatrixState;
+static void _cogl_matrix_stack_free (CoglMatrixStack *stack);
 
-/**
- * CoglMatrixStack:
- *
- * Stores a cogl-side matrix stack, which we use as a cache
- * so we can get the matrix efficiently when using indirect
- * rendering.
- */
-struct _CoglMatrixStack
+COGL_OBJECT_INTERNAL_DEFINE (MatrixStack, matrix_stack);
+
+static CoglMagazine *_cogl_matrix_stack_magazine;
+static CoglMagazine *_cogl_matrix_stack_matrices_magazine;
+
+static void *
+_cogl_matrix_stack_push_entry (CoglMatrixStack *stack,
+                               size_t size,
+                               CoglMatrixOp operation)
 {
-  CoglObject _parent;
+  CoglMatrixEntry *entry =
+    _cogl_magazine_chunk_alloc (_cogl_matrix_stack_magazine);
 
-  GArray *stack;
+  /* The new entry starts with a ref count of 1 because the stack
+     holds a reference to it as it is the top entry */
+  entry->ref_count = 1;
+  entry->op = operation;
+  entry->parent = stack->last_entry;
 
-  unsigned int age;
-};
+  entry->composite_gets = 0;
 
-static void _cogl_matrix_stack_free (CoglMatrixStack *stack);
+  stack->last_entry = entry;
 
-COGL_OBJECT_INTERNAL_DEFINE (MatrixStack, matrix_stack);
+  /* We don't need to take a reference to the parent from the entry
+     because we are stealing the ref that the stack held on its old top */
 
-/* XXX: this doesn't initialize the matrix! */
-static void
-_cogl_matrix_state_init (CoglMatrixState *state)
-{
-  state->push_count = 0;
-  state->is_identity = FALSE;
+  return entry;
 }
 
-static CoglMatrixState *
-_cogl_matrix_stack_top (CoglMatrixStack *stack)
+void
+_cogl_matrix_entry_identity_init (CoglMatrixEntry *entry)
 {
-  return &g_array_index (stack->stack, CoglMatrixState, stack->stack->len - 1);
+  entry->ref_count = 1;
+  entry->op = COGL_MATRIX_OP_LOAD_IDENTITY;
+  entry->parent = NULL;
+  entry->composite_gets = 0;
 }
 
-/* XXX:
- * Operations like scale, translate, rotate etc need to have an
- * initialized state->matrix to work with, so they will pass
- * initialize = TRUE.
- *
- * _cogl_matrix_stack_load_identity and _cogl_matrix_stack_set on the
- * other hand don't so they will pass initialize = FALSE
- *
- * NB: Identity matrices are represented by setting
- * state->is_identity=TRUE in which case state->matrix will be
- * uninitialized.
- */
-static CoglMatrixState *
-_cogl_matrix_stack_top_mutable (CoglMatrixStack *stack,
-                                CoglBool initialize)
+void
+_cogl_matrix_stack_load_identity (CoglMatrixStack *stack)
 {
-  CoglMatrixState *state;
-  CoglMatrixState *new_top;
+  _cogl_matrix_stack_push_entry (stack,
+                                 sizeof (CoglMatrixEntry),
+                                 COGL_MATRIX_OP_LOAD_IDENTITY);
+}
 
-  state = _cogl_matrix_stack_top (stack);
+void
+_cogl_matrix_stack_translate (CoglMatrixStack *stack,
+                              float x,
+                              float y,
+                              float z)
+{
+  CoglMatrixEntryTranslate *entry;
 
-  if (state->push_count == 0)
-    {
-      if (state->is_identity && initialize)
-        cogl_matrix_init_identity (&state->matrix);
-      return state;
-    }
+  entry = _cogl_matrix_stack_push_entry (stack,
+                                         sizeof (CoglMatrixEntryTranslate),
+                                         COGL_MATRIX_OP_TRANSLATE);
 
-  state->push_count -= 1;
+  entry->x = x;
+  entry->y = y;
+  entry->z = z;
+}
 
-  g_array_set_size (stack->stack, stack->stack->len + 1);
-  /* if g_array_set_size reallocs we need to get state
-   * pointer again */
-  state = &g_array_index (stack->stack, CoglMatrixState,
-                            stack->stack->len - 2);
-  new_top = _cogl_matrix_stack_top(stack);
-  _cogl_matrix_state_init (new_top);
+void
+_cogl_matrix_stack_rotate (CoglMatrixStack *stack,
+                           float angle,
+                           float x,
+                           float y,
+                           float z)
+{
+  CoglMatrixEntryRotate *entry;
 
-  if (initialize)
-    {
-      if (state->is_identity)
-        cogl_matrix_init_identity (&new_top->matrix);
-      else
-        new_top->matrix = state->matrix;
-    }
+  entry = _cogl_matrix_stack_push_entry (stack,
+                                         sizeof (CoglMatrixEntryRotate),
+                                         COGL_MATRIX_OP_ROTATE);
 
-  return new_top;
+  entry->angle = angle;
+  entry->x = x;
+  entry->y = y;
+  entry->z = z;
 }
 
-CoglMatrixStack*
-_cogl_matrix_stack_new (void)
+void
+_cogl_matrix_stack_scale (CoglMatrixStack *stack,
+                          float x,
+                          float y,
+                          float z)
 {
-  CoglMatrixStack *stack;
-  CoglMatrixState *state;
+  CoglMatrixEntryScale *entry;
 
-  stack = g_slice_new0 (CoglMatrixStack);
+  entry = _cogl_matrix_stack_push_entry (stack,
+                                         sizeof (CoglMatrixEntryScale),
+                                         COGL_MATRIX_OP_SCALE);
 
-  stack->stack = g_array_sized_new (FALSE, FALSE,
-                                    sizeof (CoglMatrixState), 10);
-  g_array_set_size (stack->stack, 1);
-  state = &g_array_index (stack->stack, CoglMatrixState, 0);
-  _cogl_matrix_state_init (state);
-  state->is_identity = TRUE;
+  entry->x = x;
+  entry->y = y;
+  entry->z = z;
+}
 
-  stack->age = 0;
+void
+_cogl_matrix_stack_multiply (CoglMatrixStack *stack,
+                             const CoglMatrix *matrix)
+{
+  CoglMatrixEntryMultiply *entry;
 
-  return _cogl_matrix_stack_object_new (stack);
+  entry = _cogl_matrix_stack_push_entry (stack,
+                                         sizeof (CoglMatrixEntryMultiply),
+                                         COGL_MATRIX_OP_MULTIPLY);
+
+  entry->matrix =
+    _cogl_magazine_chunk_alloc (_cogl_matrix_stack_matrices_magazine);
+
+  cogl_matrix_init_from_array (entry->matrix, (float *)matrix);
 }
 
-static void
-_cogl_matrix_stack_free (CoglMatrixStack *stack)
+void
+_cogl_matrix_stack_set (CoglMatrixStack *stack,
+                        const CoglMatrix *matrix)
 {
-  g_array_free (stack->stack, TRUE);
-  g_slice_free (CoglMatrixStack, stack);
+  CoglMatrixEntryLoad *entry;
+
+  entry = _cogl_matrix_stack_push_entry (stack,
+                                         sizeof (CoglMatrixEntryLoad),
+                                         COGL_MATRIX_OP_LOAD);
+
+  entry->matrix =
+    _cogl_magazine_chunk_alloc (_cogl_matrix_stack_matrices_magazine);
+
+  cogl_matrix_init_from_array (entry->matrix, (float *)matrix);
 }
 
 void
-_cogl_matrix_stack_push (CoglMatrixStack *stack)
+_cogl_matrix_stack_frustum (CoglMatrixStack *stack,
+                            float left,
+                            float right,
+                            float bottom,
+                            float top,
+                            float z_near,
+                            float z_far)
 {
-  CoglMatrixState *state;
+  CoglMatrixEntryLoad *entry;
 
-  state = _cogl_matrix_stack_top (stack);
+  entry = _cogl_matrix_stack_push_entry (stack,
+                                         sizeof (CoglMatrixEntryLoad),
+                                         COGL_MATRIX_OP_LOAD);
 
-  /* we lazily create a new stack top if someone changes the matrix
-   * while push_count > 0
-   */
-  state->push_count += 1;
+  entry->matrix =
+    _cogl_magazine_chunk_alloc (_cogl_matrix_stack_matrices_magazine);
+
+  cogl_matrix_init_identity (entry->matrix);
+  cogl_matrix_frustum (entry->matrix,
+                       left, right, bottom, top,
+                       z_near, z_far);
 }
 
 void
-_cogl_matrix_stack_pop (CoglMatrixStack *stack)
+_cogl_matrix_stack_perspective (CoglMatrixStack *stack,
+                                float fov_y,
+                                float aspect,
+                                float z_near,
+                                float z_far)
 {
-  CoglMatrixState *state;
+  CoglMatrixEntryLoad *entry;
 
-  state = _cogl_matrix_stack_top (stack);
+  entry = _cogl_matrix_stack_push_entry (stack,
+                                         sizeof (CoglMatrixEntryLoad),
+                                         COGL_MATRIX_OP_LOAD);
 
-  if (state->push_count > 0)
-    {
-      state->push_count -= 1;
-    }
-  else
-    {
-      if (stack->stack->len == 1)
-        {
-          g_warning ("Too many matrix pops");
-          return;
-        }
+  entry->matrix =
+    _cogl_magazine_chunk_alloc (_cogl_matrix_stack_matrices_magazine);
 
-      stack->age++;
-      g_array_set_size (stack->stack, stack->stack->len - 1);
-    }
+  cogl_matrix_init_identity (entry->matrix);
+  cogl_matrix_perspective (entry->matrix,
+                           fov_y, aspect, z_near, z_far);
 }
 
 void
-_cogl_matrix_stack_load_identity (CoglMatrixStack *stack)
+_cogl_matrix_stack_orthographic (CoglMatrixStack *stack,
+                                 float x_1,
+                                 float y_1,
+                                 float x_2,
+                                 float y_2,
+                                 float near,
+                                 float far)
 {
-  CoglMatrixState *state;
+  CoglMatrixEntryLoad *entry;
 
-  state = _cogl_matrix_stack_top_mutable (stack, FALSE);
+  entry = _cogl_matrix_stack_push_entry (stack,
+                                         sizeof (CoglMatrixEntryLoad),
+                                         COGL_MATRIX_OP_LOAD);
 
-  /* NB: Identity matrices are represented by setting
-   * state->is_identity = TRUE and leaving state->matrix
-   * uninitialized.
-   *
-   * This is done to optimize the heavy usage of
-   * _cogl_matrix_stack_load_identity by the Cogl Journal.
-   */
-  if (!state->is_identity)
-    {
-      state->is_identity = TRUE;
-      stack->age++;
-    }
+  entry->matrix =
+    _cogl_magazine_chunk_alloc (_cogl_matrix_stack_matrices_magazine);
+
+  cogl_matrix_init_identity (entry->matrix);
+  cogl_matrix_orthographic (entry->matrix,
+                            x_1, y_1, x_2, y_2, near, far);
 }
 
 void
-_cogl_matrix_stack_scale (CoglMatrixStack *stack,
-                          float            x,
-                          float            y,
-                          float            z)
+_cogl_matrix_stack_push (CoglMatrixStack *stack)
 {
-  CoglMatrixState *state;
+  CoglMatrixEntrySave *entry;
+
+  entry = _cogl_matrix_stack_push_entry (stack,
+                                         sizeof (CoglMatrixEntrySave),
+                                         COGL_MATRIX_OP_SAVE);
 
-  state = _cogl_matrix_stack_top_mutable (stack, TRUE);
-  cogl_matrix_scale (&state->matrix, x, y, z);
-  state->is_identity = FALSE;
-  stack->age++;
+  entry->cache_valid = FALSE;
 }
 
-void
-_cogl_matrix_stack_translate (CoglMatrixStack *stack,
-                              float            x,
-                              float            y,
-                              float            z)
+CoglMatrixEntry *
+_cogl_matrix_entry_ref (CoglMatrixEntry *entry)
 {
-  CoglMatrixState *state;
+  /* A NULL pointer is considered a valid stack so we should accept
+     that as an argument */
+  if (entry)
+    entry->ref_count++;
 
-  state = _cogl_matrix_stack_top_mutable (stack, TRUE);
-  cogl_matrix_translate (&state->matrix, x, y, z);
-  state->is_identity = FALSE;
-  stack->age++;
+  return entry;
 }
 
 void
-_cogl_matrix_stack_rotate (CoglMatrixStack *stack,
-                           float            angle,
-                           float            x,
-                           float            y,
-                           float            z)
+_cogl_matrix_entry_unref (CoglMatrixEntry *entry)
 {
-  CoglMatrixState *state;
+  CoglMatrixEntry *parent;
+
+  for (; entry && --entry->ref_count <= 0; entry = parent)
+    {
+      /* Drop one reference and free every entry whose count reaches
+       * zero, walking up the parent links. The parent must be read
+       * before the entry's chunk is handed back to the magazine. */
+      parent = entry->parent;
+
+      switch (entry->op)
+        {
+        case COGL_MATRIX_OP_LOAD_IDENTITY:
+        case COGL_MATRIX_OP_TRANSLATE:
+        case COGL_MATRIX_OP_ROTATE:
+        case COGL_MATRIX_OP_SCALE:
+          break;
+        case COGL_MATRIX_OP_MULTIPLY:
+          _cogl_magazine_chunk_free (_cogl_matrix_stack_matrices_magazine,
+                                     ((CoglMatrixEntryMultiply *)entry)->matrix);
+          break;
+        case COGL_MATRIX_OP_LOAD:
+          _cogl_magazine_chunk_free (_cogl_matrix_stack_matrices_magazine,
+                                     ((CoglMatrixEntryLoad *)entry)->matrix);
+          break;
+        case COGL_MATRIX_OP_SAVE:
+          {
+            CoglMatrixEntrySave *save = (CoglMatrixEntrySave *)entry;
+            if (save->cache_valid)
+              _cogl_magazine_chunk_free (_cogl_matrix_stack_matrices_magazine,
+                                         save->cache);
+            break;
+          }
+        }
 
-  state = _cogl_matrix_stack_top_mutable (stack, TRUE);
-  cogl_matrix_rotate (&state->matrix, angle, x, y, z);
-  state->is_identity = FALSE;
-  stack->age++;
+      _cogl_magazine_chunk_free (_cogl_matrix_stack_magazine, entry);
+    }
 }
 
 void
-_cogl_matrix_stack_multiply (CoglMatrixStack  *stack,
-                             const CoglMatrix *matrix)
+_cogl_matrix_stack_pop (CoglMatrixStack *stack)
 {
-  CoglMatrixState *state;
+  CoglMatrixEntry *old_top;
+  CoglMatrixEntry *new_top;
+
+  _COGL_RETURN_IF_FAIL (stack != NULL);
+
+  old_top = stack->last_entry;
+  _COGL_RETURN_IF_FAIL (old_top != NULL);
+
+  /* To pop we are moving the top of the stack to the old top's parent
+   * node. The stack always needs to have a reference to the top entry
+   * so we must take a reference to the new top. The stack would have
+   * previously had a reference to the old top so we need to decrease
+   * the ref count on that. We need to ref the new head first in case
+   * this stack was the only thing referencing the old top. In that
+   * case the call to _cogl_matrix_entry_unref will unref the parent.
+   */
+
+  /* Find the last save operation and remove it */
+
+  /* XXX: it would be an error to pop to the very beginning of the
+   * stack so we don't need to check for NULL pointer dereferencing. */
+  for (new_top = old_top;
+       new_top->op != COGL_MATRIX_OP_SAVE;
+       new_top = new_top->parent)
+    ;
 
-  state = _cogl_matrix_stack_top_mutable (stack, TRUE);
-  cogl_matrix_multiply (&state->matrix, &state->matrix, matrix);
-  state->is_identity = FALSE;
-  stack->age++;
+  new_top = new_top->parent;
+  _cogl_matrix_entry_ref (new_top);
+
+  _cogl_matrix_entry_unref (old_top);
+
+  stack->last_entry = new_top;
 }
 
-void
-_cogl_matrix_stack_frustum (CoglMatrixStack *stack,
-                            float            left,
-                            float            right,
-                            float            bottom,
-                            float            top,
-                            float            z_near,
-                            float            z_far)
+CoglBool
+_cogl_matrix_stack_get_inverse (CoglMatrixStack *stack,
+                                CoglMatrix *inverse)
 {
-  CoglMatrixState *state;
+  CoglMatrix matrix;
+  CoglMatrix *internal = _cogl_matrix_stack_get (stack, &matrix);
 
-  state = _cogl_matrix_stack_top_mutable (stack, TRUE);
-  cogl_matrix_frustum (&state->matrix,
-                       left, right, bottom, top,
-                       z_near, z_far);
-  state->is_identity = FALSE;
-  stack->age++;
+  if (internal)
+    return cogl_matrix_get_inverse (internal, inverse);
+  else
+    return cogl_matrix_get_inverse (&matrix, inverse);
 }
 
-void
-_cogl_matrix_stack_perspective (CoglMatrixStack *stack,
-                                float            fov_y,
-                                float            aspect,
-                                float            z_near,
-                                float            z_far)
+/* In addition to writing the stack matrix into the given @matrix
+ * argument this function *may* sometimes also return a pointer
+ * to a matrix too so if we are querying the inverse matrix we
+ * should query from the return matrix so that the result can
+ * be cached within the stack. */
+CoglMatrix *
+_cogl_matrix_entry_get (CoglMatrixEntry *entry,
+                        CoglMatrix *matrix)
 {
-  CoglMatrixState *state;
+  int depth;
+  CoglMatrixEntry *current;
+  CoglMatrixEntry **children;
+  int i;
+
+  for (depth = 0, current = entry;
+       current;
+       current = current->parent, depth++)
+    {
+      switch (current->op)
+        {
+        case COGL_MATRIX_OP_LOAD_IDENTITY:
+          cogl_matrix_init_identity (matrix);
+          goto initialized;
+        case COGL_MATRIX_OP_LOAD:
+          {
+            CoglMatrixEntryLoad *load = (CoglMatrixEntryLoad *)current;
+            _cogl_matrix_init_from_matrix_without_inverse (matrix,
+                                                           load->matrix);
+            goto initialized;
+          }
+        case COGL_MATRIX_OP_SAVE:
+          {
+            CoglMatrixEntrySave *save = (CoglMatrixEntrySave *)current;
+            if (!save->cache_valid)
+              {
+                CoglMagazine *matrices_magazine =
+                  _cogl_matrix_stack_matrices_magazine;
+                save->cache = _cogl_magazine_chunk_alloc (matrices_magazine);
+                _cogl_matrix_entry_get (current->parent, save->cache);
+                save->cache_valid = TRUE;
+              }
+            _cogl_matrix_init_from_matrix_without_inverse (matrix, save->cache);
+            goto initialized;
+          }
+        default:
+          continue;
+        }
+    }
 
-  state = _cogl_matrix_stack_top_mutable (stack, TRUE);
-  cogl_matrix_perspective (&state->matrix,
-                           fov_y, aspect, z_near, z_far);
-  state->is_identity = FALSE;
-  stack->age++;
+initialized:
+
+  if (depth == 0)
+    {
+      switch (entry->op)
+        {
+        case COGL_MATRIX_OP_LOAD_IDENTITY:
+        case COGL_MATRIX_OP_TRANSLATE:
+        case COGL_MATRIX_OP_ROTATE:
+        case COGL_MATRIX_OP_SCALE:
+        case COGL_MATRIX_OP_MULTIPLY:
+          return NULL;
+
+        case COGL_MATRIX_OP_LOAD:
+          {
+            CoglMatrixEntryLoad *load = (CoglMatrixEntryLoad *)entry;
+            return load->matrix;
+          }
+        case COGL_MATRIX_OP_SAVE:
+          {
+            CoglMatrixEntrySave *save = (CoglMatrixEntrySave *)entry;
+            return save->cache;
+          }
+        }
+      g_warn_if_reached ();
+      return NULL;
+    }
+
+#ifdef COGL_ENABLE_DEBUG
+  if (!current)
+    {
+      g_warning ("Inconsistent matrix stack");
+      return NULL;
+    }
+#endif
+
+  entry->composite_gets++;
+
+  children = g_alloca (sizeof (CoglMatrixEntry *) * depth);
+
+  /* We need to walk the list of entries from the init/load/save entry
+   * back towards the leaf node but the nodes don't link to their
+   * children so we need to re-walk them here to add to a separate
+   * array. */
+  for (i = depth - 1, current = entry;
+       i >= 0 && current;
+       i--, current = current->parent)
+    {
+      children[i] = current;
+    }
+
+  if (COGL_DEBUG_ENABLED (COGL_DEBUG_PERFORMANCE) &&
+      entry->composite_gets >= 2)
+    {
+      COGL_NOTE (PERFORMANCE,
+                 "Re-composing a matrix stack entry multiple times");
+    }
+
+  for (i = 0; i < depth; i++)
+    {
+      switch (children[i]->op)
+        {
+        case COGL_MATRIX_OP_TRANSLATE:
+          {
+            CoglMatrixEntryTranslate *translate =
+              (CoglMatrixEntryTranslate *)children[i];
+            cogl_matrix_translate (matrix,
+                                   translate->x,
+                                   translate->y,
+                                   translate->z);
+            continue;
+          }
+        case COGL_MATRIX_OP_ROTATE:
+          {
+            CoglMatrixEntryRotate *rotate=
+              (CoglMatrixEntryRotate *)children[i];
+            cogl_matrix_rotate (matrix,
+                                rotate->angle,
+                                rotate->x,
+                                rotate->y,
+                                rotate->z);
+            continue;
+          }
+        case COGL_MATRIX_OP_SCALE:
+          {
+            CoglMatrixEntryScale *scale =
+              (CoglMatrixEntryScale *)children[i];
+            cogl_matrix_scale (matrix,
+                               scale->x,
+                               scale->y,
+                               scale->z);
+            continue;
+          }
+        case COGL_MATRIX_OP_MULTIPLY:
+          {
+            CoglMatrixEntryMultiply *multiply =
+              (CoglMatrixEntryMultiply *)children[i];
+            cogl_matrix_multiply (matrix, matrix, multiply->matrix);
+            continue;
+          }
+
+        case COGL_MATRIX_OP_LOAD_IDENTITY:
+        case COGL_MATRIX_OP_LOAD:
+        case COGL_MATRIX_OP_SAVE:
+          g_warn_if_reached ();
+          continue;
+        }
+    }
+
+  return NULL;
 }
 
-void
-_cogl_matrix_stack_ortho (CoglMatrixStack *stack,
-                          float            left,
-                          float            right,
-                          float            bottom,
-                          float            top,
-                          float            z_near,
-                          float            z_far)
+/* In addition to writing the stack matrix into the given @matrix
+ * argument this function *may* sometimes also return a pointer
+ * to a matrix too so if we are querying the inverse matrix we
+ * should query from the return matrix so that the result can
+ * be cached within the stack. */
+CoglMatrix *
+_cogl_matrix_stack_get (CoglMatrixStack *stack,
+                        CoglMatrix *matrix)
 {
-  CoglMatrixState *state;
+  return _cogl_matrix_entry_get (stack->last_entry, matrix);
+}
 
-  state = _cogl_matrix_stack_top_mutable (stack, TRUE);
-  cogl_matrix_ortho (&state->matrix,
-                     left, right, bottom, top, z_near, z_far);
-  state->is_identity = FALSE;
-  stack->age++;
+static void
+_cogl_matrix_stack_free (CoglMatrixStack *stack)
+{
+  _cogl_matrix_entry_unref (stack->last_entry);
+  g_slice_free (CoglMatrixStack, stack);
 }
 
-CoglBool
-_cogl_matrix_stack_get_inverse (CoglMatrixStack *stack,
-                                CoglMatrix      *inverse)
+CoglMatrixStack *
+_cogl_matrix_stack_new (void)
 {
-  CoglMatrixState *state;
+  CoglMatrixStack *stack = g_slice_new (CoglMatrixStack);
+
+  if (G_UNLIKELY (_cogl_matrix_stack_magazine == NULL))
+    {
+      _cogl_matrix_stack_magazine =
+        _cogl_magazine_new (sizeof (CoglMatrixEntryFull), 20);
+      _cogl_matrix_stack_matrices_magazine =
+        _cogl_magazine_new (sizeof (CoglMatrix), 20);
+    }
+
+  stack->last_entry = NULL;
 
-  state = _cogl_matrix_stack_top_mutable (stack, TRUE);
+  _cogl_matrix_stack_load_identity (stack);
 
-  return cogl_matrix_get_inverse (&state->matrix, inverse);
+  return _cogl_matrix_stack_object_new (stack);
 }
 
-void
-_cogl_matrix_stack_get (CoglMatrixStack *stack,
-                        CoglMatrix      *matrix)
+static CoglMatrixEntry *
+_cogl_matrix_entry_skip_saves (CoglMatrixEntry *entry)
 {
-  CoglMatrixState *state;
+  /* We currently assume that every stack starts with an
+   * _OP_LOAD_IDENTITY so we don't need to worry about
+   * NULL pointer dereferencing here. */
+  while (entry->op == COGL_MATRIX_OP_SAVE)
+    entry = entry->parent;
 
-  state = _cogl_matrix_stack_top (stack);
+  return entry;
+}
 
-  /* NB: identity matrices are lazily initialized because we can often avoid
-   * initializing them at all if nothing is pushed on top of them since we
-   * load them using glLoadIdentity()
+CoglBool
+_cogl_matrix_entry_calculate_translation (CoglMatrixEntry *entry0,
+                                          CoglMatrixEntry *entry1,
+                                          float *x,
+                                          float *y,
+                                          float *z)
+{
+  GSList *head0 = NULL;
+  GSList *head1 = NULL;
+  CoglMatrixEntry *node0;
+  CoglMatrixEntry *node1;
+  int len0 = 0;
+  int len1 = 0;
+  int count;
+  GSList *common_ancestor0;
+  GSList *common_ancestor1;
+
+  /* Algorithm:
+   *
+   * 1) Ignoring _OP_SAVE entries walk the ancestors of each entry to
+   *    the root node or any non-translation node, adding a pointer to
+   *    each ancestor node to two linked lists.
+   *
+   * 2) Compare the lists to find the nodes where they start to
+   *    differ marking the common_ancestor node for each list.
+   *
+   * 3) For the list corresponding to entry0, start iterating after
+   *    the common ancestor applying the negative of all translations
+   *    to x, y and z.
    *
-   * The Cogl journal typically loads an identiy matrix because it performs
-   * software transformations, which is why we have optimized this case.
+   * 4) For the list corresponding to entry1, start iterating after
+   *    the common ancestor applying the positive of all translations
+   *    to x, y and z.
+   *
+   * If we come across any non-translation operations during 3) or 4)
+   * then bail out returning FALSE.
    */
-  if (state->is_identity)
-    cogl_matrix_init_identity (matrix);
-  else
-    *matrix = state->matrix;
+
+  for (node0 = entry0; node0; node0 = node0->parent)
+    {
+      GSList *link;
+
+      if (node0->op == COGL_MATRIX_OP_SAVE)
+        continue;
+
+      link = g_alloca (sizeof (GSList));
+      link->next = head0;
+      link->data = node0;
+      head0 = link;
+      len0++;
+
+      if (node0->op != COGL_MATRIX_OP_TRANSLATE)
+        break;
+    }
+  for (node1 = entry1; node1; node1 = node1->parent)
+    {
+      GSList *link;
+
+      if (node1->op == COGL_MATRIX_OP_SAVE)
+        continue;
+
+      link = g_alloca (sizeof (GSList));
+      link->next = head1;
+      link->data = node1;
+      head1 = link;
+      len1++;
+
+      if (node1->op != COGL_MATRIX_OP_TRANSLATE)
+        break;
+    }
+
+  if (head0->data != head1->data)
+    return FALSE;
+
+  common_ancestor0 = head0;
+  common_ancestor1 = head1;
+  head0 = head0->next;
+  head1 = head1->next;
+  count = MIN (len0, len1) - 1;
+  while (count--)
+    {
+      if (head0->data != head1->data)
+        break;
+      common_ancestor0 = head0;
+      common_ancestor1 = head1;
+      head0 = head0->next;
+      head1 = head1->next;
+    }
+
+  *x = 0;
+  *y = 0;
+  *z = 0;
+
+  for (head0 = common_ancestor0->next; head0; head0 = head0->next)
+    {
+      CoglMatrixEntryTranslate *translate;
+
+      node0 = head0->data;
+
+      if (node0->op != COGL_MATRIX_OP_TRANSLATE)
+        return FALSE;
+
+      translate = (CoglMatrixEntryTranslate *)node0;
+
+      *x = *x - translate->x;
+      *y = *y - translate->y;
+      *z = *z - translate->z;
+    }
+  for (head1 = common_ancestor1->next; head1; head1 = head1->next)
+    {
+      CoglMatrixEntryTranslate *translate;
+
+      node1 = head1->data;
+
+      if (node1->op != COGL_MATRIX_OP_TRANSLATE)
+        return FALSE;
+
+      translate = (CoglMatrixEntryTranslate *)node1;
+
+      *x = *x + translate->x;
+      *y = *y + translate->y;
+      *z = *z + translate->z;
+    }
+
+  return TRUE;
 }
 
-void
-_cogl_matrix_stack_set (CoglMatrixStack  *stack,
-                        const CoglMatrix *matrix)
+CoglBool
+_cogl_matrix_entry_has_identity_flag (CoglMatrixEntry *entry)
 {
-  CoglMatrixState *state;
-
-  state = _cogl_matrix_stack_top_mutable (stack, FALSE);
-  state->matrix = *matrix;
-  state->is_identity = FALSE;
-  stack->age++;
+  return entry ? entry->op == COGL_MATRIX_OP_LOAD_IDENTITY : FALSE;
 }
 
 static void
-_cogl_matrix_stack_flush_matrix_to_gl_builtin (CoglContext *ctx,
-                                               CoglBool is_identity,
-                                               CoglMatrix *matrix,
-                                               CoglMatrixMode mode)
+_cogl_matrix_flush_to_gl_builtin (CoglContext *ctx,
+                                  CoglBool is_identity,
+                                  CoglMatrix *matrix,
+                                  CoglMatrixMode mode)
 {
   g_assert (ctx->driver == COGL_DRIVER_GL ||
             ctx->driver == COGL_DRIVER_GLES1);
@@ -406,9 +739,10 @@ _cogl_matrix_stack_flush_matrix_to_gl_builtin (CoglContext *ctx,
 }
 
 void
-_cogl_matrix_stack_flush_to_gl_builtins (CoglContext *ctx,
-                                         CoglMatrixStack *stack,
+_cogl_matrix_entry_flush_to_gl_builtins (CoglContext *ctx,
+                                         CoglMatrixEntry *entry,
                                          CoglMatrixMode mode,
+                                         CoglFramebuffer *framebuffer,
                                          CoglBool disable_flip)
 {
   g_assert (ctx->driver == COGL_DRIVER_GL ||
@@ -417,10 +751,7 @@ _cogl_matrix_stack_flush_to_gl_builtins (CoglContext *ctx,
 #if defined (HAVE_COGL_GL) || defined (HAVE_COGL_GLES)
   {
     CoglBool needs_flip;
-    CoglMatrixState *state;
-    CoglMatrixStackCache *cache;
-
-    state = _cogl_matrix_stack_top (stack);
+    CoglMatrixEntryCache *cache;
 
     if (mode == COGL_MATRIX_PROJECTION)
       {
@@ -433,7 +764,7 @@ _cogl_matrix_stack_flush_to_gl_builtins (CoglContext *ctx,
         if (disable_flip)
           needs_flip = FALSE;
         else
-          needs_flip = cogl_is_offscreen (ctx->current_draw_buffer);
+          needs_flip = cogl_is_offscreen (framebuffer);
 
         cache = &ctx->builtin_flushed_projection;
       }
@@ -449,9 +780,18 @@ _cogl_matrix_stack_flush_to_gl_builtins (CoglContext *ctx,
 
     /* We don't need to do anything if the state is the same */
     if (!cache ||
-        _cogl_matrix_stack_check_and_update_cache (stack, cache, needs_flip))
+        _cogl_matrix_entry_cache_maybe_update (cache, entry, needs_flip))
       {
-        CoglBool is_identity = state->is_identity && !needs_flip;
+        CoglBool is_identity;
+        CoglMatrix matrix;
+
+        if (entry->op == COGL_MATRIX_OP_LOAD_IDENTITY)
+          is_identity = TRUE;
+        else
+          {
+            is_identity = FALSE;
+            _cogl_matrix_entry_get (entry, &matrix);
+          }
 
         if (needs_flip)
           {
@@ -459,98 +799,269 @@ _cogl_matrix_stack_flush_to_gl_builtins (CoglContext *ctx,
 
             cogl_matrix_multiply (&flipped_matrix,
                                   &ctx->y_flip_matrix,
-                                  state->is_identity ?
+                                  is_identity ?
                                   &ctx->identity_matrix :
-                                  &state->matrix);
+                                  &matrix);
 
-            _cogl_matrix_stack_flush_matrix_to_gl_builtin (ctx,
-                                                           /* not identity */
-                                                           FALSE,
-                                                           &flipped_matrix,
-                                                           mode);
+            _cogl_matrix_flush_to_gl_builtin (ctx,
+                                              /* not identity */
+                                              FALSE,
+                                              &flipped_matrix,
+                                              mode);
           }
         else
-          _cogl_matrix_stack_flush_matrix_to_gl_builtin (ctx,
-                                                         is_identity,
-                                                         &state->matrix,
-                                                         mode);
+          {
+            _cogl_matrix_flush_to_gl_builtin (ctx,
+                                              is_identity,
+                                              &matrix,
+                                              mode);
+          }
       }
   }
 #endif
 }
 
-unsigned int
-_cogl_matrix_stack_get_age (CoglMatrixStack *stack)
-{
-  return stack->age;
-}
-
+/* Cheap equality test: TRUE only when both pointers reference the very
+ * same entry node. Two distinct nodes that happen to describe the same
+ * transform compare as different here; use _cogl_matrix_entry_equal()
+ * when that matters. */
 CoglBool
-_cogl_matrix_stack_has_identity_flag (CoglMatrixStack *stack)
+_cogl_matrix_entry_fast_equal (CoglMatrixEntry *entry0,
+                               CoglMatrixEntry *entry1)
 {
-  return _cogl_matrix_stack_top (stack)->is_identity;
+  return entry0 == entry1;
 }
 
+/* Walks both entries towards their roots, ignoring SAVE markers, and
+ * compares the operations pairwise. Returns TRUE as soon as the two
+ * walks reach a shared node, a LOAD_IDENTITY pair or an equal LOAD
+ * pair; returns FALSE on the first differing op or operand. Float
+ * operands are compared exactly so this may report FALSE for
+ * transforms that are numerically very close. */
 CoglBool
-_cogl_matrix_stack_equal (CoglMatrixStack *stack0,
-                          CoglMatrixStack *stack1)
+_cogl_matrix_entry_equal (CoglMatrixEntry *entry0,
+                          CoglMatrixEntry *entry1)
 {
-  CoglMatrixState *state0 = _cogl_matrix_stack_top (stack0);
-  CoglMatrixState *state1 = _cogl_matrix_stack_top (stack1);
+  for (;
+       entry0 && entry1;
+       entry0 = entry0->parent, entry1 = entry1->parent)
+    {
+      entry0 = _cogl_matrix_entry_skip_saves (entry0);
+      entry1 = _cogl_matrix_entry_skip_saves (entry1);
 
-  if (state0->is_identity != state1->is_identity)
-    return FALSE;
+      if (entry0 == entry1)
+        return TRUE;
 
-  if (state0->is_identity)
-    return TRUE;
-  else
-    return cogl_matrix_equal (&state0->matrix, &state1->matrix);
+      if (entry0->op != entry1->op)
+        return FALSE;
+
+      switch (entry0->op)
+        {
+        case COGL_MATRIX_OP_LOAD_IDENTITY:
+          return TRUE;
+        case COGL_MATRIX_OP_TRANSLATE:
+          {
+            CoglMatrixEntryTranslate *translate0 =
+              (CoglMatrixEntryTranslate *)entry0;
+            CoglMatrixEntryTranslate *translate1 =
+              (CoglMatrixEntryTranslate *)entry1;
+            /* We could perhaps use an epsilon to compare here?
+             * I expect the false negatives are probably never going to
+             * be a problem and this is a bit cheaper. */
+            if (translate0->x != translate1->x ||
+                translate0->y != translate1->y ||
+                translate0->z != translate1->z)
+              return FALSE;
+          }
+          break;
+        case COGL_MATRIX_OP_ROTATE:
+          {
+            CoglMatrixEntryRotate *rotate0 =
+              (CoglMatrixEntryRotate *)entry0;
+            CoglMatrixEntryRotate *rotate1 =
+              (CoglMatrixEntryRotate *)entry1;
+            if (rotate0->angle != rotate1->angle ||
+                rotate0->x != rotate1->x ||
+                rotate0->y != rotate1->y ||
+                rotate0->z != rotate1->z)
+              return FALSE;
+          }
+          break;
+        case COGL_MATRIX_OP_SCALE:
+          {
+            CoglMatrixEntryScale *scale0 = (CoglMatrixEntryScale *)entry0;
+            CoglMatrixEntryScale *scale1 = (CoglMatrixEntryScale *)entry1;
+            if (scale0->x != scale1->x ||
+                scale0->y != scale1->y ||
+                scale0->z != scale1->z)
+              return FALSE;
+          }
+          break;
+        case COGL_MATRIX_OP_MULTIPLY:
+          {
+            CoglMatrixEntryMultiply *mult0 = (CoglMatrixEntryMultiply *)entry0;
+            CoglMatrixEntryMultiply *mult1 = (CoglMatrixEntryMultiply *)entry1;
+            if (!cogl_matrix_equal (mult0->matrix, mult1->matrix))
+              return FALSE;
+          }
+          break;
+        case COGL_MATRIX_OP_LOAD:
+          {
+            CoglMatrixEntryLoad *load0 = (CoglMatrixEntryLoad *)entry0;
+            CoglMatrixEntryLoad *load1 = (CoglMatrixEntryLoad *)entry1;
+            /* There's no need to check any further since an
+             * _OP_LOAD makes all the ancestors redundant as far as
+             * the final matrix value is concerned. */
+            return cogl_matrix_equal (load0->matrix, load1->matrix);
+          }
+        case COGL_MATRIX_OP_SAVE:
+          /* We skip over saves above so we shouldn't see save entries */
+          g_warn_if_reached ();
+          break;
+        }
+    }
+
+  return FALSE;
 }
 
-CoglBool
-_cogl_matrix_stack_check_and_update_cache (CoglMatrixStack *stack,
-                                           CoglMatrixStackCache *cache,
-                                           CoglBool flip)
+/* Debugging aid: prints every operation making up @entry, starting from
+ * the root of the graph and finishing with @entry itself. */
+void
+_cogl_matrix_entry_print (CoglMatrixEntry *entry)
 {
-  CoglBool is_identity =
-    _cogl_matrix_stack_has_identity_flag (stack) && !flip;
-  CoglBool is_dirty;
-
-  if (is_identity && cache->flushed_identity)
-    is_dirty = FALSE;
-  else if (cache->stack == NULL ||
-           cache->stack->age != cache->age ||
-           flip != cache->flipped)
-    is_dirty = TRUE;
-  else
-    is_dirty = (cache->stack != stack &&
-                !_cogl_matrix_stack_equal (cache->stack, stack));
-
-  /* We'll update the cache values even if the stack isn't dirty in
-     case the reason it wasn't dirty is because we compared the
-     matrices and found them to be the same. In that case updating the
-     cache values will avoid the comparison next time */
-  cache->age = stack->age;
-  cogl_object_ref (stack);
-  if (cache->stack)
-    cogl_object_unref (cache->stack);
-  cache->stack = stack;
-  cache->flushed_identity = is_identity;
-  cache->flipped = flip;
-
-  return is_dirty;
+  int depth;
+  CoglMatrixEntry *e;
+  CoglMatrixEntry **children;
+  int i;
+
+  for (depth = 0, e = entry; e; e = e->parent)
+    depth++;
+
+  /* One pointer per node on the path from the root to @entry */
+  children = g_alloca (sizeof (CoglMatrixEntry *) * depth);
+
+  for (i = depth - 1, e = entry;
+       i >= 0 && e;
+       i--, e = e->parent)
+    {
+      children[i] = e;
+    }
+
+  g_print ("MatrixEntry %p =\n", entry);
+
+  for (i = 0; i < depth; i++)
+    {
+      entry = children[i];
+
+      switch (entry->op)
+        {
+        case COGL_MATRIX_OP_LOAD_IDENTITY:
+          g_print ("  LOAD IDENTITY\n");
+          continue;
+        case COGL_MATRIX_OP_TRANSLATE:
+          {
+            CoglMatrixEntryTranslate *translate =
+              (CoglMatrixEntryTranslate *)entry;
+            g_print ("  TRANSLATE X=%f Y=%f Z=%f\n",
+                     translate->x,
+                     translate->y,
+                     translate->z);
+            continue;
+          }
+        case COGL_MATRIX_OP_ROTATE:
+          {
+            CoglMatrixEntryRotate *rotate =
+              (CoglMatrixEntryRotate *)entry;
+            g_print ("  ROTATE ANGLE=%f X=%f Y=%f Z=%f\n",
+                     rotate->angle,
+                     rotate->x,
+                     rotate->y,
+                     rotate->z);
+            continue;
+          }
+        case COGL_MATRIX_OP_SCALE:
+          {
+            CoglMatrixEntryScale *scale = (CoglMatrixEntryScale *)entry;
+            g_print ("  SCALE X=%f Y=%f Z=%f\n",
+                     scale->x,
+                     scale->y,
+                     scale->z);
+            continue;
+          }
+        case COGL_MATRIX_OP_MULTIPLY:
+          {
+            CoglMatrixEntryMultiply *mult = (CoglMatrixEntryMultiply *)entry;
+            g_print ("  MULT:\n");
+            _cogl_matrix_prefix_print ("    ", mult->matrix);
+            continue;
+          }
+        case COGL_MATRIX_OP_LOAD:
+          {
+            CoglMatrixEntryLoad *load = (CoglMatrixEntryLoad *)entry;
+            g_print ("  LOAD:\n");
+            _cogl_matrix_prefix_print ("    ", load->matrix);
+            continue;
+          }
+        case COGL_MATRIX_OP_SAVE:
+          g_print ("  SAVE\n");
+        }
+    }
 }
 
+/* Puts @cache into the "nothing flushed yet" state. Every field is
+ * initialized because _cogl_matrix_entry_cache_maybe_update() reads
+ * all of them. */
 void
-_cogl_matrix_stack_init_cache (CoglMatrixStackCache *cache)
+_cogl_matrix_entry_cache_init (CoglMatrixEntryCache *cache)
 {
-  cache->stack = NULL;
+  cache->entry = NULL;
   cache->flushed_identity = FALSE;
+  cache->flipped = FALSE;
 }
 
+/* Records @entry and @flip in @cache, taking a reference on @entry, and
+ * returns TRUE if either differs from what the cache held before (so
+ * the caller needs to re-flush the matrix).
+ *
+ * NB: This function can report false negatives since it never does a
+ * deep comparison of the stack matrices. */
+CoglBool
+_cogl_matrix_entry_cache_maybe_update (CoglMatrixEntryCache *cache,
+                                       CoglMatrixEntry *entry,
+                                       CoglBool flip)
+{
+  CoglBool is_identity;
+  CoglBool updated = FALSE;
+
+  if (cache->flipped != flip)
+    {
+      cache->flipped = flip;
+      updated = TRUE;
+    }
+
+  is_identity = (entry->op == COGL_MATRIX_OP_LOAD_IDENTITY);
+  if (cache->flushed_identity != is_identity)
+    {
+      cache->flushed_identity = is_identity;
+      updated = TRUE;
+    }
+
+  if (cache->entry != entry)
+    {
+      _cogl_matrix_entry_ref (entry);
+      if (cache->entry)
+        _cogl_matrix_entry_unref (cache->entry);
+      cache->entry = entry;
+      updated = TRUE;
+    }
+
+  return updated;
+}
+
+/* Drops the reference @cache holds on its entry, if any. */
 void
-_cogl_matrix_stack_destroy_cache (CoglMatrixStackCache *cache)
+_cogl_matrix_entry_cache_destroy (CoglMatrixEntryCache *cache)
 {
-  if (cache->stack)
-    cogl_object_unref (cache->stack);
+  if (cache->entry)
+    _cogl_matrix_entry_unref (cache->entry);
 }
diff --git a/cogl/cogl-matrix-stack.h b/cogl/cogl-matrix-stack.h
index 49cd03e..db957d6 100644
--- a/cogl/cogl-matrix-stack.h
+++ b/cogl/cogl-matrix-stack.h
@@ -3,7 +3,7 @@
  *
  * An object oriented GL/GLES Abstraction/Utility Layer
  *
- * Copyright (C) 2009,2010 Intel Corporation.
+ * Copyright (C) 2009,2010,2012 Intel Corporation.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -29,29 +29,125 @@
 #ifndef __COGL_MATRIX_STACK_H
 #define __COGL_MATRIX_STACK_H
 
+#include "cogl-object-private.h"
 #include "cogl-matrix.h"
 #include "cogl-context.h"
+#include "cogl-framebuffer.h"
 
-typedef struct _CoglMatrixStack CoglMatrixStack;
+/* The kinds of operation node that can make up a matrix stack graph. */
+typedef enum _CoglMatrixOp
+{
+  COGL_MATRIX_OP_LOAD_IDENTITY,
+  COGL_MATRIX_OP_TRANSLATE,
+  COGL_MATRIX_OP_ROTATE,
+  COGL_MATRIX_OP_SCALE,
+  COGL_MATRIX_OP_MULTIPLY,
+  COGL_MATRIX_OP_LOAD,
+  COGL_MATRIX_OP_SAVE,
+} CoglMatrixOp;
+
+typedef struct _CoglMatrixEntry CoglMatrixEntry;
 
-typedef struct
+/* Common header shared by every kind of entry: the operation it
+ * represents, a link to the entry it was applied on top of and a
+ * reference count. */
+struct _CoglMatrixEntry
 {
-  CoglMatrixStack *stack;
-  unsigned int age;
-  CoglBool flushed_identity;
-  CoglBool flipped;
-} CoglMatrixStackCache;
+  CoglMatrixOp op;
+  CoglMatrixEntry *parent;
+  unsigned int ref_count;
 
-typedef enum {
-  COGL_MATRIX_MODELVIEW,
-  COGL_MATRIX_PROJECTION,
-  COGL_MATRIX_TEXTURE
-} CoglMatrixMode;
+  /* used for performance tracing */
+  int composite_gets;
+};
 
-typedef void (* CoglMatrixStackFlushFunc) (CoglContext *context,
-                                           CoglBool is_identity,
-                                           const CoglMatrix *matrix,
-                                           void *user_data);
+typedef struct _CoglMatrixEntryTranslate
+{
+  CoglMatrixEntry _parent_data;
+
+  float x;
+  float y;
+  float z;
+
+} CoglMatrixEntryTranslate;
+
+typedef struct _CoglMatrixEntryRotate
+{
+  CoglMatrixEntry _parent_data;
+
+  float angle;
+  float x;
+  float y;
+  float z;
+
+} CoglMatrixEntryRotate;
+
+typedef struct _CoglMatrixEntryScale
+{
+  CoglMatrixEntry _parent_data;
+
+  float x;
+  float y;
+  float z;
+
+} CoglMatrixEntryScale;
+
+typedef struct _CoglMatrixEntryMultiply
+{
+  CoglMatrixEntry _parent_data;
+
+  CoglMatrix *matrix;
+
+} CoglMatrixEntryMultiply;
+
+typedef struct _CoglMatrixEntryLoad
+{
+  CoglMatrixEntry _parent_data;
+
+  CoglMatrix *matrix;
+
+} CoglMatrixEntryLoad;
+
+/* A SAVE entry is the marker added by a push; it may also hold a
+ * cached matrix for the composition of all its ancestors. */
+typedef struct _CoglMatrixEntrySave
+{
+  CoglMatrixEntry _parent_data;
+
+  CoglBool cache_valid;
+  CoglMatrix *cache;
+
+} CoglMatrixEntrySave;
+
+/* Union of every entry type, i.e. large enough to hold any of them. */
+typedef union _CoglMatrixEntryFull
+{
+  CoglMatrixEntry any;
+  CoglMatrixEntryTranslate translate;
+  CoglMatrixEntryRotate rotate;
+  CoglMatrixEntryScale scale;
+  CoglMatrixEntryMultiply multiply;
+  CoglMatrixEntryLoad load;
+  CoglMatrixEntrySave save;
+} CoglMatrixEntryFull;
+
+/* A stack only needs to track the most recently added entry; earlier
+ * state is reachable through the entries' parent links. */
+typedef struct _CoglMatrixStack
+{
+  CoglObject _parent;
+
+  CoglMatrixEntry *last_entry;
+} CoglMatrixStack;
+
+/* Remembers which entry was last flushed, and whether it was flushed
+ * flipped, so redundant flushes can be skipped. */
+typedef struct _CoglMatrixEntryCache
+{
+  CoglMatrixEntry *entry;
+  CoglBool flushed_identity;
+  CoglBool flipped;
+} CoglMatrixEntryCache;
 
 CoglMatrixStack *
 _cogl_matrix_stack_new (void);
@@ -63,6 +148,9 @@ void
 _cogl_matrix_stack_pop (CoglMatrixStack *stack);
 
 void
+_cogl_matrix_entry_identity_init (CoglMatrixEntry *entry);
+
+void
 _cogl_matrix_stack_load_identity (CoglMatrixStack *stack);
 
 void
@@ -99,53 +187,89 @@ _cogl_matrix_stack_perspective (CoglMatrixStack *stack,
                                 float z_near,
                                 float z_far);
 void
-_cogl_matrix_stack_ortho (CoglMatrixStack *stack,
-                          float left,
-                          float right,
-                          float bottom,
-                          float top,
-                          float z_near,
-                          float z_far);
+_cogl_matrix_stack_orthographic (CoglMatrixStack *stack,
+                                 float x_1,
+                                 float y_1,
+                                 float x_2,
+                                 float y_2,
+                                 float near,
+                                 float far);
+
 CoglBool
 _cogl_matrix_stack_get_inverse (CoglMatrixStack *stack,
                                 CoglMatrix *inverse);
-void
+
+/* NB: This function only *sometimes* returns a pointer to a matrix if
+ * the matrix returned didn't need to be composed of multiple
+ * operations */
+CoglMatrix *
 _cogl_matrix_stack_get (CoglMatrixStack *stack,
                         CoglMatrix *matrix);
+
+/* NB: This function only *sometimes* returns a pointer to a matrix if
+ * the matrix returned didn't need to be composed of multiple
+ * operations */
+CoglMatrix *
+_cogl_matrix_entry_get (CoglMatrixEntry *entry,
+                        CoglMatrix *matrix);
+
 void
 _cogl_matrix_stack_set (CoglMatrixStack *stack,
                         const CoglMatrix *matrix);
 
-void
-_cogl_matrix_stack_flush_to_gl_builtins (CoglContext *ctx,
-                                         CoglMatrixStack *stack,
-                                         CoglMatrixMode mode,
-                                         CoglBool disable_flip);
+CoglBool
+_cogl_matrix_entry_calculate_translation (CoglMatrixEntry *entry0,
+                                          CoglMatrixEntry *entry1,
+                                          float *x,
+                                          float *y,
+                                          float *z);
 
-unsigned int
-_cogl_matrix_stack_get_age (CoglMatrixStack *stack);
+/* If this returns TRUE then the entry is definitely the identity
+ * matrix. If it returns FALSE it may or may not be the identity
+ * matrix but no expensive comparison is performed to verify it. */
+CoglBool
+_cogl_matrix_entry_has_identity_flag (CoglMatrixEntry *entry);
 
-/* If this returns TRUE then the top of the matrix is definitely the
-   identity matrix. If it returns FALSE it may or may not be the
-   identity matrix but no expensive comparison is performed to verify
-   it. */
 CoglBool
-_cogl_matrix_stack_has_identity_flag (CoglMatrixStack *stack);
+_cogl_matrix_entry_fast_equal (CoglMatrixEntry *entry0,
+                               CoglMatrixEntry *entry1);
 
 CoglBool
-_cogl_matrix_stack_equal (CoglMatrixStack *stack0,
-                          CoglMatrixStack *stack1);
+_cogl_matrix_entry_equal (CoglMatrixEntry *entry0,
+                          CoglMatrixEntry *entry1);
+
+void
+_cogl_matrix_entry_print (CoglMatrixEntry *entry);
+
+CoglMatrixEntry *
+_cogl_matrix_entry_ref (CoglMatrixEntry *entry);
+
+void
+_cogl_matrix_entry_unref (CoglMatrixEntry *entry);
+
+typedef enum {
+  COGL_MATRIX_MODELVIEW,
+  COGL_MATRIX_PROJECTION,
+  COGL_MATRIX_TEXTURE
+} CoglMatrixMode;
+
+void
+_cogl_matrix_entry_flush_to_gl_builtins (CoglContext *ctx,
+                                         CoglMatrixEntry *entry,
+                                         CoglMatrixMode mode,
+                                         CoglFramebuffer *framebuffer,
+                                         CoglBool disable_flip);
 
 void
-_cogl_matrix_stack_init_cache (CoglMatrixStackCache *cache);
+_cogl_matrix_entry_cache_init (CoglMatrixEntryCache *cache);
 
 CoglBool
-_cogl_matrix_stack_check_and_update_cache (CoglMatrixStack *stack,
-                                           CoglMatrixStackCache *cache,
-                                           CoglBool flip);
+_cogl_matrix_entry_cache_maybe_update (CoglMatrixEntryCache *cache,
+                                       CoglMatrixEntry *entry,
+                                       CoglBool flip);
 
 void
-_cogl_matrix_stack_destroy_cache (CoglMatrixStackCache *cache);
+_cogl_matrix_entry_cache_destroy (CoglMatrixEntryCache *cache);
 
 CoglBool
 _cogl_is_matrix_stack (void *object);
diff --git a/cogl/cogl-matrix.c b/cogl/cogl-matrix.c
index b1574eb..29db3f8 100644
--- a/cogl/cogl-matrix.c
+++ b/cogl/cogl-matrix.c
@@ -360,40 +360,47 @@ _cogl_matrix_multiply_array (CoglMatrix *result, const float *array)
  * Called by _cogl_matrix_print() to print a matrix or its inverse.
  */
 static void
-print_matrix_floats (const float m[16])
+print_matrix_floats (const char *prefix, const float m[16])
 {
   int i;
   for (i = 0;i < 4; i++)
-    g_print ("\t%f %f %f %f\n", m[i], m[4+i], m[8+i], m[12+i] );
+    g_print ("%s\t%f %f %f %f\n", prefix, m[i], m[4+i], m[8+i], m[12+i] );
 }
 
-/*
- * Dumps the contents of a CoglMatrix structure.
- */
 void
-_cogl_matrix_print (const CoglMatrix *matrix)
+_cogl_matrix_prefix_print (const char *prefix, const CoglMatrix *matrix)
 {
   if (!(matrix->flags & MAT_DIRTY_TYPE))
     {
       _COGL_RETURN_IF_FAIL (matrix->type < COGL_MATRIX_N_TYPES);
-      g_print ("Matrix type: %s, flags: %x\n",
-               types[matrix->type], (int)matrix->flags);
+      g_print ("%sMatrix type: %s, flags: %x\n",
+               prefix, types[matrix->type], (int)matrix->flags);
     }
   else
-    g_print ("Matrix type: DIRTY, flags: %x\n", (int)matrix->flags);
+    g_print ("%sMatrix type: DIRTY, flags: %x\n",
+             prefix, (int)matrix->flags);
 
-  print_matrix_floats ((float *)matrix);
-  g_print ("Inverse: \n");
+  print_matrix_floats (prefix, (float *)matrix);
+  g_print ("%sInverse: \n", prefix);
   if (!(matrix->flags & MAT_DIRTY_INVERSE))
     {
       float prod[16];
-      print_matrix_floats (matrix->inv);
+      print_matrix_floats (prefix, matrix->inv);
       matrix_multiply4x4 (prod, (float *)matrix, matrix->inv);
-      g_print ("Mat * Inverse:\n");
-      print_matrix_floats (prod);
+      g_print ("%sMat * Inverse:\n", prefix);
+      print_matrix_floats (prefix, prod);
     }
   else
-    g_print ("  - not available\n");
+    g_print ("%s  - not available\n", prefix);
+}
+
+/*
+ * Dumps the contents of a CoglMatrix structure.
+ */
+void
+_cogl_matrix_print (const CoglMatrix *matrix)
+{
+  _cogl_matrix_prefix_print ("", matrix);
 }
 
 /*
@@ -1659,6 +1666,15 @@ cogl_matrix_init_from_array (CoglMatrix *matrix, const float *array)
   _COGL_MATRIX_DEBUG_PRINT (matrix);
 }
 
+void
+_cogl_matrix_init_from_matrix_without_inverse (CoglMatrix *matrix,
+                                               const CoglMatrix *src)
+{
+  memcpy (matrix, src, 16 * sizeof (float));
+  matrix->type = src->type;
+  matrix->flags = src->flags | MAT_DIRTY_INVERSE;
+}
+
 static void
 _cogl_matrix_init_from_quaternion (CoglMatrix *matrix,
                                    CoglQuaternion *quaternion)
diff --git a/cogl/cogl-pipeline-opengl-private.h b/cogl/cogl-pipeline-opengl-private.h
index 5742b4a..c9f56fa 100644
--- a/cogl/cogl-pipeline-opengl-private.h
+++ b/cogl/cogl-pipeline-opengl-private.h
@@ -143,6 +143,7 @@ _cogl_delete_gl_texture (GLuint gl_texture);
 
 void
 _cogl_pipeline_flush_gl_state (CoglPipeline *pipeline,
+                               CoglFramebuffer *framebuffer,
                                CoglBool skip_gl_state,
                                int n_tex_coord_attribs);
 
diff --git a/cogl/cogl-pipeline-opengl.c b/cogl/cogl-pipeline-opengl.c
index d947a53..5341803 100644
--- a/cogl/cogl-pipeline-opengl.c
+++ b/cogl/cogl-pipeline-opengl.c
@@ -1056,8 +1056,6 @@ fragend_add_layer_cb (CoglPipelineLayer *layer,
   CoglPipeline *pipeline = state->pipeline;
   int unit_index = _cogl_pipeline_layer_get_unit_index (layer);
 
-  _COGL_GET_CONTEXT (ctx, FALSE);
-
   /* Either generate per layer code snippets or setup the
    * fixed function glTexEnv for each layer... */
   if (G_LIKELY (fragend->add_layer (pipeline,
@@ -1075,6 +1073,7 @@ fragend_add_layer_cb (CoglPipelineLayer *layer,
 
 typedef struct
 {
+  CoglFramebuffer *framebuffer;
   const CoglPipelineVertend *vertend;
   CoglPipeline *pipeline;
   unsigned long *layer_differences;
@@ -1092,13 +1091,12 @@ vertend_add_layer_cb (CoglPipelineLayer *layer,
   CoglPipeline *pipeline = state->pipeline;
   int unit_index = _cogl_pipeline_layer_get_unit_index (layer);
 
-  _COGL_GET_CONTEXT (ctx, FALSE);
-
   /* Either enerate per layer code snippets or setup the
    * fixed function matrix uniforms for each layer... */
   if (G_LIKELY (vertend->add_layer (pipeline,
                                     layer,
-                                    state->layer_differences[unit_index])))
+                                    state->layer_differences[unit_index],
+                                    state->framebuffer)))
     state->added_layer = TRUE;
   else
     {
@@ -1161,6 +1159,7 @@ vertend_add_layer_cb (CoglPipelineLayer *layer,
  */
 void
 _cogl_pipeline_flush_gl_state (CoglPipeline *pipeline,
+                               CoglFramebuffer *framebuffer,
                                CoglBool skip_gl_color,
                                int n_tex_coord_attribs)
 {
@@ -1330,6 +1329,7 @@ _cogl_pipeline_flush_gl_state (CoglPipeline *pipeline,
                                        n_tex_coord_attribs)))
         continue;
 
+      state.framebuffer = framebuffer;
       state.vertend = vertend;
       state.pipeline = pipeline;
       state.layer_differences = layer_differences;
@@ -1407,7 +1407,7 @@ done:
      matrices */
   for (i = 0; i < COGL_PIPELINE_N_PROGENDS; i++)
     if (_cogl_pipeline_progends[i]->pre_paint)
-      _cogl_pipeline_progends[i]->pre_paint (pipeline);
+      _cogl_pipeline_progends[i]->pre_paint (pipeline, framebuffer);
 
   /* Handle the fact that OpenGL associates texture filter and wrap
    * modes with the texture objects not the texture units... */
diff --git a/cogl/cogl-pipeline-private.h b/cogl/cogl-pipeline-private.h
index 0d2c9be..71b4c4e 100644
--- a/cogl/cogl-pipeline-private.h
+++ b/cogl/cogl-pipeline-private.h
@@ -39,6 +39,7 @@
 #include "cogl-boxed-value.h"
 #include "cogl-pipeline-snippet-private.h"
 #include "cogl-pipeline-state.h"
+#include "cogl-framebuffer.h"
 
 #include <glib.h>
 
@@ -576,7 +577,8 @@ typedef struct _CoglPipelineVertend
                      int n_tex_coord_attribs);
   CoglBool (*add_layer) (CoglPipeline *pipeline,
                          CoglPipelineLayer *layer,
-                         unsigned long layers_difference);
+                         unsigned long layers_difference,
+                         CoglFramebuffer *framebuffer);
   CoglBool (*end) (CoglPipeline *pipeline,
                    unsigned long pipelines_difference);
 
@@ -602,7 +604,7 @@ typedef struct
   /* This is called after all of the other functions whenever the
      pipeline is flushed, even if the pipeline hasn't changed since
      the last flush */
-  void (* pre_paint) (CoglPipeline *pipeline);
+  void (* pre_paint) (CoglPipeline *pipeline, CoglFramebuffer *framebuffer);
 } CoglPipelineProgend;
 
 typedef enum
diff --git a/cogl/cogl-pipeline-progend-fixed.c b/cogl/cogl-pipeline-progend-fixed.c
index d715439..c1051fa 100644
--- a/cogl/cogl-pipeline-progend-fixed.c
+++ b/cogl/cogl-pipeline-progend-fixed.c
@@ -37,24 +37,28 @@
 
 #include "cogl-context.h"
 #include "cogl-context-private.h"
+#include "cogl-framebuffer-private.h"
 
 static void
-_cogl_pipeline_progend_fixed_pre_paint (CoglPipeline *pipeline)
+_cogl_pipeline_progend_fixed_pre_paint (CoglPipeline *pipeline,
+                                        CoglFramebuffer *framebuffer)
 {
-  _COGL_GET_CONTEXT (ctx, NO_RETVAL);
+  CoglContext *ctx = framebuffer->context;
 
   if (pipeline->vertend != COGL_PIPELINE_VERTEND_FIXED)
     return;
 
-  if (ctx->current_projection_stack)
-    _cogl_matrix_stack_flush_to_gl_builtins (ctx,
-                                             ctx->current_projection_stack,
+  if (ctx->current_projection_entry)
+    _cogl_matrix_entry_flush_to_gl_builtins (ctx,
+                                             ctx->current_projection_entry,
                                              COGL_MATRIX_PROJECTION,
+                                             framebuffer,
                                              FALSE /* enable flip */);
-  if (ctx->current_modelview_stack)
-    _cogl_matrix_stack_flush_to_gl_builtins (ctx,
-                                             ctx->current_modelview_stack,
+  if (ctx->current_modelview_entry)
+    _cogl_matrix_entry_flush_to_gl_builtins (ctx,
+                                             ctx->current_modelview_entry,
                                              COGL_MATRIX_MODELVIEW,
+                                             framebuffer,
                                              FALSE /* enable flip */);
 }
 
diff --git a/cogl/cogl-pipeline-progend-glsl.c b/cogl/cogl-pipeline-progend-glsl.c
index cb4e333..2a23857 100644
--- a/cogl/cogl-pipeline-progend-glsl.c
+++ b/cogl/cogl-pipeline-progend-glsl.c
@@ -118,8 +118,8 @@ typedef struct
   GLint projection_uniform;
   GLint mvp_uniform;
 
-  CoglMatrixStackCache projection_cache;
-  CoglMatrixStackCache modelview_cache;
+  CoglMatrixEntryCache projection_cache;
+  CoglMatrixEntryCache modelview_cache;
 #endif
 
   /* We need to track the last pipeline that the program was used with
@@ -226,10 +226,10 @@ clear_attribute_cache (CoglPipelineProgramState *program_state)
 static void
 clear_flushed_matrix_stacks (CoglPipelineProgramState *program_state)
 {
-  _cogl_matrix_stack_destroy_cache (&program_state->projection_cache);
-  _cogl_matrix_stack_init_cache (&program_state->projection_cache);
-  _cogl_matrix_stack_destroy_cache (&program_state->modelview_cache);
-  _cogl_matrix_stack_init_cache (&program_state->modelview_cache);
+  _cogl_matrix_entry_cache_destroy (&program_state->projection_cache);
+  _cogl_matrix_entry_cache_init (&program_state->projection_cache);
+  _cogl_matrix_entry_cache_destroy (&program_state->modelview_cache);
+  _cogl_matrix_entry_cache_init (&program_state->modelview_cache);
 }
 
 #endif /* HAVE_COGL_GLES2 */
@@ -247,8 +247,8 @@ program_state_new (int n_layers)
   program_state->uniform_locations = NULL;
   program_state->attribute_locations = NULL;
 #ifdef HAVE_COGL_GLES2
-  _cogl_matrix_stack_init_cache (&program_state->modelview_cache);
-  _cogl_matrix_stack_init_cache (&program_state->projection_cache);
+  _cogl_matrix_entry_cache_init (&program_state->modelview_cache);
+  _cogl_matrix_entry_cache_init (&program_state->projection_cache);
 #endif
 
   return program_state;
@@ -276,8 +276,8 @@ destroy_program_state (void *user_data,
 #ifdef HAVE_COGL_GLES2
       if (ctx->driver == COGL_DRIVER_GLES2)
         {
-          _cogl_matrix_stack_destroy_cache (&program_state->projection_cache);
-          _cogl_matrix_stack_destroy_cache (&program_state->modelview_cache);
+          _cogl_matrix_entry_cache_destroy (&program_state->projection_cache);
+          _cogl_matrix_entry_cache_destroy (&program_state->modelview_cache);
         }
 #endif
 
@@ -892,11 +892,12 @@ _cogl_pipeline_progend_glsl_layer_pre_change_notify (
 }
 
 static void
-_cogl_pipeline_progend_glsl_pre_paint (CoglPipeline *pipeline)
+_cogl_pipeline_progend_glsl_pre_paint (CoglPipeline *pipeline,
+                                       CoglFramebuffer *framebuffer)
 {
   CoglBool needs_flip;
-  CoglMatrixStack *projection_stack;
-  CoglMatrixStack *modelview_stack;
+  CoglMatrixEntry *projection_entry;
+  CoglMatrixEntry *modelview_entry;
   CoglPipelineProgramState *program_state;
 
   _COGL_GET_CONTEXT (ctx, NO_RETVAL);
@@ -906,13 +907,13 @@ _cogl_pipeline_progend_glsl_pre_paint (CoglPipeline *pipeline)
 
   program_state = get_program_state (pipeline);
 
-  projection_stack = ctx->current_projection_stack;
-  modelview_stack = ctx->current_modelview_stack;
+  projection_entry = ctx->current_projection_entry;
+  modelview_entry = ctx->current_modelview_entry;
 
   /* An initial pipeline is flushed while creating the context. At
      this point there are no matrices selected so we can't do
      anything */
-  if (modelview_stack == NULL || projection_stack == NULL)
+  if (modelview_entry == NULL || projection_entry == NULL)
     return;
 
   needs_flip = cogl_is_offscreen (ctx->current_draw_buffer);
@@ -927,19 +928,17 @@ _cogl_pipeline_progend_glsl_pre_paint (CoglPipeline *pipeline)
       CoglMatrix modelview, projection;
 
       projection_changed =
-        _cogl_matrix_stack_check_and_update_cache (projection_stack,
-                                                   &program_state->
-                                                   projection_cache,
-                                                   needs_flip &&
-                                                   program_state->
-                                                   flip_uniform == -1);
+        _cogl_matrix_entry_cache_maybe_update (&program_state->projection_cache,
+                                               projection_entry,
+                                               (needs_flip &&
+                                                program_state->flip_uniform ==
+                                                -1));
 
       modelview_changed =
-        _cogl_matrix_stack_check_and_update_cache (modelview_stack,
-                                                   &program_state->
-                                                   modelview_cache,
-                                                   /* never flip modelview */
-                                                   FALSE);
+        _cogl_matrix_entry_cache_maybe_update (&program_state->modelview_cache,
+                                               modelview_entry,
+                                               /* never flip modelview */
+                                               FALSE);
 
       if (modelview_changed || projection_changed)
         {
@@ -954,19 +953,19 @@ _cogl_pipeline_progend_glsl_pre_paint (CoglPipeline *pipeline)
             }
 
           if (need_modelview)
-            _cogl_matrix_stack_get (modelview_stack, &modelview);
+            _cogl_matrix_entry_get (modelview_entry, &modelview);
           if (need_projection)
             {
               if (needs_flip && program_state->flip_uniform == -1)
                 {
                   CoglMatrix tmp_matrix;
-                  _cogl_matrix_stack_get (projection_stack, &tmp_matrix);
+                  _cogl_matrix_entry_get (projection_entry, &tmp_matrix);
                   cogl_matrix_multiply (&projection,
                                         &ctx->y_flip_matrix,
                                         &tmp_matrix);
                 }
               else
-                _cogl_matrix_stack_get (projection_stack, &projection);
+                _cogl_matrix_entry_get (projection_entry, &projection);
             }
 
           if (projection_changed && program_state->projection_uniform != -1)
@@ -986,7 +985,7 @@ _cogl_pipeline_progend_glsl_pre_paint (CoglPipeline *pipeline)
               /* The journal usually uses an identity matrix for the
                  modelview so we can optimise this common case by
                  avoiding the matrix multiplication */
-              if (_cogl_matrix_stack_has_identity_flag (modelview_stack))
+              if (_cogl_matrix_entry_has_identity_flag (modelview_entry))
                 {
                   GE (ctx,
                       glUniformMatrix4fv (program_state->mvp_uniform,
@@ -1019,13 +1018,15 @@ _cogl_pipeline_progend_glsl_pre_paint (CoglPipeline *pipeline)
          geometry via the matrix and use the flip vertex instead */
       disable_flip = program_state->flip_uniform != -1;
 
-      _cogl_matrix_stack_flush_to_gl_builtins (ctx,
-                                               projection_stack,
+      _cogl_matrix_entry_flush_to_gl_builtins (ctx,
+                                               projection_entry,
                                                COGL_MATRIX_PROJECTION,
+                                               framebuffer,
                                                disable_flip);
-      _cogl_matrix_stack_flush_to_gl_builtins (ctx,
-                                               modelview_stack,
+      _cogl_matrix_entry_flush_to_gl_builtins (ctx,
+                                               modelview_entry,
                                                COGL_MATRIX_MODELVIEW,
+                                               framebuffer,
                                                disable_flip);
     }
 
diff --git a/cogl/cogl-pipeline-vertend-fixed.c b/cogl/cogl-pipeline-vertend-fixed.c
index c86b099..499f545 100644
--- a/cogl/cogl-pipeline-vertend-fixed.c
+++ b/cogl/cogl-pipeline-vertend-fixed.c
@@ -33,6 +33,7 @@
 #include "cogl-pipeline-private.h"
 #include "cogl-pipeline-state-private.h"
 #include "cogl-pipeline-opengl-private.h"
+#include "cogl-framebuffer-private.h"
 
 #ifdef COGL_PIPELINE_VERTEND_FIXED
 
@@ -68,26 +69,29 @@ _cogl_pipeline_vertend_fixed_start (CoglPipeline *pipeline,
 static CoglBool
 _cogl_pipeline_vertend_fixed_add_layer (CoglPipeline *pipeline,
                                         CoglPipelineLayer *layer,
-                                        unsigned long layers_difference)
+                                        unsigned long layers_difference,
+                                        CoglFramebuffer *framebuffer)
 {
+  CoglContext *ctx = framebuffer->context;
   int unit_index = _cogl_pipeline_layer_get_unit_index (layer);
   CoglTextureUnit *unit = _cogl_get_texture_unit (unit_index);
 
-  _COGL_GET_CONTEXT (ctx, FALSE);
-
   if (layers_difference & COGL_PIPELINE_LAYER_STATE_USER_MATRIX)
     {
       CoglPipelineLayerState state = COGL_PIPELINE_LAYER_STATE_USER_MATRIX;
       CoglPipelineLayer *authority =
         _cogl_pipeline_layer_get_authority (layer, state);
+      CoglMatrixEntry *matrix_entry;
 
       _cogl_matrix_stack_set (unit->matrix_stack,
                               &authority->big_state->matrix);
 
       _cogl_set_active_texture_unit (unit_index);
 
-      _cogl_matrix_stack_flush_to_gl_builtins (ctx, unit->matrix_stack,
+      matrix_entry = unit->matrix_stack->last_entry;
+      _cogl_matrix_entry_flush_to_gl_builtins (ctx, matrix_entry,
                                                COGL_MATRIX_TEXTURE,
+                                               framebuffer,
                                                FALSE /* enable flip */);
     }
 
diff --git a/cogl/cogl-pipeline-vertend-glsl.c b/cogl/cogl-pipeline-vertend-glsl.c
index 298996a..ff6d942 100644
--- a/cogl/cogl-pipeline-vertend-glsl.c
+++ b/cogl/cogl-pipeline-vertend-glsl.c
@@ -257,7 +257,8 @@ _cogl_pipeline_vertend_glsl_start (CoglPipeline *pipeline,
 static CoglBool
 _cogl_pipeline_vertend_glsl_add_layer (CoglPipeline *pipeline,
                                        CoglPipelineLayer *layer,
-                                       unsigned long layers_difference)
+                                       unsigned long layers_difference,
+                                       CoglFramebuffer *framebuffer)
 {
   CoglPipelineShaderState *shader_state;
   CoglPipelineSnippetData snippet_data;
@@ -280,15 +281,18 @@ _cogl_pipeline_vertend_glsl_add_layer (CoglPipeline *pipeline,
           CoglPipelineLayer *authority =
             _cogl_pipeline_layer_get_authority (layer, state);
           CoglTextureUnit *unit = _cogl_get_texture_unit (unit_index);
+          CoglMatrixEntry *matrix_entry;
 
           _cogl_matrix_stack_set (unit->matrix_stack,
                                   &authority->big_state->matrix);
 
           _cogl_set_active_texture_unit (unit_index);
 
-          _cogl_matrix_stack_flush_to_gl_builtins (ctx,
-                                                   unit->matrix_stack,
+          matrix_entry = unit->matrix_stack->last_entry;
+          _cogl_matrix_entry_flush_to_gl_builtins (ctx,
+                                                   matrix_entry,
                                                    COGL_MATRIX_TEXTURE,
+                                                   framebuffer,
                                                    FALSE /* do flip */);
         }
     }
diff --git a/cogl/cogl.c b/cogl/cogl.c
index 9ee4d55..49520d8 100644
--- a/cogl/cogl.c
+++ b/cogl/cogl.c
@@ -362,6 +362,7 @@ cogl_begin_gl (void)
    */
   pipeline = cogl_get_source ();
   _cogl_pipeline_flush_gl_state (pipeline,
+                                 cogl_get_draw_framebuffer (),
                                  FALSE,
                                  cogl_pipeline_get_n_layers (pipeline));
 
diff --git a/tests/micro-perf/test-journal.c b/tests/micro-perf/test-journal.c
index aaee9b5..a41dfb2 100644
--- a/tests/micro-perf/test-journal.c
+++ b/tests/micro-perf/test-journal.c
@@ -5,7 +5,7 @@
 #define FRAMEBUFFER_WIDTH 800
 #define FRAMEBUFFER_HEIGHT 600
 
-gboolean run_all = FALSE;
+CoglBool run_all = FALSE;
 
 typedef struct _Data
 {
@@ -116,7 +116,7 @@ static TestCallback tests[] =
   test_rectangles
 };
 
-static gboolean
+static CoglBool
 paint_cb (void *user_data)
 {
   Data *data = user_data;
-- 
1.7.7.6



More information about the Cogl mailing list