[Mesa-dev] [RFC 3/6] Model INTEL perf query backend after query object BE

Tue May 5 17:53:51 PDT 2015

Instead of using the same backend interface as AMD_performance_monitor
this defines a dedicated INTEL_performance_query interface that is based
on the ARB_query_buffer_object interface (considering the similarity of
the extensions) with the addition of vfuncs for enumerating queries and
their counters.

Compared to the previous backend, some notable differences are:

- The backend is free to represent counters using whatever data
  structures are optimal/convenient since queries and counters are
  enumerated via an iterator api instead of declaring them using
  structures directly shared with the frontend.

  This is also done to help us support the full range of data and
  semantic types available with INTEL_performance_query which is awkward
  while using a structure shared with the AMD_performance_monitor
  backend since neither extension's types are a subset of the other.

- The backend must support waiting for a query instead of the frontend
  simply using glFinish().

- Objects go through 'Active' and 'Ready' states consistent with the
  query object backend (hopefully making them more familiar). There is
  no 'Ended' state (which used to show that a query has ended at least
  once for a given object). There is a new 'Used' state similar to the
  'EverBound' state of query objects, set when a query is first begun
  which implies that we are expecting to get results back for the object
  at some point.

The INTEL_performance_query and AMD_performance_monitor extensions are
now completely orthogonal within Mesa (though a driver could optionally
choose to implement both extensions within a unified backend if that
were convenient for the sake of sharing state/code).

Signed-off-by: Robert Bragg <robert at sixbynine.org>
---
 src/mesa/main/dd.h                |  39 +++
 src/mesa/main/mtypes.h            |  25 +-
 src/mesa/main/performance_query.c | 590 ++++++++++++++------------------------
 src/mesa/main/performance_query.h |   6 +-
 4 files changed, 275 insertions(+), 385 deletions(-)

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 0c1a13f..4ba1524 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -759,6 +759,45 @@ struct dd_function_table {
                                 GLint *bytesWritten);
    /*@}*/
 
+   /**
+    * \name Performance Query objects
+    */
+   /*@{*/
+   void (*GetPerfQueryInfo)(struct gl_context *ctx,
+                            int queryIndex,
+                            const char **name,
+                            GLuint *dataSize,
+                            GLuint *numCounters,
+                            GLuint *numActive);
+   void (*GetPerfCounterInfo)(struct gl_context *ctx,
+                              int queryIndex,
+                              int counterIndex,
+                              const char **name,
+                              const char **desc,
+                              GLuint *offset,
+                              GLuint *data_size,
+                              GLuint *type_enum,
+                              GLuint *data_type_enum,
+                              GLuint64 *raw_max);
+   struct gl_perf_query_object * (*NewPerfQueryObject)(struct gl_context *ctx,
+                                                       int queryIndex);
+   void (*DeletePerfQuery)(struct gl_context *ctx,
+                           struct gl_perf_query_object *obj);
+   GLboolean (*BeginPerfQuery)(struct gl_context *ctx,
+                               struct gl_perf_query_object *obj);
+   void (*EndPerfQuery)(struct gl_context *ctx,
+                        struct gl_perf_query_object *obj);
+   void (*WaitPerfQuery)(struct gl_context *ctx,
+                         struct gl_perf_query_object *obj);
+   GLboolean (*IsPerfQueryReady)(struct gl_context *ctx,
+                                 struct gl_perf_query_object *obj);
+   void (*GetPerfQueryData)(struct gl_context *ctx,
+                            struct gl_perf_query_object *obj,
+                            GLsizei dataSize,
+                            GLuint *data,
+                            GLuint *bytesWritten);
+   /*@}*/
+
 
    /**
     * \name Vertex Array objects
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index b1e5fa9..a26109d 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2014,6 +2014,23 @@ struct gl_perf_monitor_group
 
 
 /**
+ * A query object instance as described in INTEL_performance_query.
+ *
+ * NB: We want to keep this and the corresponding backend structure
+ * relatively lean considering that applications may expect to
+ * allocate enough objects to be able to query around all draw calls
+ * in a frame.
+ */
+struct gl_perf_query_object
+{
+   GLuint Id;        /**< hash table ID/name */
+   GLuint Used:1;    /**< has been used for 1 or more queries */
+   GLuint Active:1;  /**< inside Begin/EndPerfQuery */
+   GLuint Ready:1;   /**< result is ready? */
+};
+
+
+/**
  * Context state for AMD_performance_monitor.
  */
 struct gl_perf_monitor_state
@@ -2032,12 +2049,8 @@ struct gl_perf_monitor_state
  */
 struct gl_perf_query_state
 {
-   /** Array of performance monitor groups (indexed by group ID) */
-   const struct gl_perf_monitor_group *Groups;
-   GLuint NumGroups;
-
-   /** The table of all performance query objects. */
-   struct _mesa_HashTable *Objects;
+   struct _mesa_HashTable *Objects; /**< The table of all performance query objects */
+   GLuint NumQueries;
 };
 
 
diff --git a/src/mesa/main/performance_query.c b/src/mesa/main/performance_query.c
index 355836e..57d6f8e 100644
--- a/src/mesa/main/performance_query.c
+++ b/src/mesa/main/performance_query.c
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2012 Intel Corporation
+ * Copyright © 2012-2015 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -24,14 +24,6 @@
 /**
  * \file performance_query.c
  * Core Mesa support for the INTEL_performance_query extension.
- *
- * In order to implement this extension, start by defining two enums:
- * one for Groups, and one for Counters.  These will be used as indexes into
- * arrays, so they should start at 0 and increment from there.
- *
- * Counter IDs need to be globally unique.  That is, you can't have counter 7
- * in group A and counter 7 in group B.  A global enum of all available
- * counters is a convenient way to guarantee this.
  */
 
 #include <stdbool.h>
@@ -42,66 +34,21 @@
 #include "macros.h"
 #include "mtypes.h"
 #include "performance_query.h"
-#include "util/bitset.h"
 #include "util/ralloc.h"
 
 void
 _mesa_init_performance_queries(struct gl_context *ctx)
 {
    ctx->PerfQuery.Objects = _mesa_NewHashTable();
-   ctx->PerfQuery.NumGroups = 0;
-   ctx->PerfQuery.Groups = NULL;
-}
-
-static struct gl_perf_monitor_object *
-new_performance_query(struct gl_context *ctx, GLuint index)
-{
-   unsigned i;
-   struct gl_perf_monitor_object *m = ctx->Driver.NewPerfMonitor(ctx);
-
-   if (m == NULL)
-      return NULL;
-
-   m->Name = index;
-
-   m->Active = false;
-
-   m->ActiveGroups =
-      rzalloc_array(NULL, unsigned, ctx->PerfQuery.NumGroups);
-
-   m->ActiveCounters =
-      ralloc_array(NULL, BITSET_WORD *, ctx->PerfQuery.NumGroups);
-
-   if (m->ActiveGroups == NULL || m->ActiveCounters == NULL)
-      goto fail;
-
-   for (i = 0; i < ctx->PerfQuery.NumGroups; i++) {
-      const struct gl_perf_monitor_group *g = &ctx->PerfQuery.Groups[i];
-
-      m->ActiveCounters[i] = rzalloc_array(m->ActiveCounters, BITSET_WORD,
-                                           BITSET_WORDS(g->NumCounters));
-      if (m->ActiveCounters[i] == NULL)
-         goto fail;
-   }
-
-   return m;
-
-fail:
-   ralloc_free(m->ActiveGroups);
-   ralloc_free(m->ActiveCounters);
-   ctx->Driver.DeletePerfMonitor(ctx, m);
-   return NULL;
 }
 
 static void
 free_performance_query(GLuint key, void *data, void *user)
 {
-   struct gl_perf_monitor_object *m = data;
+   struct gl_perf_query_object *m = data;
    struct gl_context *ctx = user;
 
-   ralloc_free(m->ActiveGroups);
-   ralloc_free(m->ActiveCounters);
-   ctx->Driver.DeletePerfMonitor(ctx, m);
+   ctx->Driver.DeletePerfQuery(ctx, m);
 }
 
 void
@@ -112,34 +59,13 @@ _mesa_free_performance_queries(struct gl_context *ctx)
    _mesa_DeleteHashTable(ctx->PerfQuery.Objects);
 }
 
-static inline struct gl_perf_monitor_object *
-lookup_query(struct gl_context *ctx, GLuint id)
-{
-   return (struct gl_perf_monitor_object *)
-      _mesa_HashLookup(ctx->PerfQuery.Objects, id);
-}
-
-static inline const struct gl_perf_monitor_group *
-get_group(const struct gl_context *ctx, GLuint id)
+static inline struct gl_perf_query_object *
+lookup_object(struct gl_context *ctx, GLuint id)
 {
-   if (id >= ctx->PerfQuery.NumGroups)
-      return NULL;
-
-   return &ctx->PerfQuery.Groups[id];
-}
-
-static inline const struct gl_perf_monitor_counter *
-get_counter(const struct gl_perf_monitor_group *group_obj, GLuint id)
-{
-   if (id >= group_obj->NumCounters)
-      return NULL;
-
-   return &group_obj->Counters[id];
+   return _mesa_HashLookup(ctx->PerfQuery.Objects, id);
 }
 
-/* For INTEL_performance_query, query id 0 is reserved to be invalid. We use
- * index to Groups array + 1 as the query id. Same applies to counter id.
- */
+/* For INTEL_performance_query, query id 0 is reserved to be invalid. */
 static inline GLuint
 queryid_to_index(GLuint queryid)
 {
@@ -155,7 +81,8 @@ index_to_queryid(GLuint index)
 static inline bool
 queryid_valid(const struct gl_context *ctx, GLuint queryid)
 {
-   return get_group(ctx, queryid_to_index(queryid)) != NULL;
+   GLuint index = queryid_to_index(queryid);
+   return index >= 0 && index < ctx->PerfQuery.NumQueries;
 }
 
 static inline GLuint
@@ -164,33 +91,29 @@ counterid_to_index(GLuint counterid)
    return counterid - 1;
 }
 
-/*****************************************************************************/
-
-/**
- * Returns how many bytes a counter's value takes up.
- */
-unsigned
-_mesa_perf_query_counter_size(const struct gl_perf_monitor_counter *c)
+static void
+return_string(char *stringRet, GLuint stringMaxLen, const char *string)
 {
-   switch (c->Type) {
-   case GL_FLOAT:
-   case GL_PERCENTAGE_AMD:
-      return sizeof(GLfloat);
-   case GL_UNSIGNED_INT:
-      return sizeof(GLuint);
-   case GL_UNSIGNED_INT64_AMD:
-      return sizeof(uint64_t);
-   default:
-      assert(!"Should not get here: invalid counter type");
-      return 0;
-   }
+   if (!stringRet)
+      return;
+
+   strncpy(stringRet, string ? string : "", stringMaxLen);
+
+   /* No specification given about whether returned strings needs
+    * to be zero-terminated. Zero-terminate the string always as we
+    * don't otherwise communicate the length of the returned
+    * string.
+    */
+   if (stringMaxLen > 0)
+      stringRet[stringMaxLen - 1] = '\0';
 }
 
+/*****************************************************************************/
+
 extern void GLAPIENTRY
 _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId)
 {
    GET_CURRENT_CONTEXT(ctx);
-   unsigned numGroups;
 
    /* The GL_INTEL_performance_query spec says:
     *
@@ -202,15 +125,13 @@ _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId)
       return;
    }
 
-   numGroups = ctx->PerfQuery.NumGroups;
-
    /* The GL_INTEL_performance_query spec says:
     *
     *    "If the given hardware platform doesn't support any performance
     *    queries, then the value of 0 is returned and INVALID_OPERATION error
     *    is raised."
     */
-   if (numGroups == 0) {
+   if (ctx->PerfQuery.NumQueries == 0) {
       *queryId = 0;
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glGetFirstPerfQueryIdINTEL(no queries supported)");
@@ -242,26 +163,23 @@ _mesa_GetNextPerfQueryIdINTEL(GLuint queryId, GLuint *nextQueryId)
    }
 
    if (!queryid_valid(ctx, queryId)) {
-      *nextQueryId = 0;
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glGetNextPerfQueryIdINTEL(invalid query)");
       return;
    }
 
-   ++queryId;
-
-   if (!queryid_valid(ctx, queryId)) {
-      *nextQueryId = 0;
-   } else {
+   if (queryid_valid(ctx, ++queryId))
       *nextQueryId = queryId;
-   }
+   else
+      *nextQueryId = 0;
 }
 
 extern void GLAPIENTRY
 _mesa_GetPerfQueryIdByNameINTEL(char *queryName, GLuint *queryId)
 {
    GET_CURRENT_CONTEXT(ctx);
-   unsigned i;
+
+   int i;
 
    /* The GL_INTEL_performance_query spec says:
     *
@@ -274,15 +192,22 @@ _mesa_GetPerfQueryIdByNameINTEL(char *queryName, GLuint *queryId)
       return;
    }
 
-   /* The specification does not state that this produces an error. */
+   /* The specification does not state that this produces an error but
+    * to be consistent with glGetFirstPerfQueryIdINTEL we generate an
+    * INVALID_VALUE error */
    if (!queryId) {
-      _mesa_warning(ctx, "glGetPerfQueryIdByNameINTEL(queryId == NULL)");
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glGetPerfQueryIdByNameINTEL(queryId == NULL)");
       return;
    }
 
-   for (i = 0; i < ctx->PerfQuery.NumGroups; ++i) {
-      const struct gl_perf_monitor_group *group_obj = get_group(ctx, i);
-      if (strcmp(group_obj->Name, queryName) == 0) {
+   for (i = 0; i < ctx->PerfQuery.NumQueries; ++i) {
+      const GLchar *name;
+      GLuint ignore;
+
+      ctx->Driver.GetPerfQueryInfo(ctx, i, &name, &ignore, &ignore, &ignore);
+
+      if (strcmp(name, queryName) == 0) {
          *queryId = index_to_queryid(i);
          return;
       }
@@ -294,18 +219,21 @@ _mesa_GetPerfQueryIdByNameINTEL(char *queryName, GLuint *queryId)
 
 extern void GLAPIENTRY
 _mesa_GetPerfQueryInfoINTEL(GLuint queryId,
-                            GLuint queryNameLength, char *queryName,
-                            GLuint *dataSize, GLuint *noCounters,
-                            GLuint *noActiveInstances,
+                            GLuint nameLength, char *name,
+                            GLuint *dataSize,
+                            GLuint *numCounters,
+                            GLuint *numActive,
                             GLuint *capsMask)
 {
    GET_CURRENT_CONTEXT(ctx);
-   unsigned i;
 
-   const struct gl_perf_monitor_group *group_obj =
-      get_group(ctx, queryid_to_index(queryId));
+   GLuint queryIndex = queryid_to_index(queryId);
+   const GLchar *queryName;
+   GLuint queryDataSize;
+   GLuint queryNumCounters;
+   GLuint queryNumActive;
 
-   if (group_obj == NULL) {
+   if (!queryid_valid(ctx, queryId)) {
       /* The GL_INTEL_performance_query spec says:
        *
        *    "If queryId does not reference a valid query type, an
@@ -316,32 +244,19 @@ _mesa_GetPerfQueryInfoINTEL(GLuint queryId,
       return;
    }
 
-   if (queryName) {
-      strncpy(queryName, group_obj->Name, queryNameLength);
+   ctx->Driver.GetPerfQueryInfo(ctx, queryIndex,
+                                &queryName,
+                                &queryDataSize,
+                                &queryNumCounters,
+                                &queryNumActive);
 
-      /* No specification given about whether the string needs to be
-       * zero-terminated. Zero-terminate the string always as we don't
-       * otherwise communicate the length of the returned string.
-       */
-      if (queryNameLength > 0) {
-         queryName[queryNameLength - 1] = '\0';
-      }
-   }
+   return_string(name, nameLength, queryName);
 
-   if (dataSize) {
-      unsigned size = 0;
-      for (i = 0; i < group_obj->NumCounters; ++i) {
-         /* What we get from the driver is group id (uint32_t) + counter id
-          * (uint32_t) + value.
-          */
-         size += 2 * sizeof(uint32_t) + _mesa_perf_query_counter_size(&group_obj->Counters[i]);
-      }
-      *dataSize = size;
-   }
+   if (dataSize)
+      *dataSize = queryDataSize;
 
-   if (noCounters) {
-      *noCounters = group_obj->NumCounters;
-   }
+   if (numCounters)
+      *numCounters = queryNumCounters;
 
    /* The GL_INTEL_performance_query spec says:
     *
@@ -352,128 +267,93 @@ _mesa_GetPerfQueryInfoINTEL(GLuint queryId,
     * 2) Another typo in the specification, maxInstances parameter is not listed
     *    in the declaration of this function in the list of new functions.
     */
-   if (noActiveInstances) {
-      *noActiveInstances = _mesa_HashNumEntries(ctx->PerfQuery.Objects);
-   }
+   if (numActive)
+      *numActive = queryNumActive;
 
-   if (capsMask) {
-      /* TODO: This information not yet available in the monitor structs. For
-       * now, we hardcode SINGLE_CONTEXT, since that's what the implementation
-       * currently tries very hard to do.
-       */
+   /* Assume for now that all queries are per-context */
+   if (capsMask)
       *capsMask = GL_PERFQUERY_SINGLE_CONTEXT_INTEL;
-   }
 }
 
 extern void GLAPIENTRY
 _mesa_GetPerfCounterInfoINTEL(GLuint queryId, GLuint counterId,
-                              GLuint counterNameLength, char *counterName,
-                              GLuint counterDescLength, char *counterDesc,
-                              GLuint *counterOffset, GLuint *counterDataSize, GLuint *counterTypeEnum,
-                              GLuint *counterDataTypeEnum, GLuint64 *rawCounterMaxValue)
+                              GLuint nameLength, char *name,
+                              GLuint descLength, char *desc,
+                              GLuint *offset,
+                              GLuint *dataSize,
+                              GLuint *typeEnum,
+                              GLuint *dataTypeEnum,
+                              GLuint64 *rawCounterMaxValue)
 {
    GET_CURRENT_CONTEXT(ctx);
 
-   const struct gl_perf_monitor_group *group_obj;
-   const struct gl_perf_monitor_counter *counter_obj;
-   unsigned counterIndex;
-   unsigned i;
-
-   group_obj = get_group(ctx, queryid_to_index(queryId));
+   GLuint queryIndex = queryid_to_index(queryId);
+   const GLchar *queryName;
+   GLuint queryDataSize;
+   GLuint queryNumCounters;
+   GLuint queryNumActive;
+   GLuint counterIndex;
+   const GLchar *counterName;
+   const GLchar *counterDesc;
+   GLuint counterOffset;
+   GLuint counterDataSize;
+   GLuint counterTypeEnum;
+   GLuint counterDataTypeEnum;
+   GLuint64 counterRawMax;
 
-   /* The GL_INTEL_performance_query spec says:
-    *
-    *    "If the pair of queryId and counterId does not reference a valid
-    *    counter, an INVALID_VALUE error is generated."
-    */
-   if (group_obj == NULL) {
+   if (!queryid_valid(ctx, queryId)) {
+      /* The GL_INTEL_performance_query spec says:
+       *
+       *    "If the pair of queryId and counterId does not reference a valid
+       *    counter, an INVALID_VALUE error is generated."
+       */
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glGetPerfCounterInfoINTEL(invalid queryId)");
       return;
    }
 
+   ctx->Driver.GetPerfQueryInfo(ctx, queryIndex,
+                                &queryName,
+                                &queryDataSize,
+                                &queryNumCounters,
+                                &queryNumActive);
+
    counterIndex = counterid_to_index(counterId);
-   counter_obj = get_counter(group_obj, counterIndex);
 
-   if (counter_obj == NULL) {
+   if (counterIndex < 0 || counterIndex >= queryNumCounters) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glGetPerfCounterInfoINTEL(invalid counterId)");
       return;
    }
 
-   if (counterName) {
-      strncpy(counterName, counter_obj->Name, counterNameLength);
+   ctx->Driver.GetPerfCounterInfo(ctx, queryIndex, counterIndex,
+                                  &counterName,
+                                  &counterDesc,
+                                  &counterOffset,
+                                  &counterDataSize,
+                                  &counterTypeEnum,
+                                  &counterDataTypeEnum,
+                                  &counterRawMax);
 
-      /* No specification given about whether the string needs to be
-       * zero-terminated. Zero-terminate the string always as we don't
-       * otherwise communicate the length of the returned string.
-       */
-      if (counterNameLength > 0) {
-         counterName[counterNameLength - 1] = '\0';
-      }
-   }
+   return_string(name, nameLength, counterName);
+   return_string(desc, descLength, counterDesc);
 
-   if (counterDesc) {
-      /* TODO: No separate description text at the moment. We pass the name
-       * again for the moment.
-       */
-      strncpy(counterDesc, counter_obj->Name, counterDescLength);
-
-      /* No specification given about whether the string needs to be
-       * zero-terminated. Zero-terminate the string always as we don't
-       * otherwise communicate the length of the returned string.
-       */
-      if (counterDescLength > 0) {
-         counterDesc[counterDescLength - 1] = '\0';
-      }
-   }
+   if (offset)
+      *offset = counterOffset;
 
-   if (counterOffset) {
-      unsigned offset = 0;
-      for (i = 0; i < counterIndex; ++i) {
-         /* What we get from the driver is group id (uint32_t) + counter id
-          * (uint32_t) + value.
-          */
-         offset += 2 * sizeof(uint32_t) + _mesa_perf_query_counter_size(&group_obj->Counters[i]);
-      }
-      *counterOffset = 2 * sizeof(uint32_t) + offset;
-   }
+   if (dataSize)
+      *dataSize = counterDataSize;
 
-   if (counterDataSize) {
-      *counterDataSize = _mesa_perf_query_counter_size(counter_obj);
-   }
+   if (typeEnum)
+      *typeEnum = counterTypeEnum;
 
-   if (counterTypeEnum) {
-      /* TODO: Different counter types (semantic type, not data type) not
-       * supported as of yet.
-       */
-      *counterTypeEnum = GL_PERFQUERY_COUNTER_RAW_INTEL;
-   }
+   if (dataTypeEnum)
+      *dataTypeEnum = counterDataTypeEnum;
 
-   if (counterDataTypeEnum) {
-      switch (counter_obj->Type) {
-      case GL_FLOAT:
-      case GL_PERCENTAGE_AMD:
-         *counterDataTypeEnum = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
-         break;
-      case GL_UNSIGNED_INT:
-         *counterDataTypeEnum = GL_PERFQUERY_COUNTER_DATA_UINT32_INTEL;
-         break;
-      case GL_UNSIGNED_INT64_AMD:
-         *counterDataTypeEnum = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
-         break;
-      default:
-         assert(!"Should not get here: invalid counter type");
-         return;
-      }
-   }
+   if (rawCounterMaxValue)
+      *rawCounterMaxValue = counterRawMax;
 
    if (rawCounterMaxValue) {
-      /* This value is (implicitly) specified to be used only with
-       * GL_PERFQUERY_COUNTER_RAW_INTEL counters. When semantic types for
-       * counters are added, that needs to be checked.
-       */
-
       /* The GL_INTEL_performance_query spec says:
        *
        *    "for some raw counters for which the maximal value is
@@ -481,10 +361,17 @@ _mesa_GetPerfCounterInfoINTEL(GLuint queryId, GLuint counterId,
        *    returned in the location pointed by rawCounterMaxValue, otherwise,
        *    the location is written with the value of 0."
        *
-       * The maximum value reported by the driver at the moment is not with
-       * these semantics, so write 0 always to be safe.
+       *    Since it's very useful to be able to report a maximum value for
+       *    more that just counters using the _COUNTER_RAW_INTEL or
+       *    _COUNTER_DURATION_RAW_INTEL enums (e.g. for a _THROUGHPUT tools
+       *    want to be able to visualize the absolute throughput with respect
+       *    to the theoretical maximum that's possible) and there doesn't seem
+       *    to be any reason not to allow _THROUGHPUT counters to also be
+       *    considerer "raw" here, we always leave it up to the backend to
+       *    decide when it's appropriate to report a maximum counter value or 0
+       *    if not.
        */
-      *rawCounterMaxValue = 0;
+      *rawCounterMaxValue = counterRawMax;
    }
 }
 
@@ -492,44 +379,32 @@ extern void GLAPIENTRY
 _mesa_CreatePerfQueryINTEL(GLuint queryId, GLuint *queryHandle)
 {
    GET_CURRENT_CONTEXT(ctx);
-   GLuint first;
-   GLuint group;
-   const struct gl_perf_monitor_group *group_obj;
-   struct gl_perf_monitor_object *m;
-   unsigned i;
-
-   /* This is not specified in the extension, but is the only sane thing to
-    * do.
-    */
-   if (queryHandle == NULL) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glCreatePerfQueryINTEL(queryHandle == NULL)");
-      return;
-   }
 
-   group = queryid_to_index(queryId);
-   group_obj = get_group(ctx, group);
+   GLuint id;
+   struct gl_perf_query_object *obj;
 
    /* The GL_INTEL_performance_query spec says:
     *
     *    "If queryId does not reference a valid query type, an INVALID_VALUE
     *    error is generated."
     */
-   if (group_obj == NULL) {
+   if (!queryid_valid(ctx, queryId)) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glCreatePerfQueryINTEL(invalid queryId)");
       return;
    }
 
-   /* The query object created here is the counterpart of a `monitor' in
-    * AMD_performance_monitor. This call is equivalent to calling
-    * GenPerfMonitorsAMD and SelectPerfMonitorCountersAMD with a list of all
-    * counters in a group.
+   /* This is not specified in the extension, but is the only sane thing to
+    * do.
     */
+   if (queryHandle == NULL) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glCreatePerfQueryINTEL(queryHandle == NULL)");
+      return;
+   }
 
-   /* We keep the query ids contiguous */
-   first = _mesa_HashFindFreeKeyBlock(ctx->PerfQuery.Objects, 1);
-   if (!first) {
+   id = _mesa_HashFindFreeKeyBlock(ctx->PerfQuery.Objects, 1);
+   if (!id) {
       /* The GL_INTEL_performance_query spec says:
        *
        *    "If the query instance cannot be created due to exceeding the
@@ -537,82 +412,72 @@ _mesa_CreatePerfQueryINTEL(GLuint queryId, GLuint *queryHandle)
        *    an insufficient memory reason, an OUT_OF_MEMORY error is
        *    generated, and the location pointed by queryHandle returns NULL."
       */
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCreatePerfQueryINTEL");
+      _mesa_error_no_memory(__func__);
       return;
    }
 
-   m = new_performance_query(ctx, first);
-   if (m == NULL) {
+   obj = ctx->Driver.NewPerfQueryObject(ctx, queryid_to_index(queryId));
+   if (obj == NULL) {
       _mesa_error_no_memory(__func__);
       return;
    }
 
-   _mesa_HashInsert(ctx->PerfQuery.Objects, first, m);
-   *queryHandle = first;
+   obj->Id = id;
+   obj->Active = false;
+   obj->Ready = false;
 
-   ctx->Driver.ResetPerfMonitor(ctx, m);
-
-   for (i = 0; i < group_obj->NumCounters; ++i) {
-      ++m->ActiveGroups[group];
-      /* Counters are a continuous range of integers, 0 to NumCounters (excl),
-       * so i is the counter value to use here.
-       */
-      BITSET_SET(m->ActiveCounters[group], i);
-   }
+   _mesa_HashInsert(ctx->PerfQuery.Objects, id, obj);
+   *queryHandle = id;
 }
 
 extern void GLAPIENTRY
 _mesa_DeletePerfQueryINTEL(GLuint queryHandle)
 {
    GET_CURRENT_CONTEXT(ctx);
-   struct gl_perf_monitor_object *m;
 
-   /* The queryHandle is the counterpart to AMD_performance_monitor's monitor
-    * id.
-    */
-   m = lookup_query(ctx, queryHandle);
+   struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
 
    /* The GL_INTEL_performance_query spec says:
     *
     *    "If a query handle doesn't reference a previously created performance
     *    query instance, an INVALID_VALUE error is generated."
     */
-   if (m == NULL) {
+   if (obj == NULL) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glDeletePerfQueryINTEL(invalid queryHandle)");
       return;
    }
 
-   /* Let the driver stop the query if it's active. */
-   if (m->Active) {
-      ctx->Driver.ResetPerfMonitor(ctx, m);
-      m->Ended = false;
+   /* To avoid complications in the backed we never ask the backend to
+    * delete an active query or a query object while we are still
+    * waiting for data.
+    */
+
+   if (obj->Active)
+      _mesa_EndPerfQueryINTEL(queryHandle);
+
+   if (obj->Used && !obj->Ready) {
+      ctx->Driver.WaitPerfQuery(ctx, obj);
+      obj->Ready = true;
    }
 
    _mesa_HashRemove(ctx->PerfQuery.Objects, queryHandle);
-   ralloc_free(m->ActiveGroups);
-   ralloc_free(m->ActiveCounters);
-   ctx->Driver.DeletePerfMonitor(ctx, m);
+   ctx->Driver.DeletePerfQuery(ctx, obj);
 }
 
 extern void GLAPIENTRY
 _mesa_BeginPerfQueryINTEL(GLuint queryHandle)
 {
    GET_CURRENT_CONTEXT(ctx);
-   struct gl_perf_monitor_object *m;
-
-   /* The queryHandle is the counterpart to AMD_performance_monitor's monitor
-    * id.
-    */
 
-   m = lookup_query(ctx, queryHandle);
+   struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
 
    /* The GL_INTEL_performance_query spec says:
     *
     *    "If a query handle doesn't reference a previously created performance
     *    query instance, an INVALID_VALUE error is generated."
     */
-   if (m == NULL) {
+   if (obj == NULL) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glBeginPerfQueryINTEL(invalid queryHandle)");
       return;
@@ -628,15 +493,24 @@ _mesa_BeginPerfQueryINTEL(GLuint queryHandle)
     * We also generate an INVALID_OPERATION error if the driver can't begin
     * a query for its own reasons, and for nesting the same query.
     */
-   if (m->Active) {
+   if (obj->Active) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glBeginPerfQueryINTEL(already active)");
       return;
    }
 
-   if (ctx->Driver.BeginPerfMonitor(ctx, m)) {
-      m->Active = true;
-      m->Ended = false;
+   /* To avoid complications in the backed we never ask the backend to
+    * reuse a query object and begin a new query while we are still
+    * waiting for data on that object. */
+   if (obj->Used && !obj->Ready) {
+      ctx->Driver.WaitPerfQuery(ctx, obj);
+      obj->Ready = true;
+   }
+
+   if (ctx->Driver.BeginPerfQuery(ctx, obj)) {
+      obj->Used = true;
+      obj->Active = true;
+      obj->Ready = false;
    } else {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glBeginPerfQueryINTEL(driver unable to begin query)");
@@ -647,39 +521,32 @@ extern void GLAPIENTRY
 _mesa_EndPerfQueryINTEL(GLuint queryHandle)
 {
    GET_CURRENT_CONTEXT(ctx);
-   struct gl_perf_monitor_object *m;
 
-   /* The queryHandle is the counterpart to AMD_performance_monitor's monitor
-    * id.
-    */
+   struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
 
-   m = lookup_query(ctx, queryHandle);
+   /* Not explicitly covered in the spec, but for consistency... */
+   if (obj == NULL) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glEndPerfQueryINTEL(invalid queryHandle)");
+      return;
+   }
 
    /* The GL_INTEL_performance_query spec says:
     *
     *    "If a performance query is not currently started, an
     *    INVALID_OPERATION error will be generated."
-    *
-    * The specification doesn't state that an invalid handle would be an
-    * INVALID_VALUE error. Regardless, query for such a handle will not be
-    * started, so we generate an INVALID_OPERATION in that case too.
     */
-   if (m == NULL) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glEndPerfQueryINTEL(invalid queryHandle)");
-      return;
-   }
 
-   if (!m->Active) {
+   if (!obj->Active) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glEndPerfQueryINTEL(not active)");
       return;
    }
 
-   ctx->Driver.EndPerfMonitor(ctx, m);
+   ctx->Driver.EndPerfQuery(ctx, obj);
 
-   m->Active = false;
-   m->Ended = true;
+   obj->Active = false;
+   obj->Ready = false;
 }
 
 extern void GLAPIENTRY
@@ -687,8 +554,15 @@ _mesa_GetPerfQueryDataINTEL(GLuint queryHandle, GLuint flags,
                             GLsizei dataSize, void *data, GLuint *bytesWritten)
 {
    GET_CURRENT_CONTEXT(ctx);
-   struct gl_perf_monitor_object *m;
-   bool result_available;
+
+   struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
+
+   /* Not explicitly covered in the spec, but for consistency... */
+   if (obj == NULL) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glEndPerfQueryINTEL(invalid queryHandle)");
+      return;
+   }
 
    /* The GL_INTEL_performance_query spec says:
     *
@@ -701,66 +575,34 @@ _mesa_GetPerfQueryDataINTEL(GLuint queryHandle, GLuint flags,
       return;
    }
 
-   /* The queryHandle is the counterpart to AMD_performance_monitor's monitor
-    * id.
-    */
+   /* Just for good measure in case a lazy application is only
+    * checking this and not checking for errors... */
+   *bytesWritten = 0;
 
-   m = lookup_query(ctx, queryHandle);
-
-   /* The specification doesn't state that an invalid handle generates an
-    * error. We could interpret that to mean the case should be handled as
-    * "measurement not ready for this query", but what should be done if
-    * `flags' equals PERFQUERY_WAIT_INTEL?
-    *
-    * To resolve this, we just generate an INVALID_VALUE from an invalid query
-    * handle.
-    */
-   if (m == NULL) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glGetPerfQueryDataINTEL(invalid queryHandle)");
-      return;
-   }
-
-   /* We need at least enough room for a single value. */
-   if (dataSize < sizeof(GLuint)) {
-      *bytesWritten = 0;
-      return;
-   }
-
-   /* The GL_INTEL_performance_query spec says:
-    *
-    *    "The call may end without returning any data if they are not ready
-    *    for reading as the measurement session is still pending (the
-    *    EndPerfQueryINTEL() command processing is not finished by
-    *    hardware). In this case location pointed by the bytesWritten
-    *    parameter will be set to 0."
-    *
-    * If EndPerfQueryINTEL() is not called at all, we follow this.
+   /* Not explicitly covered in the spec but to be consistent with
+    * EndPerfQuery which validates that an application only ends an
+    * active query we also validate that an application doesn't try
+    * and get the data for a still active query...
     */
-   if (!m->Ended) {
-      *bytesWritten = 0;
+   if (obj->Active) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glGetPerfQueryDataINTEL(query still active)");
       return;
    }
 
-   result_available = ctx->Driver.IsPerfMonitorResultAvailable(ctx, m);
+   obj->Ready = ctx->Driver.IsPerfQueryReady(ctx, obj);
 
-   if (!result_available) {
+   if (!obj->Ready) {
       if (flags == GL_PERFQUERY_FLUSH_INTEL) {
          ctx->Driver.Flush(ctx);
       } else if (flags == GL_PERFQUERY_WAIT_INTEL) {
-         /* Assume Finish() is both enough and not too much to wait for
-          * results. If results are still not available after Finish(), the
-          * later code automatically bails out with 0 for bytesWritten.
-          */
-         ctx->Driver.Finish(ctx);
-         result_available =
-            ctx->Driver.IsPerfMonitorResultAvailable(ctx, m);
+         ctx->Driver.WaitPerfQuery(ctx, obj);
+         obj->Ready = true;
       }
    }
 
-   if (result_available) {
-      ctx->Driver.GetPerfMonitorResult(ctx, m, dataSize, data, (GLint*)bytesWritten);
-   } else {
+   if (obj->Ready)
+      ctx->Driver.GetPerfQueryData(ctx, obj, dataSize, data, bytesWritten);
+   else
       *bytesWritten = 0;
-   }
 }
diff --git a/src/mesa/main/performance_query.h b/src/mesa/main/performance_query.h
index 3fed5ea..8268f0e 100644
--- a/src/mesa/main/performance_query.h
+++ b/src/mesa/main/performance_query.h
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2012 Intel Corporation
+ * Copyright © 2012,2015 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -38,10 +38,6 @@ _mesa_init_performance_queries(struct gl_context *ctx);
 extern void
 _mesa_free_performance_queries(struct gl_context *ctx);
 
-unsigned
-_mesa_perf_query_counter_size(const struct gl_perf_monitor_counter *);
-
-
 extern void GLAPIENTRY
 _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId);
 
-- 
2.3.2