[Mesa-dev] [PATCH 2/3] Model INTEL perf query backend after query object BE

Wed Feb 15 21:37:36 UTC 2017

Instead of using the same backend interface as AMD_performance_monitor,
this defines a dedicated INTEL_performance_query interface that is
modelled more on the ARB_query_buffer_object interface (considering the
similarity of the extensions), with the addition of vfuncs for
initializing and enumerating query and counter info.

Compared to the previous backend, some notable differences are:

- The backend is free to represent counters using whatever data
  structures are optimal/convenient since queries and counters are
  enumerated via an iterator api instead of declaring them using
  structures directly shared with the frontend.

  This is also done to help us support the full range of data and
  semantic types available with INTEL_performance_query, which is
  awkward when using a structure shared with the AMD_performance_monitor
  backend since neither extension's types are a subset of the other's.

- The backend must support waiting for a query instead of the frontend
  simply using glFinish().

- Objects go through 'Active' and 'Ready' states consistent with the
  query object backend (hopefully making them more familiar). There is
  no 'Ended' state (which used to show that a query has ended at least
  once for a given object). There is a new 'Used' state similar to the
  'EverBound' state of query objects, set when a query is first begun,
  which implies that we are expecting to get results back for the object
  at some point.

The INTEL_performance_query and AMD_performance_monitor extensions are
now completely orthogonal within Mesa main (though a driver could
optionally choose to implement both extensions within a unified backend
if that were convenient for the sake of sharing state/code).

v2: (Samuel Pitoiset)
- init PerfQuery.NumQueries in frontend
- s/return_string/output_clipped_string/
- s/backed/backend/ typo
- remove redundant *bytesWritten = 0
v3:
- Add InitPerfQueryInfo for lazy probing of available queries

Signed-off-by: Robert Bragg <robert at sixbynine.org>
---
 src/mesa/main/dd.h                |  41 +++
 src/mesa/main/mtypes.h            |  24 +-
 src/mesa/main/performance_query.c | 625 ++++++++++++++------------------------
 src/mesa/main/performance_query.h |   6 +-
 4 files changed, 295 insertions(+), 401 deletions(-)

diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 7ebd084ca3..e77df31cf2 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -780,6 +780,47 @@ struct dd_function_table {
    /*@}*/
 
    /**
+    * \name Performance Query objects
+    */
+   /*@{*/
+   GLuint (*InitPerfQueryInfo)(struct gl_context *ctx);
+   void (*GetPerfQueryInfo)(struct gl_context *ctx,
+                            int queryIndex,
+                            const char **name,
+                            GLuint *dataSize,
+                            GLuint *numCounters,
+                            GLuint *numActive);
+   void (*GetPerfCounterInfo)(struct gl_context *ctx,
+                              int queryIndex,
+                              int counterIndex,
+                              const char **name,
+                              const char **desc,
+                              GLuint *offset,
+                              GLuint *data_size,
+                              GLuint *type_enum,
+                              GLuint *data_type_enum,
+                              GLuint64 *raw_max);
+   struct gl_perf_query_object * (*NewPerfQueryObject)(struct gl_context *ctx,
+                                                       int queryIndex);
+   void (*DeletePerfQuery)(struct gl_context *ctx,
+                           struct gl_perf_query_object *obj);
+   GLboolean (*BeginPerfQuery)(struct gl_context *ctx,
+                               struct gl_perf_query_object *obj);
+   void (*EndPerfQuery)(struct gl_context *ctx,
+                        struct gl_perf_query_object *obj);
+   void (*WaitPerfQuery)(struct gl_context *ctx,
+                         struct gl_perf_query_object *obj);
+   GLboolean (*IsPerfQueryReady)(struct gl_context *ctx,
+                                 struct gl_perf_query_object *obj);
+   void (*GetPerfQueryData)(struct gl_context *ctx,
+                            struct gl_perf_query_object *obj,
+                            GLsizei dataSize,
+                            GLuint *data,
+                            GLuint *bytesWritten);
+   /*@}*/
+
+
+   /**
     * \name GREMEDY debug/marker functions
     */
    /*@{*/
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index f3a24df589..e6cf1f4af6 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1860,6 +1860,23 @@ struct gl_perf_monitor_group
 
 
 /**
+ * A query object instance as described in INTEL_performance_query.
+ *
+ * NB: We want to keep this and the corresponding backend structure
+ * relatively lean considering that applications may expect to
+ * allocate enough objects to be able to query around all draw calls
+ * in a frame.
+ */
+struct gl_perf_query_object
+{
+   GLuint Id;        /**< hash table ID/name */
+   GLuint Used:1;    /**< has been used for 1 or more queries */
+   GLuint Active:1;  /**< inside Begin/EndPerfQuery */
+   GLuint Ready:1;   /**< result is ready? */
+};
+
+
+/**
  * Context state for AMD_performance_monitor.
  */
 struct gl_perf_monitor_state
@@ -1878,12 +1895,7 @@ struct gl_perf_monitor_state
  */
 struct gl_perf_query_state
 {
-   /** Array of performance monitor groups (indexed by group ID) */
-   const struct gl_perf_monitor_group *Groups;
-   GLuint NumGroups;
-
-   /** The table of all performance query objects. */
-   struct _mesa_HashTable *Objects;
+   struct _mesa_HashTable *Objects; /**< The table of all performance query objects */
 };
 
 
diff --git a/src/mesa/main/performance_query.c b/src/mesa/main/performance_query.c
index f0ecabe3b5..02e58633f6 100644
--- a/src/mesa/main/performance_query.c
+++ b/src/mesa/main/performance_query.c
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2012 Intel Corporation
+ * Copyright © 2012-2016 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -24,14 +24,6 @@
 /**
  * \file performance_query.c
  * Core Mesa support for the INTEL_performance_query extension.
- *
- * In order to implement this extension, start by defining two enums:
- * one for Groups, and one for Counters.  These will be used as indexes into
- * arrays, so they should start at 0 and increment from there.
- *
- * Counter IDs need to be globally unique.  That is, you can't have counter 7
- * in group A and counter 7 in group B.  A global enum of all available
- * counters is a convenient way to guarantee this.
  */
 
 #include <stdbool.h>
@@ -42,73 +34,21 @@
 #include "macros.h"
 #include "mtypes.h"
 #include "performance_query.h"
-#include "util/bitset.h"
 #include "util/ralloc.h"
 
 void
 _mesa_init_performance_queries(struct gl_context *ctx)
 {
    ctx->PerfQuery.Objects = _mesa_NewHashTable();
-   ctx->PerfQuery.NumGroups = 0;
-   ctx->PerfQuery.Groups = NULL;
-}
-
-static inline void
-init_groups(struct gl_context *ctx)
-{
-   if (unlikely(!ctx->PerfMonitor.Groups))
-      ctx->Driver.InitPerfMonitorGroups(ctx);
-}
-
-static struct gl_perf_monitor_object *
-new_performance_query(struct gl_context *ctx, GLuint index)
-{
-   unsigned i;
-   struct gl_perf_monitor_object *m = ctx->Driver.NewPerfMonitor(ctx);
-
-   if (m == NULL)
-      return NULL;
-
-   m->Name = index;
-
-   m->Active = false;
-
-   m->ActiveGroups =
-      rzalloc_array(NULL, unsigned, ctx->PerfQuery.NumGroups);
-
-   m->ActiveCounters =
-      ralloc_array(NULL, BITSET_WORD *, ctx->PerfQuery.NumGroups);
-
-   if (m->ActiveGroups == NULL || m->ActiveCounters == NULL)
-      goto fail;
-
-   for (i = 0; i < ctx->PerfQuery.NumGroups; i++) {
-      const struct gl_perf_monitor_group *g = &ctx->PerfQuery.Groups[i];
-
-      m->ActiveCounters[i] = rzalloc_array(m->ActiveCounters, BITSET_WORD,
-                                           BITSET_WORDS(g->NumCounters));
-      if (m->ActiveCounters[i] == NULL)
-         goto fail;
-   }
-
-   return m;
-
-fail:
-   ralloc_free(m->ActiveGroups);
-   ralloc_free(m->ActiveCounters);
-   ctx->Driver.DeletePerfMonitor(ctx, m);
-   return NULL;
 }
 
 static void
 free_performance_query(GLuint key, void *data, void *user)
 {
-   struct gl_perf_monitor_object *m = data;
+   struct gl_perf_query_object *m = data;
    struct gl_context *ctx = user;
 
-   ralloc_free(m->ActiveGroups);
-   ralloc_free(m->ActiveCounters);
-   ctx->Driver.DeletePerfMonitor(ctx, m);
+   ctx->Driver.DeletePerfQuery(ctx, m);
 }
 
 void
@@ -119,34 +59,22 @@ _mesa_free_performance_queries(struct gl_context *ctx)
    _mesa_DeleteHashTable(ctx->PerfQuery.Objects);
 }
 
-static inline struct gl_perf_monitor_object *
-lookup_query(struct gl_context *ctx, GLuint id)
+static inline struct gl_perf_query_object *
+lookup_object(struct gl_context *ctx, GLuint id)
 {
-   return (struct gl_perf_monitor_object *)
-      _mesa_HashLookup(ctx->PerfQuery.Objects, id);
+   return _mesa_HashLookup(ctx->PerfQuery.Objects, id);
 }
 
-static inline const struct gl_perf_monitor_group *
-get_group(const struct gl_context *ctx, GLuint id)
+static GLuint
+init_performance_query_info(struct gl_context *ctx)
 {
-   if (id >= ctx->PerfQuery.NumGroups)
-      return NULL;
-
-   return &ctx->PerfQuery.Groups[id];
-}
-
-static inline const struct gl_perf_monitor_counter *
-get_counter(const struct gl_perf_monitor_group *group_obj, GLuint id)
-{
-   if (id >= group_obj->NumCounters)
-      return NULL;
-
-   return &group_obj->Counters[id];
+   if (ctx->Driver.InitPerfQueryInfo)
+      return ctx->Driver.InitPerfQueryInfo(ctx);
+   else
+      return 0;
 }
 
-/* For INTEL_performance_query, query id 0 is reserved to be invalid. We use
- * index to Groups array + 1 as the query id. Same applies to counter id.
- */
+/* For INTEL_performance_query, query id 0 is reserved to be invalid. */
 static inline GLuint
 queryid_to_index(GLuint queryid)
 {
@@ -160,9 +88,10 @@ index_to_queryid(GLuint index)
 }
 
 static inline bool
-queryid_valid(const struct gl_context *ctx, GLuint queryid)
+queryid_valid(const struct gl_context *ctx, GLuint numQueries, GLuint queryid)
 {
-   return get_group(ctx, queryid_to_index(queryid)) != NULL;
+   GLuint index = queryid_to_index(queryid);
+   return index >= 0 && index < numQueries;
 }
 
 static inline GLuint
@@ -171,35 +100,31 @@ counterid_to_index(GLuint counterid)
    return counterid - 1;
 }
 
-/*****************************************************************************/
-
-/**
- * Returns how many bytes a counter's value takes up.
- */
-unsigned
-_mesa_perf_query_counter_size(const struct gl_perf_monitor_counter *c)
+static void
+output_clipped_string(char *stringRet, GLuint stringMaxLen, const char *string)
 {
-   switch (c->Type) {
-   case GL_FLOAT:
-   case GL_PERCENTAGE_AMD:
-      return sizeof(GLfloat);
-   case GL_UNSIGNED_INT:
-      return sizeof(GLuint);
-   case GL_UNSIGNED_INT64_AMD:
-      return sizeof(uint64_t);
-   default:
-      assert(!"Should not get here: invalid counter type");
-      return 0;
-   }
+   if (!stringRet)
+      return;
+
+   strncpy(stringRet, string ? string : "", stringMaxLen);
+
+   /* No specification given about whether returned strings need
+    * to be zero-terminated. Zero-terminate the string always as we
+    * don't otherwise communicate the length of the returned
+    * string.
+    */
+   if (stringMaxLen > 0)
+      stringRet[stringMaxLen - 1] = '\0';
 }
 
+/*****************************************************************************/
+
 extern void GLAPIENTRY
 _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId)
 {
    GET_CURRENT_CONTEXT(ctx);
-   unsigned numGroups;
 
-   init_groups(ctx);
+   GLuint numQueries;
 
    /* The GL_INTEL_performance_query spec says:
     *
@@ -211,7 +136,7 @@ _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId)
       return;
    }
 
-   numGroups = ctx->PerfQuery.NumGroups;
+   numQueries = init_performance_query_info(ctx);
 
    /* The GL_INTEL_performance_query spec says:
     *
@@ -219,7 +144,7 @@ _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId)
     *    queries, then the value of 0 is returned and INVALID_OPERATION error
     *    is raised."
     */
-   if (numGroups == 0) {
+   if (numQueries == 0) {
       *queryId = 0;
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glGetFirstPerfQueryIdINTEL(no queries supported)");
@@ -233,7 +158,8 @@ extern void GLAPIENTRY
 _mesa_GetNextPerfQueryIdINTEL(GLuint queryId, GLuint *nextQueryId)
 {
    GET_CURRENT_CONTEXT(ctx);
-   init_groups(ctx);
+
+   GLuint numQueries;
 
    /* The GL_INTEL_performance_query spec says:
     *
@@ -251,29 +177,27 @@ _mesa_GetNextPerfQueryIdINTEL(GLuint queryId, GLuint *nextQueryId)
       return;
    }
 
-   if (!queryid_valid(ctx, queryId)) {
-      *nextQueryId = 0;
+   numQueries = init_performance_query_info(ctx);
+
+   if (!queryid_valid(ctx, numQueries, queryId)) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glGetNextPerfQueryIdINTEL(invalid query)");
       return;
    }
 
-   ++queryId;
-
-   if (!queryid_valid(ctx, queryId)) {
-      *nextQueryId = 0;
-   } else {
+   if (queryid_valid(ctx, numQueries, ++queryId))
       *nextQueryId = queryId;
-   }
+   else
+      *nextQueryId = 0;
 }
 
 extern void GLAPIENTRY
 _mesa_GetPerfQueryIdByNameINTEL(char *queryName, GLuint *queryId)
 {
    GET_CURRENT_CONTEXT(ctx);
-   unsigned i;
 
-   init_groups(ctx);
+   GLuint numQueries;
+   int i;
 
    /* The GL_INTEL_performance_query spec says:
     *
@@ -286,15 +210,24 @@ _mesa_GetPerfQueryIdByNameINTEL(char *queryName, GLuint *queryId)
       return;
    }
 
-   /* The specification does not state that this produces an error. */
+   /* The specification does not state that this produces an error but
+    * to be consistent with glGetFirstPerfQueryIdINTEL we generate an
+    * INVALID_VALUE error */
    if (!queryId) {
-      _mesa_warning(ctx, "glGetPerfQueryIdByNameINTEL(queryId == NULL)");
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glGetPerfQueryIdByNameINTEL(queryId == NULL)");
       return;
    }
 
-   for (i = 0; i < ctx->PerfQuery.NumGroups; ++i) {
-      const struct gl_perf_monitor_group *group_obj = get_group(ctx, i);
-      if (strcmp(group_obj->Name, queryName) == 0) {
+   numQueries = init_performance_query_info(ctx);
+
+   for (i = 0; i < numQueries; ++i) {
+      const GLchar *name;
+      GLuint ignore;
+
+      ctx->Driver.GetPerfQueryInfo(ctx, i, &name, &ignore, &ignore, &ignore);
+
+      if (strcmp(name, queryName) == 0) {
          *queryId = index_to_queryid(i);
          return;
       }
@@ -306,20 +239,22 @@ _mesa_GetPerfQueryIdByNameINTEL(char *queryName, GLuint *queryId)
 
 extern void GLAPIENTRY
 _mesa_GetPerfQueryInfoINTEL(GLuint queryId,
-                            GLuint queryNameLength, char *queryName,
-                            GLuint *dataSize, GLuint *noCounters,
-                            GLuint *noActiveInstances,
+                            GLuint nameLength, char *name,
+                            GLuint *dataSize,
+                            GLuint *numCounters,
+                            GLuint *numActive,
                             GLuint *capsMask)
 {
    GET_CURRENT_CONTEXT(ctx);
-   unsigned i;
 
-   const struct gl_perf_monitor_group *group_obj;
+   GLuint numQueries = init_performance_query_info(ctx);
+   GLuint queryIndex = queryid_to_index(queryId);
+   const GLchar *queryName;
+   GLuint queryDataSize;
+   GLuint queryNumCounters;
+   GLuint queryNumActive;
 
-   init_groups(ctx);
-
-   group_obj = get_group(ctx, queryid_to_index(queryId));
-   if (group_obj == NULL) {
+   if (!queryid_valid(ctx, numQueries, queryId)) {
       /* The GL_INTEL_performance_query spec says:
        *
        *    "If queryId does not reference a valid query type, an
@@ -330,32 +265,19 @@ _mesa_GetPerfQueryInfoINTEL(GLuint queryId,
       return;
    }
 
-   if (queryName) {
-      strncpy(queryName, group_obj->Name, queryNameLength);
+   ctx->Driver.GetPerfQueryInfo(ctx, queryIndex,
+                                &queryName,
+                                &queryDataSize,
+                                &queryNumCounters,
+                                &queryNumActive);
 
-      /* No specification given about whether the string needs to be
-       * zero-terminated. Zero-terminate the string always as we don't
-       * otherwise communicate the length of the returned string.
-       */
-      if (queryNameLength > 0) {
-         queryName[queryNameLength - 1] = '\0';
-      }
-   }
+   output_clipped_string(name, nameLength, queryName);
 
-   if (dataSize) {
-      unsigned size = 0;
-      for (i = 0; i < group_obj->NumCounters; ++i) {
-         /* What we get from the driver is group id (uint32_t) + counter id
-          * (uint32_t) + value.
-          */
-         size += 2 * sizeof(uint32_t) + _mesa_perf_query_counter_size(&group_obj->Counters[i]);
-      }
-      *dataSize = size;
-   }
+   if (dataSize)
+      *dataSize = queryDataSize;
 
-   if (noCounters) {
-      *noCounters = group_obj->NumCounters;
-   }
+   if (numCounters)
+      *numCounters = queryNumCounters;
 
    /* The GL_INTEL_performance_query spec says:
     *
@@ -366,130 +288,94 @@ _mesa_GetPerfQueryInfoINTEL(GLuint queryId,
     * 2) Another typo in the specification, maxInstances parameter is not listed
     *    in the declaration of this function in the list of new functions.
     */
-   if (noActiveInstances) {
-      *noActiveInstances = _mesa_HashNumEntries(ctx->PerfQuery.Objects);
-   }
+   if (numActive)
+      *numActive = queryNumActive;
 
-   if (capsMask) {
-      /* TODO: This information not yet available in the monitor structs. For
-       * now, we hardcode SINGLE_CONTEXT, since that's what the implementation
-       * currently tries very hard to do.
-       */
+   /* Assume for now that all queries are per-context */
+   if (capsMask)
       *capsMask = GL_PERFQUERY_SINGLE_CONTEXT_INTEL;
-   }
 }
 
 extern void GLAPIENTRY
 _mesa_GetPerfCounterInfoINTEL(GLuint queryId, GLuint counterId,
-                              GLuint counterNameLength, char *counterName,
-                              GLuint counterDescLength, char *counterDesc,
-                              GLuint *counterOffset, GLuint *counterDataSize, GLuint *counterTypeEnum,
-                              GLuint *counterDataTypeEnum, GLuint64 *rawCounterMaxValue)
+                              GLuint nameLength, char *name,
+                              GLuint descLength, char *desc,
+                              GLuint *offset,
+                              GLuint *dataSize,
+                              GLuint *typeEnum,
+                              GLuint *dataTypeEnum,
+                              GLuint64 *rawCounterMaxValue)
 {
    GET_CURRENT_CONTEXT(ctx);
 
-   const struct gl_perf_monitor_group *group_obj;
-   const struct gl_perf_monitor_counter *counter_obj;
-   unsigned counterIndex;
-   unsigned i;
-
-   init_groups(ctx);
-
-   group_obj = get_group(ctx, queryid_to_index(queryId));
-
-   /* The GL_INTEL_performance_query spec says:
-    *
-    *    "If the pair of queryId and counterId does not reference a valid
-    *    counter, an INVALID_VALUE error is generated."
-    */
-   if (group_obj == NULL) {
+   GLuint numQueries = init_performance_query_info(ctx);
+   GLuint queryIndex = queryid_to_index(queryId);
+   const GLchar *queryName;
+   GLuint queryDataSize;
+   GLuint queryNumCounters;
+   GLuint queryNumActive;
+   GLuint counterIndex;
+   const GLchar *counterName;
+   const GLchar *counterDesc;
+   GLuint counterOffset;
+   GLuint counterDataSize;
+   GLuint counterTypeEnum;
+   GLuint counterDataTypeEnum;
+   GLuint64 counterRawMax;
+
+   if (!queryid_valid(ctx, numQueries, queryId)) {
+      /* The GL_INTEL_performance_query spec says:
+       *
+       *    "If the pair of queryId and counterId does not reference a valid
+       *    counter, an INVALID_VALUE error is generated."
+       */
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glGetPerfCounterInfoINTEL(invalid queryId)");
       return;
    }
 
+   ctx->Driver.GetPerfQueryInfo(ctx, queryIndex,
+                                &queryName,
+                                &queryDataSize,
+                                &queryNumCounters,
+                                &queryNumActive);
+
    counterIndex = counterid_to_index(counterId);
-   counter_obj = get_counter(group_obj, counterIndex);
 
-   if (counter_obj == NULL) {
+   if (counterIndex < 0 || counterIndex >= queryNumCounters) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glGetPerfCounterInfoINTEL(invalid counterId)");
       return;
    }
 
-   if (counterName) {
-      strncpy(counterName, counter_obj->Name, counterNameLength);
+   ctx->Driver.GetPerfCounterInfo(ctx, queryIndex, counterIndex,
+                                  &counterName,
+                                  &counterDesc,
+                                  &counterOffset,
+                                  &counterDataSize,
+                                  &counterTypeEnum,
+                                  &counterDataTypeEnum,
+                                  &counterRawMax);
 
-      /* No specification given about whether the string needs to be
-       * zero-terminated. Zero-terminate the string always as we don't
-       * otherwise communicate the length of the returned string.
-       */
-      if (counterNameLength > 0) {
-         counterName[counterNameLength - 1] = '\0';
-      }
-   }
+   output_clipped_string(name, nameLength, counterName);
+   output_clipped_string(desc, descLength, counterDesc);
 
-   if (counterDesc) {
-      /* TODO: No separate description text at the moment. We pass the name
-       * again for the moment.
-       */
-      strncpy(counterDesc, counter_obj->Name, counterDescLength);
+   if (offset)
+      *offset = counterOffset;
 
-      /* No specification given about whether the string needs to be
-       * zero-terminated. Zero-terminate the string always as we don't
-       * otherwise communicate the length of the returned string.
-       */
-      if (counterDescLength > 0) {
-         counterDesc[counterDescLength - 1] = '\0';
-      }
-   }
+   if (dataSize)
+      *dataSize = counterDataSize;
 
-   if (counterOffset) {
-      unsigned offset = 0;
-      for (i = 0; i < counterIndex; ++i) {
-         /* What we get from the driver is group id (uint32_t) + counter id
-          * (uint32_t) + value.
-          */
-         offset += 2 * sizeof(uint32_t) + _mesa_perf_query_counter_size(&group_obj->Counters[i]);
-      }
-      *counterOffset = 2 * sizeof(uint32_t) + offset;
-   }
+   if (typeEnum)
+      *typeEnum = counterTypeEnum;
 
-   if (counterDataSize) {
-      *counterDataSize = _mesa_perf_query_counter_size(counter_obj);
-   }
+   if (dataTypeEnum)
+      *dataTypeEnum = counterDataTypeEnum;
 
-   if (counterTypeEnum) {
-      /* TODO: Different counter types (semantic type, not data type) not
-       * supported as of yet.
-       */
-      *counterTypeEnum = GL_PERFQUERY_COUNTER_RAW_INTEL;
-   }
-
-   if (counterDataTypeEnum) {
-      switch (counter_obj->Type) {
-      case GL_FLOAT:
-      case GL_PERCENTAGE_AMD:
-         *counterDataTypeEnum = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
-         break;
-      case GL_UNSIGNED_INT:
-         *counterDataTypeEnum = GL_PERFQUERY_COUNTER_DATA_UINT32_INTEL;
-         break;
-      case GL_UNSIGNED_INT64_AMD:
-         *counterDataTypeEnum = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
-         break;
-      default:
-         assert(!"Should not get here: invalid counter type");
-         return;
-      }
-   }
+   if (rawCounterMaxValue)
+      *rawCounterMaxValue = counterRawMax;
 
    if (rawCounterMaxValue) {
-      /* This value is (implicitly) specified to be used only with
-       * GL_PERFQUERY_COUNTER_RAW_INTEL counters. When semantic types for
-       * counters are added, that needs to be checked.
-       */
-
       /* The GL_INTEL_performance_query spec says:
        *
        *    "for some raw counters for which the maximal value is
@@ -497,10 +383,17 @@ _mesa_GetPerfCounterInfoINTEL(GLuint queryId, GLuint counterId,
        *    returned in the location pointed by rawCounterMaxValue, otherwise,
        *    the location is written with the value of 0."
        *
-       * The maximum value reported by the driver at the moment is not with
-       * these semantics, so write 0 always to be safe.
+       *    Since it's very useful to be able to report a maximum value for
+       *    more than just counters using the _COUNTER_RAW_INTEL or
+       *    _COUNTER_DURATION_RAW_INTEL enums (e.g. for a _THROUGHPUT counter,
+       *    tools want to be able to visualize the absolute throughput with
+       *    respect to the theoretical maximum that's possible) and there
+       *    doesn't seem to be any reason not to allow _THROUGHPUT counters to
+       *    also be considered "raw" here, we always leave it up to the backend
+       *    to decide when it's appropriate to report a maximum counter value
+       *    or 0 if not.
        */
-      *rawCounterMaxValue = 0;
+      *rawCounterMaxValue = counterRawMax;
    }
 }
 
@@ -508,46 +401,33 @@ extern void GLAPIENTRY
 _mesa_CreatePerfQueryINTEL(GLuint queryId, GLuint *queryHandle)
 {
    GET_CURRENT_CONTEXT(ctx);
-   GLuint first;
-   GLuint group;
-   const struct gl_perf_monitor_group *group_obj;
-   struct gl_perf_monitor_object *m;
-   unsigned i;
-
-   init_groups(ctx);
-
-   /* This is not specified in the extension, but is the only sane thing to
-    * do.
-    */
-   if (queryHandle == NULL) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glCreatePerfQueryINTEL(queryHandle == NULL)");
-      return;
-   }
 
-   group = queryid_to_index(queryId);
-   group_obj = get_group(ctx, group);
+   GLuint numQueries = init_performance_query_info(ctx);
+   GLuint id;
+   struct gl_perf_query_object *obj;
 
    /* The GL_INTEL_performance_query spec says:
     *
     *    "If queryId does not reference a valid query type, an INVALID_VALUE
     *    error is generated."
     */
-   if (group_obj == NULL) {
+   if (!queryid_valid(ctx, numQueries, queryId)) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glCreatePerfQueryINTEL(invalid queryId)");
       return;
    }
 
-   /* The query object created here is the counterpart of a `monitor' in
-    * AMD_performance_monitor. This call is equivalent to calling
-    * GenPerfMonitorsAMD and SelectPerfMonitorCountersAMD with a list of all
-    * counters in a group.
+   /* This is not specified in the extension, but is the only sane thing to
+    * do.
     */
+   if (queryHandle == NULL) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glCreatePerfQueryINTEL(queryHandle == NULL)");
+      return;
+   }
 
-   /* We keep the query ids contiguous */
-   first = _mesa_HashFindFreeKeyBlock(ctx->PerfQuery.Objects, 1);
-   if (!first) {
+   id = _mesa_HashFindFreeKeyBlock(ctx->PerfQuery.Objects, 1);
+   if (!id) {
       /* The GL_INTEL_performance_query spec says:
        *
        *    "If the query instance cannot be created due to exceeding the
@@ -555,82 +435,72 @@ _mesa_CreatePerfQueryINTEL(GLuint queryId, GLuint *queryHandle)
        *    an insufficient memory reason, an OUT_OF_MEMORY error is
        *    generated, and the location pointed by queryHandle returns NULL."
       */
-      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCreatePerfQueryINTEL");
+      _mesa_error_no_memory(__func__);
       return;
    }
 
-   m = new_performance_query(ctx, first);
-   if (m == NULL) {
+   obj = ctx->Driver.NewPerfQueryObject(ctx, queryid_to_index(queryId));
+   if (obj == NULL) {
       _mesa_error_no_memory(__func__);
       return;
    }
 
-   _mesa_HashInsert(ctx->PerfQuery.Objects, first, m);
-   *queryHandle = first;
-
-   ctx->Driver.ResetPerfMonitor(ctx, m);
+   obj->Id = id;
+   obj->Active = false;
+   obj->Ready = false;
 
-   for (i = 0; i < group_obj->NumCounters; ++i) {
-      ++m->ActiveGroups[group];
-      /* Counters are a continuous range of integers, 0 to NumCounters (excl),
-       * so i is the counter value to use here.
-       */
-      BITSET_SET(m->ActiveCounters[group], i);
-   }
+   _mesa_HashInsert(ctx->PerfQuery.Objects, id, obj);
+   *queryHandle = id;
 }
 
 extern void GLAPIENTRY
 _mesa_DeletePerfQueryINTEL(GLuint queryHandle)
 {
    GET_CURRENT_CONTEXT(ctx);
-   struct gl_perf_monitor_object *m;
 
-   /* The queryHandle is the counterpart to AMD_performance_monitor's monitor
-    * id.
-    */
-   m = lookup_query(ctx, queryHandle);
+   struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
 
    /* The GL_INTEL_performance_query spec says:
     *
     *    "If a query handle doesn't reference a previously created performance
     *    query instance, an INVALID_VALUE error is generated."
     */
-   if (m == NULL) {
+   if (obj == NULL) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glDeletePerfQueryINTEL(invalid queryHandle)");
       return;
    }
 
-   /* Let the driver stop the query if it's active. */
-   if (m->Active) {
-      ctx->Driver.ResetPerfMonitor(ctx, m);
-      m->Ended = false;
+   /* To avoid complications in the backend we never ask the backend to
+    * delete an active query or a query object while we are still
+    * waiting for data.
+    */
+
+   if (obj->Active)
+      _mesa_EndPerfQueryINTEL(queryHandle);
+
+   if (obj->Used && !obj->Ready) {
+      ctx->Driver.WaitPerfQuery(ctx, obj);
+      obj->Ready = true;
    }
 
    _mesa_HashRemove(ctx->PerfQuery.Objects, queryHandle);
-   ralloc_free(m->ActiveGroups);
-   ralloc_free(m->ActiveCounters);
-   ctx->Driver.DeletePerfMonitor(ctx, m);
+   ctx->Driver.DeletePerfQuery(ctx, obj);
 }
 
 extern void GLAPIENTRY
 _mesa_BeginPerfQueryINTEL(GLuint queryHandle)
 {
    GET_CURRENT_CONTEXT(ctx);
-   struct gl_perf_monitor_object *m;
 
-   /* The queryHandle is the counterpart to AMD_performance_monitor's monitor
-    * id.
-    */
-
-   m = lookup_query(ctx, queryHandle);
+   struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
 
    /* The GL_INTEL_performance_query spec says:
     *
     *    "If a query handle doesn't reference a previously created performance
     *    query instance, an INVALID_VALUE error is generated."
     */
-   if (m == NULL) {
+   if (obj == NULL) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glBeginPerfQueryINTEL(invalid queryHandle)");
       return;
@@ -646,15 +516,24 @@ _mesa_BeginPerfQueryINTEL(GLuint queryHandle)
     * We also generate an INVALID_OPERATION error if the driver can't begin
     * a query for its own reasons, and for nesting the same query.
     */
-   if (m->Active) {
+   if (obj->Active) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glBeginPerfQueryINTEL(already active)");
       return;
    }
 
-   if (ctx->Driver.BeginPerfMonitor(ctx, m)) {
-      m->Active = true;
-      m->Ended = false;
+   /* To avoid complications in the backend we never ask the backend to
+    * reuse a query object and begin a new query while we are still
+    * waiting for data on that object. */
+   if (obj->Used && !obj->Ready) {
+      ctx->Driver.WaitPerfQuery(ctx, obj);
+      obj->Ready = true;
+   }
+
+   if (ctx->Driver.BeginPerfQuery(ctx, obj)) {
+      obj->Used = true;
+      obj->Active = true;
+      obj->Ready = false;
    } else {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glBeginPerfQueryINTEL(driver unable to begin query)");
@@ -665,39 +544,32 @@ extern void GLAPIENTRY
 _mesa_EndPerfQueryINTEL(GLuint queryHandle)
 {
    GET_CURRENT_CONTEXT(ctx);
-   struct gl_perf_monitor_object *m;
 
-   /* The queryHandle is the counterpart to AMD_performance_monitor's monitor
-    * id.
-    */
+   struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
 
-   m = lookup_query(ctx, queryHandle);
+   /* Not explicitly covered in the spec, but for consistency... */
+   if (obj == NULL) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glEndPerfQueryINTEL(invalid queryHandle)");
+      return;
+   }
 
    /* The GL_INTEL_performance_query spec says:
     *
     *    "If a performance query is not currently started, an
     *    INVALID_OPERATION error will be generated."
-    *
-    * The specification doesn't state that an invalid handle would be an
-    * INVALID_VALUE error. Regardless, query for such a handle will not be
-    * started, so we generate an INVALID_OPERATION in that case too.
     */
-   if (m == NULL) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glEndPerfQueryINTEL(invalid queryHandle)");
-      return;
-   }
 
-   if (!m->Active) {
+   if (!obj->Active) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "glEndPerfQueryINTEL(not active)");
       return;
    }
 
-   ctx->Driver.EndPerfMonitor(ctx, m);
+   ctx->Driver.EndPerfQuery(ctx, obj);
 
-   m->Active = false;
-   m->Ended = true;
+   obj->Active = false;
+   obj->Ready = false;
 }
 
 extern void GLAPIENTRY
@@ -705,8 +577,15 @@ _mesa_GetPerfQueryDataINTEL(GLuint queryHandle, GLuint flags,
                             GLsizei dataSize, void *data, GLuint *bytesWritten)
 {
    GET_CURRENT_CONTEXT(ctx);
-   struct gl_perf_monitor_object *m;
-   bool result_available;
+
+   struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
+
+   /* Not explicitly covered in the spec, but for consistency... */
+   if (obj == NULL) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glGetPerfQueryDataINTEL(invalid queryHandle)");
+      return;
+   }
 
    /* The GL_INTEL_performance_query spec says:
     *
@@ -719,66 +598,32 @@ _mesa_GetPerfQueryDataINTEL(GLuint queryHandle, GLuint flags,
       return;
    }
 
-   /* The queryHandle is the counterpart to AMD_performance_monitor's monitor
-    * id.
-    */
-
-   m = lookup_query(ctx, queryHandle);
-
-   /* The specification doesn't state that an invalid handle generates an
-    * error. We could interpret that to mean the case should be handled as
-    * "measurement not ready for this query", but what should be done if
-    * `flags' equals PERFQUERY_WAIT_INTEL?
-    *
-    * To resolve this, we just generate an INVALID_VALUE from an invalid query
-    * handle.
-    */
-   if (m == NULL) {
-      _mesa_error(ctx, GL_INVALID_VALUE,
-                  "glGetPerfQueryDataINTEL(invalid queryHandle)");
-      return;
-   }
-
-   /* We need at least enough room for a single value. */
-   if (dataSize < sizeof(GLuint)) {
-      *bytesWritten = 0;
-      return;
-   }
+   /* Just for good measure in case a lazy application is only
+    * checking this and not checking for errors... */
+   *bytesWritten = 0;
 
-   /* The GL_INTEL_performance_query spec says:
-    *
-    *    "The call may end without returning any data if they are not ready
-    *    for reading as the measurement session is still pending (the
-    *    EndPerfQueryINTEL() command processing is not finished by
-    *    hardware). In this case location pointed by the bytesWritten
-    *    parameter will be set to 0."
-    *
-    * If EndPerfQueryINTEL() is not called at all, we follow this.
+   /* Not explicitly covered in the spec, but to be consistent with
+    * EndPerfQuery, which validates that an application only ends an
+    * active query, we also validate that an application doesn't try
+    * to get the data for a still-active query.
     */
-   if (!m->Ended) {
-      *bytesWritten = 0;
+   if (obj->Active) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glGetPerfQueryDataINTEL(query still active)");
       return;
    }
 
-   result_available = ctx->Driver.IsPerfMonitorResultAvailable(ctx, m);
+   obj->Ready = ctx->Driver.IsPerfQueryReady(ctx, obj);
 
-   if (!result_available) {
+   if (!obj->Ready) {
       if (flags == GL_PERFQUERY_FLUSH_INTEL) {
          ctx->Driver.Flush(ctx);
       } else if (flags == GL_PERFQUERY_WAIT_INTEL) {
-         /* Assume Finish() is both enough and not too much to wait for
-          * results. If results are still not available after Finish(), the
-          * later code automatically bails out with 0 for bytesWritten.
-          */
-         ctx->Driver.Finish(ctx);
-         result_available =
-            ctx->Driver.IsPerfMonitorResultAvailable(ctx, m);
+         ctx->Driver.WaitPerfQuery(ctx, obj);
+         obj->Ready = true;
       }
    }
 
-   if (result_available) {
-      ctx->Driver.GetPerfMonitorResult(ctx, m, dataSize, data, (GLint*)bytesWritten);
-   } else {
-      *bytesWritten = 0;
-   }
+   if (obj->Ready)
+      ctx->Driver.GetPerfQueryData(ctx, obj, dataSize, data, bytesWritten);
 }
diff --git a/src/mesa/main/performance_query.h b/src/mesa/main/performance_query.h
index 3fed5eae1b..8268f0ef19 100644
--- a/src/mesa/main/performance_query.h
+++ b/src/mesa/main/performance_query.h
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2012 Intel Corporation
+ * Copyright © 2012,2015 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -38,10 +38,6 @@ _mesa_init_performance_queries(struct gl_context *ctx);
 extern void
 _mesa_free_performance_queries(struct gl_context *ctx);
 
-unsigned
-_mesa_perf_query_counter_size(const struct gl_perf_monitor_counter *);
-
-
 extern void GLAPIENTRY
 _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId);
 
-- 
2.11.1