[Mesa-dev] [RFC 3/6] Model INTEL perf query backend after query object BE

Mon May 11 08:11:11 PDT 2015

Patches 1 and 2 look fine to me.

See my comments below for this one.

On 05/06/2015 02:53 AM, Robert Bragg wrote:
> Instead of using the same backend interface as AMD_performance_monitor
> this defines a dedicated INTEL_performance_query interface that is based
> on the ARB_query_buffer_object interface (considering the similarity of
> the extensions) with the addition of vfuncs for enumerating queries and
> their counters.
>
> Compared to the previous backend, some notable differences are:
>
> - The backend is free to represent counters using whatever data
>    structures are optimal/convenient since queries and counters are
>    enumerated via an iterator api instead of declaring them using
>    structures directly shared with the frontend.
>
>    This is also done to help us support the full range of data and
>    semantic types available with INTEL_performance_query which is awkward
>    while using a structure shared with the AMD_performance_monitor
>    backend since neither extension's types are a subset of the other.
>
> - The backend must support waiting for a query instead of the frontend
>    simply using glFinish().
>
> - Objects go through 'Active' and 'Ready' states consistent with the
>    query object backend (hopefully making them more familiar). There is
>    no 'Ended' state (which used to show that a query has ended at least
>    once for a given object). There is a new 'Used' state similar to the
>    'EverBound' state of query objects, set when a query is first begun
>    which implies that we are expecting to get results back for the object
>    at some point.
>
> The INTEL_performance_query and AMD_performance_monitor extensions are
> now completely orthogonal within Mesa (though a driver could optionally
> choose to implement both extensions within a unified backend if that
> were convenient for the sake of sharing state/code).
>
> Signed-off-by: Robert Bragg <robert at sixbynine.org>
> ---
>   src/mesa/main/dd.h                |  39 +++
>   src/mesa/main/mtypes.h            |  25 +-
>   src/mesa/main/performance_query.c | 590 ++++++++++++++------------------------
>   src/mesa/main/performance_query.h |   6 +-
>   4 files changed, 275 insertions(+), 385 deletions(-)
>
> diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
> index 0c1a13f..4ba1524 100644
> --- a/src/mesa/main/dd.h
> +++ b/src/mesa/main/dd.h
> @@ -759,6 +759,45 @@ struct dd_function_table {
>                                   GLint *bytesWritten);
>      /*@}*/
>   
> +   /**
> +    * \name Performance Query objects
> +    */
> +   /*@{*/
> +   void (*GetPerfQueryInfo)(struct gl_context *ctx,
> +                            int queryIndex,
> +                            const char **name,
> +                            GLuint *dataSize,
> +                            GLuint *numCounters,
> +                            GLuint *numActive);
> +   void (*GetPerfCounterInfo)(struct gl_context *ctx,
> +                              int queryIndex,
> +                              int counterIndex,
> +                              const char **name,
> +                              const char **desc,
> +                              GLuint *offset,
> +                              GLuint *data_size,
> +                              GLuint *type_enum,
> +                              GLuint *data_type_enum,
> +                              GLuint64 *raw_max);
> +   struct gl_perf_query_object * (*NewPerfQueryObject)(struct gl_context *ctx,
> +                                                       int queryIndex);
> +   void (*DeletePerfQuery)(struct gl_context *ctx,
> +                           struct gl_perf_query_object *obj);
> +   GLboolean (*BeginPerfQuery)(struct gl_context *ctx,
> +                               struct gl_perf_query_object *obj);
> +   void (*EndPerfQuery)(struct gl_context *ctx,
> +                        struct gl_perf_query_object *obj);
> +   void (*WaitPerfQuery)(struct gl_context *ctx,
> +                         struct gl_perf_query_object *obj);
> +   GLboolean (*IsPerfQueryReady)(struct gl_context *ctx,
> +                                 struct gl_perf_query_object *obj);
> +   void (*GetPerfQueryData)(struct gl_context *ctx,
> +                            struct gl_perf_query_object *obj,
> +                            GLsizei dataSize,
> +                            GLuint *data,
> +                            GLuint *bytesWritten);
> +   /*@}*/
> +
>   
>      /**
>       * \name Vertex Array objects
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index b1e5fa9..a26109d 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -2014,6 +2014,23 @@ struct gl_perf_monitor_group
>   
>   
>   /**
> + * A query object instance as described in INTEL_performance_query.
> + *
> + * NB: We want to keep this and the corresponding backend structure
> + * relatively lean considering that applications may expect to
> + * allocate enough objects to be able to query around all draw calls
> + * in a frame.
> + */
> +struct gl_perf_query_object
> +{
> +   GLuint Id;        /**< hash table ID/name */
> +   GLuint Used:1;    /**< has been used for 1 or more queries */
> +   GLuint Active:1;  /**< inside Begin/EndPerfQuery */
> +   GLuint Ready:1;   /**< result is ready? */
> +};
> +
> +
> +/**
>    * Context state for AMD_performance_monitor.
>    */
>   struct gl_perf_monitor_state
> @@ -2032,12 +2049,8 @@ struct gl_perf_monitor_state
>    */
>   struct gl_perf_query_state
>   {
> -   /** Array of performance monitor groups (indexed by group ID) */
> -   const struct gl_perf_monitor_group *Groups;
> -   GLuint NumGroups;
> -
> -   /** The table of all performance query objects. */
> -   struct _mesa_HashTable *Objects;
> +   struct _mesa_HashTable *Objects; /**< The table of all performance query objects */
> +   GLuint NumQueries;
>   };
>   
>   
> diff --git a/src/mesa/main/performance_query.c b/src/mesa/main/performance_query.c
> index 355836e..57d6f8e 100644
> --- a/src/mesa/main/performance_query.c
> +++ b/src/mesa/main/performance_query.c
> @@ -1,5 +1,5 @@
>   /*
> - * Copyright © 2012 Intel Corporation
> + * Copyright © 2012-2015 Intel Corporation
>    *
>    * Permission is hereby granted, free of charge, to any person obtaining a
>    * copy of this software and associated documentation files (the "Software"),
> @@ -24,14 +24,6 @@
>   /**
>    * \file performance_query.c
>    * Core Mesa support for the INTEL_performance_query extension.
> - *
> - * In order to implement this extension, start by defining two enums:
> - * one for Groups, and one for Counters.  These will be used as indexes into
> - * arrays, so they should start at 0 and increment from there.
> - *
> - * Counter IDs need to be globally unique.  That is, you can't have counter 7
> - * in group A and counter 7 in group B.  A global enum of all available
> - * counters is a convenient way to guarantee this.
>    */
>   
>   #include <stdbool.h>
> @@ -42,66 +34,21 @@
>   #include "macros.h"
>   #include "mtypes.h"
>   #include "performance_query.h"
> -#include "util/bitset.h"
>   #include "util/ralloc.h"
>   
>   void
>   _mesa_init_performance_queries(struct gl_context *ctx)
>   {
>      ctx->PerfQuery.Objects = _mesa_NewHashTable();
> -   ctx->PerfQuery.NumGroups = 0;

Why don't you initialize NumQueries here?

> -   ctx->PerfQuery.Groups = NULL;
> -}
> -
> -static struct gl_perf_monitor_object *
> -new_performance_query(struct gl_context *ctx, GLuint index)
> -{
> -   unsigned i;
> -   struct gl_perf_monitor_object *m = ctx->Driver.NewPerfMonitor(ctx);
> -
> -   if (m == NULL)
> -      return NULL;
> -
> -   m->Name = index;
> -
> -   m->Active = false;
> -
> -   m->ActiveGroups =
> -      rzalloc_array(NULL, unsigned, ctx->PerfQuery.NumGroups);
> -
> -   m->ActiveCounters =
> -      ralloc_array(NULL, BITSET_WORD *, ctx->PerfQuery.NumGroups);
> -
> -   if (m->ActiveGroups == NULL || m->ActiveCounters == NULL)
> -      goto fail;
> -
> -   for (i = 0; i < ctx->PerfQuery.NumGroups; i++) {
> -      const struct gl_perf_monitor_group *g = &ctx->PerfQuery.Groups[i];
> -
> -      m->ActiveCounters[i] = rzalloc_array(m->ActiveCounters, BITSET_WORD,
> -                                           BITSET_WORDS(g->NumCounters));
> -      if (m->ActiveCounters[i] == NULL)
> -         goto fail;
> -   }
> -
> -   return m;
> -
> -fail:
> -   ralloc_free(m->ActiveGroups);
> -   ralloc_free(m->ActiveCounters);
> -   ctx->Driver.DeletePerfMonitor(ctx, m);
> -   return NULL;
>   }
>   
>   static void
>   free_performance_query(GLuint key, void *data, void *user)
>   {
> -   struct gl_perf_monitor_object *m = data;
> +   struct gl_perf_query_object *m = data;
>      struct gl_context *ctx = user;
>   
> -   ralloc_free(m->ActiveGroups);
> -   ralloc_free(m->ActiveCounters);
> -   ctx->Driver.DeletePerfMonitor(ctx, m);
> +   ctx->Driver.DeletePerfQuery(ctx, m);
>   }
>   
>   void
> @@ -112,34 +59,13 @@ _mesa_free_performance_queries(struct gl_context *ctx)
>      _mesa_DeleteHashTable(ctx->PerfQuery.Objects);
>   }
>   
> -static inline struct gl_perf_monitor_object *
> -lookup_query(struct gl_context *ctx, GLuint id)
> -{
> -   return (struct gl_perf_monitor_object *)
> -      _mesa_HashLookup(ctx->PerfQuery.Objects, id);
> -}
> -
> -static inline const struct gl_perf_monitor_group *
> -get_group(const struct gl_context *ctx, GLuint id)
> +static inline struct gl_perf_query_object *
> +lookup_object(struct gl_context *ctx, GLuint id)
>   {
> -   if (id >= ctx->PerfQuery.NumGroups)
> -      return NULL;
> -
> -   return &ctx->PerfQuery.Groups[id];
> -}
> -
> -static inline const struct gl_perf_monitor_counter *
> -get_counter(const struct gl_perf_monitor_group *group_obj, GLuint id)
> -{
> -   if (id >= group_obj->NumCounters)
> -      return NULL;
> -
> -   return &group_obj->Counters[id];
> +   return _mesa_HashLookup(ctx->PerfQuery.Objects, id);

I guess _mesa_HashLookup() returns NULL when it cannot find the element, 
right?

>   }
>   
> -/* For INTEL_performance_query, query id 0 is reserved to be invalid. We use
> - * index to Groups array + 1 as the query id. Same applies to counter id.
> - */
> +/* For INTEL_performance_query, query id 0 is reserved to be invalid. */
>   static inline GLuint
>   queryid_to_index(GLuint queryid)
>   {
> @@ -155,7 +81,8 @@ index_to_queryid(GLuint index)
>   static inline bool
>   queryid_valid(const struct gl_context *ctx, GLuint queryid)
>   {
> -   return get_group(ctx, queryid_to_index(queryid)) != NULL;
> +   GLuint index = queryid_to_index(queryid);
> +   return index >= 0 && index < ctx->PerfQuery.NumQueries;
>   }
>   
>   static inline GLuint
> @@ -164,33 +91,29 @@ counterid_to_index(GLuint counterid)
>      return counterid - 1;
>   }
>   
> -/*****************************************************************************/
> -
> -/**
> - * Returns how many bytes a counter's value takes up.
> - */
> -unsigned
> -_mesa_perf_query_counter_size(const struct gl_perf_monitor_counter *c)
> +static void
> +return_string(char *stringRet, GLuint stringMaxLen, const char *string)

In my opinion, the name of this function is a bit ambiguous. :-)

>   {
> -   switch (c->Type) {
> -   case GL_FLOAT:
> -   case GL_PERCENTAGE_AMD:
> -      return sizeof(GLfloat);
> -   case GL_UNSIGNED_INT:
> -      return sizeof(GLuint);
> -   case GL_UNSIGNED_INT64_AMD:
> -      return sizeof(uint64_t);
> -   default:
> -      assert(!"Should not get here: invalid counter type");
> -      return 0;
> -   }
> +   if (!stringRet)
> +      return;
> +
> +   strncpy(stringRet, string ? string : "", stringMaxLen);
> +
> +   /* No specification given about whether returned strings needs
> +    * to be zero-terminated. Zero-terminate the string always as we
> +    * don't otherwise communicate the length of the returned
> +    * string.
> +    */
> +   if (stringMaxLen > 0)
> +      stringRet[stringMaxLen - 1] = '\0';
>   }
>   
> +/*****************************************************************************/
> +
>   extern void GLAPIENTRY
>   _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId)
>   {
>      GET_CURRENT_CONTEXT(ctx);
> -   unsigned numGroups;
>   
>      /* The GL_INTEL_performance_query spec says:
>       *
> @@ -202,15 +125,13 @@ _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId)
>         return;
>      }
>   
> -   numGroups = ctx->PerfQuery.NumGroups;
> -
>      /* The GL_INTEL_performance_query spec says:
>       *
>       *    "If the given hardware platform doesn't support any performance
>       *    queries, then the value of 0 is returned and INVALID_OPERATION error
>       *    is raised."
>       */
> -   if (numGroups == 0) {
> +   if (ctx->PerfQuery.NumQueries == 0) {
>         *queryId = 0;
>         _mesa_error(ctx, GL_INVALID_OPERATION,
>                     "glGetFirstPerfQueryIdINTEL(no queries supported)");
> @@ -242,26 +163,23 @@ _mesa_GetNextPerfQueryIdINTEL(GLuint queryId, GLuint *nextQueryId)
>      }
>   
>      if (!queryid_valid(ctx, queryId)) {
> -      *nextQueryId = 0;
>         _mesa_error(ctx, GL_INVALID_VALUE,
>                     "glGetNextPerfQueryIdINTEL(invalid query)");
>         return;
>      }
>   
> -   ++queryId;
> -
> -   if (!queryid_valid(ctx, queryId)) {
> -      *nextQueryId = 0;
> -   } else {
> +   if (queryid_valid(ctx, ++queryId))
>         *nextQueryId = queryId;
> -   }
> +   else
> +      *nextQueryId = 0;
>   }
>   
>   extern void GLAPIENTRY
>   _mesa_GetPerfQueryIdByNameINTEL(char *queryName, GLuint *queryId)
>   {
>      GET_CURRENT_CONTEXT(ctx);
> -   unsigned i;
> +
> +   int i;
>   
>      /* The GL_INTEL_performance_query spec says:
>       *
> @@ -274,15 +192,22 @@ _mesa_GetPerfQueryIdByNameINTEL(char *queryName, GLuint *queryId)
>         return;
>      }
>   
> -   /* The specification does not state that this produces an error. */
> +   /* The specification does not state that this produces an error but
> +    * to be consistent with glGetFirstPerfQueryIdINTEL we generate an
> +    * INVALID_VALUE error */
>      if (!queryId) {
> -      _mesa_warning(ctx, "glGetPerfQueryIdByNameINTEL(queryId == NULL)");
> +      _mesa_error(ctx, GL_INVALID_VALUE,
> +                  "glGetPerfQueryIdByNameINTEL(queryId == NULL)");
>         return;
>      }
>   
> -   for (i = 0; i < ctx->PerfQuery.NumGroups; ++i) {
> -      const struct gl_perf_monitor_group *group_obj = get_group(ctx, i);
> -      if (strcmp(group_obj->Name, queryName) == 0) {
> +   for (i = 0; i < ctx->PerfQuery.NumQueries; ++i) {
> +      const GLchar *name;
> +      GLuint ignore;
> +
> +      ctx->Driver.GetPerfQueryInfo(ctx, i, &name, &ignore, &ignore, &ignore);

Why these parameters are currently ignored?

> +
> +      if (strcmp(name, queryName) == 0) {
>            *queryId = index_to_queryid(i);
>            return;
>         }
> @@ -294,18 +219,21 @@ _mesa_GetPerfQueryIdByNameINTEL(char *queryName, GLuint *queryId)
>   
>   extern void GLAPIENTRY
>   _mesa_GetPerfQueryInfoINTEL(GLuint queryId,
> -                            GLuint queryNameLength, char *queryName,
> -                            GLuint *dataSize, GLuint *noCounters,
> -                            GLuint *noActiveInstances,
> +                            GLuint nameLength, char *name,
> +                            GLuint *dataSize,
> +                            GLuint *numCounters,
> +                            GLuint *numActive,
>                               GLuint *capsMask)
>   {
>      GET_CURRENT_CONTEXT(ctx);
> -   unsigned i;
>   
> -   const struct gl_perf_monitor_group *group_obj =
> -      get_group(ctx, queryid_to_index(queryId));
> +   GLuint queryIndex = queryid_to_index(queryId);
> +   const GLchar *queryName;
> +   GLuint queryDataSize;
> +   GLuint queryNumCounters;
> +   GLuint queryNumActive;
>   
> -   if (group_obj == NULL) {
> +   if (!queryid_valid(ctx, queryId)) {
>         /* The GL_INTEL_performance_query spec says:
>          *
>          *    "If queryId does not reference a valid query type, an
> @@ -316,32 +244,19 @@ _mesa_GetPerfQueryInfoINTEL(GLuint queryId,
>         return;
>      }
>   
> -   if (queryName) {
> -      strncpy(queryName, group_obj->Name, queryNameLength);
> +   ctx->Driver.GetPerfQueryInfo(ctx, queryIndex,
> +                                &queryName,
> +                                &queryDataSize,
> +                                &queryNumCounters,
> +                                &queryNumActive);
>   
> -      /* No specification given about whether the string needs to be
> -       * zero-terminated. Zero-terminate the string always as we don't
> -       * otherwise communicate the length of the returned string.
> -       */
> -      if (queryNameLength > 0) {
> -         queryName[queryNameLength - 1] = '\0';
> -      }
> -   }
> +   return_string(name, nameLength, queryName);
>   
> -   if (dataSize) {
> -      unsigned size = 0;
> -      for (i = 0; i < group_obj->NumCounters; ++i) {
> -         /* What we get from the driver is group id (uint32_t) + counter id
> -          * (uint32_t) + value.
> -          */
> -         size += 2 * sizeof(uint32_t) + _mesa_perf_query_counter_size(&group_obj->Counters[i]);
> -      }
> -      *dataSize = size;
> -   }
> +   if (dataSize)
> +      *dataSize = queryDataSize;
>   
> -   if (noCounters) {
> -      *noCounters = group_obj->NumCounters;
> -   }
> +   if (numCounters)
> +      *numCounters = queryNumCounters;
>   
>      /* The GL_INTEL_performance_query spec says:
>       *
> @@ -352,128 +267,93 @@ _mesa_GetPerfQueryInfoINTEL(GLuint queryId,
>       * 2) Another typo in the specification, maxInstances parameter is not listed
>       *    in the declaration of this function in the list of new functions.
>       */
> -   if (noActiveInstances) {
> -      *noActiveInstances = _mesa_HashNumEntries(ctx->PerfQuery.Objects);
> -   }
> +   if (numActive)
> +      *numActive = queryNumActive;
>   
> -   if (capsMask) {
> -      /* TODO: This information not yet available in the monitor structs. For
> -       * now, we hardcode SINGLE_CONTEXT, since that's what the implementation
> -       * currently tries very hard to do.
> -       */
> +   /* Assume for now that all queries are per-context */
> +   if (capsMask)
>         *capsMask = GL_PERFQUERY_SINGLE_CONTEXT_INTEL;
> -   }

What is your plan about this? Are you going to support multiple contexts 
in the near future?

>   }
>   
>   extern void GLAPIENTRY
>   _mesa_GetPerfCounterInfoINTEL(GLuint queryId, GLuint counterId,
> -                              GLuint counterNameLength, char *counterName,
> -                              GLuint counterDescLength, char *counterDesc,
> -                              GLuint *counterOffset, GLuint *counterDataSize, GLuint *counterTypeEnum,
> -                              GLuint *counterDataTypeEnum, GLuint64 *rawCounterMaxValue)
> +                              GLuint nameLength, char *name,
> +                              GLuint descLength, char *desc,
> +                              GLuint *offset,
> +                              GLuint *dataSize,
> +                              GLuint *typeEnum,
> +                              GLuint *dataTypeEnum,
> +                              GLuint64 *rawCounterMaxValue)
>   {
>      GET_CURRENT_CONTEXT(ctx);
>   
> -   const struct gl_perf_monitor_group *group_obj;
> -   const struct gl_perf_monitor_counter *counter_obj;
> -   unsigned counterIndex;
> -   unsigned i;
> -
> -   group_obj = get_group(ctx, queryid_to_index(queryId));
> +   GLuint queryIndex = queryid_to_index(queryId);
> +   const GLchar *queryName;
> +   GLuint queryDataSize;
> +   GLuint queryNumCounters;
> +   GLuint queryNumActive;
> +   GLuint counterIndex;
> +   const GLchar *counterName;
> +   const GLchar *counterDesc;
> +   GLuint counterOffset;
> +   GLuint counterDataSize;
> +   GLuint counterTypeEnum;
> +   GLuint counterDataTypeEnum;
> +   GLuint64 counterRawMax;
>   
> -   /* The GL_INTEL_performance_query spec says:
> -    *
> -    *    "If the pair of queryId and counterId does not reference a valid
> -    *    counter, an INVALID_VALUE error is generated."
> -    */
> -   if (group_obj == NULL) {
> +   if (!queryid_valid(ctx, queryId)) {
> +      /* The GL_INTEL_performance_query spec says:
> +       *
> +       *    "If the pair of queryId and counterId does not reference a valid
> +       *    counter, an INVALID_VALUE error is generated."
> +       */
>         _mesa_error(ctx, GL_INVALID_VALUE,
>                     "glGetPerfCounterInfoINTEL(invalid queryId)");
>         return;
>      }
>   
> +   ctx->Driver.GetPerfQueryInfo(ctx, queryIndex,
> +                                &queryName,
> +                                &queryDataSize,
> +                                &queryNumCounters,
> +                                &queryNumActive);
> +
>      counterIndex = counterid_to_index(counterId);
> -   counter_obj = get_counter(group_obj, counterIndex);
>   
> -   if (counter_obj == NULL) {
> +   if (counterIndex < 0 || counterIndex >= queryNumCounters) {
>         _mesa_error(ctx, GL_INVALID_VALUE,
>                     "glGetPerfCounterInfoINTEL(invalid counterId)");
>         return;
>      }
>   
> -   if (counterName) {
> -      strncpy(counterName, counter_obj->Name, counterNameLength);
> +   ctx->Driver.GetPerfCounterInfo(ctx, queryIndex, counterIndex,
> +                                  &counterName,
> +                                  &counterDesc,
> +                                  &counterOffset,
> +                                  &counterDataSize,
> +                                  &counterTypeEnum,
> +                                  &counterDataTypeEnum,
> +                                  &counterRawMax);
>   
> -      /* No specification given about whether the string needs to be
> -       * zero-terminated. Zero-terminate the string always as we don't
> -       * otherwise communicate the length of the returned string.
> -       */
> -      if (counterNameLength > 0) {
> -         counterName[counterNameLength - 1] = '\0';
> -      }
> -   }
> +   return_string(name, nameLength, counterName);
> +   return_string(desc, descLength, counterDesc);
>   
> -   if (counterDesc) {
> -      /* TODO: No separate description text at the moment. We pass the name
> -       * again for the moment.
> -       */
> -      strncpy(counterDesc, counter_obj->Name, counterDescLength);
> -
> -      /* No specification given about whether the string needs to be
> -       * zero-terminated. Zero-terminate the string always as we don't
> -       * otherwise communicate the length of the returned string.
> -       */
> -      if (counterDescLength > 0) {
> -         counterDesc[counterDescLength - 1] = '\0';
> -      }
> -   }
> +   if (offset)
> +      *offset = counterOffset;
>   
> -   if (counterOffset) {
> -      unsigned offset = 0;
> -      for (i = 0; i < counterIndex; ++i) {
> -         /* What we get from the driver is group id (uint32_t) + counter id
> -          * (uint32_t) + value.
> -          */
> -         offset += 2 * sizeof(uint32_t) + _mesa_perf_query_counter_size(&group_obj->Counters[i]);
> -      }
> -      *counterOffset = 2 * sizeof(uint32_t) + offset;
> -   }
> +   if (dataSize)
> +      *dataSize = counterDataSize;
>   
> -   if (counterDataSize) {
> -      *counterDataSize = _mesa_perf_query_counter_size(counter_obj);
> -   }
> +   if (typeEnum)
> +      *typeEnum = counterTypeEnum;
>   
> -   if (counterTypeEnum) {
> -      /* TODO: Different counter types (semantic type, not data type) not
> -       * supported as of yet.
> -       */
> -      *counterTypeEnum = GL_PERFQUERY_COUNTER_RAW_INTEL;
> -   }
> +   if (dataTypeEnum)
> +      *dataTypeEnum = counterDataTypeEnum;
>   
> -   if (counterDataTypeEnum) {
> -      switch (counter_obj->Type) {
> -      case GL_FLOAT:
> -      case GL_PERCENTAGE_AMD:
> -         *counterDataTypeEnum = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
> -         break;
> -      case GL_UNSIGNED_INT:
> -         *counterDataTypeEnum = GL_PERFQUERY_COUNTER_DATA_UINT32_INTEL;
> -         break;
> -      case GL_UNSIGNED_INT64_AMD:
> -         *counterDataTypeEnum = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
> -         break;
> -      default:
> -         assert(!"Should not get here: invalid counter type");
> -         return;
> -      }
> -   }
> +   if (rawCounterMaxValue)
> +      *rawCounterMaxValue = counterRawMax;
>   
>      if (rawCounterMaxValue) {
> -      /* This value is (implicitly) specified to be used only with
> -       * GL_PERFQUERY_COUNTER_RAW_INTEL counters. When semantic types for
> -       * counters are added, that needs to be checked.
> -       */
> -
>         /* The GL_INTEL_performance_query spec says:
>          *
>          *    "for some raw counters for which the maximal value is
> @@ -481,10 +361,17 @@ _mesa_GetPerfCounterInfoINTEL(GLuint queryId, GLuint counterId,
>          *    returned in the location pointed by rawCounterMaxValue, otherwise,
>          *    the location is written with the value of 0."
>          *
> -       * The maximum value reported by the driver at the moment is not with
> -       * these semantics, so write 0 always to be safe.
> +       *    Since it's very useful to be able to report a maximum value for
> +       *    more that just counters using the _COUNTER_RAW_INTEL or
> +       *    _COUNTER_DURATION_RAW_INTEL enums (e.g. for a _THROUGHPUT tools
> +       *    want to be able to visualize the absolute throughput with respect
> +       *    to the theoretical maximum that's possible) and there doesn't seem
> +       *    to be any reason not to allow _THROUGHPUT counters to also be
> +       *    considerer "raw" here, we always leave it up to the backend to
> +       *    decide when it's appropriate to report a maximum counter value or 0
> +       *    if not.
>          */
> -      *rawCounterMaxValue = 0;
> +      *rawCounterMaxValue = counterRawMax;
>      }
>   }
>   
> @@ -492,44 +379,32 @@ extern void GLAPIENTRY
>   _mesa_CreatePerfQueryINTEL(GLuint queryId, GLuint *queryHandle)
>   {
>      GET_CURRENT_CONTEXT(ctx);
> -   GLuint first;
> -   GLuint group;
> -   const struct gl_perf_monitor_group *group_obj;
> -   struct gl_perf_monitor_object *m;
> -   unsigned i;
> -
> -   /* This is not specified in the extension, but is the only sane thing to
> -    * do.
> -    */
> -   if (queryHandle == NULL) {
> -      _mesa_error(ctx, GL_INVALID_VALUE,
> -                  "glCreatePerfQueryINTEL(queryHandle == NULL)");
> -      return;
> -   }
>   
> -   group = queryid_to_index(queryId);
> -   group_obj = get_group(ctx, group);
> +   GLuint id;
> +   struct gl_perf_query_object *obj;
>   
>      /* The GL_INTEL_performance_query spec says:
>       *
>       *    "If queryId does not reference a valid query type, an INVALID_VALUE
>       *    error is generated."
>       */
> -   if (group_obj == NULL) {
> +   if (!queryid_valid(ctx, queryId)) {
>         _mesa_error(ctx, GL_INVALID_VALUE,
>                     "glCreatePerfQueryINTEL(invalid queryId)");
>         return;
>      }
>   
> -   /* The query object created here is the counterpart of a `monitor' in
> -    * AMD_performance_monitor. This call is equivalent to calling
> -    * GenPerfMonitorsAMD and SelectPerfMonitorCountersAMD with a list of all
> -    * counters in a group.
> +   /* This is not specified in the extension, but is the only sane thing to
> +    * do.
>       */
> +   if (queryHandle == NULL) {
> +      _mesa_error(ctx, GL_INVALID_VALUE,
> +                  "glCreatePerfQueryINTEL(queryHandle == NULL)");
> +      return;
> +   }
>   
> -   /* We keep the query ids contiguous */
> -   first = _mesa_HashFindFreeKeyBlock(ctx->PerfQuery.Objects, 1);
> -   if (!first) {
> +   id = _mesa_HashFindFreeKeyBlock(ctx->PerfQuery.Objects, 1);
> +   if (!id) {
>         /* The GL_INTEL_performance_query spec says:
>          *
>          *    "If the query instance cannot be created due to exceeding the
> @@ -537,82 +412,72 @@ _mesa_CreatePerfQueryINTEL(GLuint queryId, GLuint *queryHandle)
>          *    an insufficient memory reason, an OUT_OF_MEMORY error is
>          *    generated, and the location pointed by queryHandle returns NULL."
>         */
> -      _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCreatePerfQueryINTEL");
> +      _mesa_error_no_memory(__func__);
>         return;
>      }
>   
> -   m = new_performance_query(ctx, first);
> -   if (m == NULL) {
> +   obj = ctx->Driver.NewPerfQueryObject(ctx, queryid_to_index(queryId));
> +   if (obj == NULL) {
>         _mesa_error_no_memory(__func__);
>         return;
>      }
>   
> -   _mesa_HashInsert(ctx->PerfQuery.Objects, first, m);
> -   *queryHandle = first;
> +   obj->Id = id;
> +   obj->Active = false;
> +   obj->Ready = false;
>   
> -   ctx->Driver.ResetPerfMonitor(ctx, m);
> -
> -   for (i = 0; i < group_obj->NumCounters; ++i) {
> -      ++m->ActiveGroups[group];
> -      /* Counters are a continuous range of integers, 0 to NumCounters (excl),
> -       * so i is the counter value to use here.
> -       */
> -      BITSET_SET(m->ActiveCounters[group], i);
> -   }
> +   _mesa_HashInsert(ctx->PerfQuery.Objects, id, obj);
> +   *queryHandle = id;
>   }
>   
>   extern void GLAPIENTRY
>   _mesa_DeletePerfQueryINTEL(GLuint queryHandle)
>   {
>      GET_CURRENT_CONTEXT(ctx);
> -   struct gl_perf_monitor_object *m;
>   
> -   /* The queryHandle is the counterpart to AMD_performance_monitor's monitor
> -    * id.
> -    */
> -   m = lookup_query(ctx, queryHandle);
> +   struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);

I think you can directly use _mesa_HashLookup() instead of this somewhat 
useless wrapper function.

>   
>      /* The GL_INTEL_performance_query spec says:
>       *
>       *    "If a query handle doesn't reference a previously created performance
>       *    query instance, an INVALID_VALUE error is generated."
>       */
> -   if (m == NULL) {
> +   if (obj == NULL) {
>         _mesa_error(ctx, GL_INVALID_VALUE,
>                     "glDeletePerfQueryINTEL(invalid queryHandle)");
>         return;
>      }
>   
> -   /* Let the driver stop the query if it's active. */
> -   if (m->Active) {
> -      ctx->Driver.ResetPerfMonitor(ctx, m);
> -      m->Ended = false;
> +   /* To avoid complications in the backed we never ask the backend to

Typo: backend

> +    * delete an active query or a query object while we are still
> +    * waiting for data.
> +    */
> +
> +   if (obj->Active)
> +      _mesa_EndPerfQueryINTEL(queryHandle);
> +
> +   if (obj->Used && !obj->Ready) {
> +      ctx->Driver.WaitPerfQuery(ctx, obj);
> +      obj->Ready = true;
>      }
>   
>      _mesa_HashRemove(ctx->PerfQuery.Objects, queryHandle);
> -   ralloc_free(m->ActiveGroups);
> -   ralloc_free(m->ActiveCounters);
> -   ctx->Driver.DeletePerfMonitor(ctx, m);
> +   ctx->Driver.DeletePerfQuery(ctx, obj);
>   }
>   
>   extern void GLAPIENTRY
>   _mesa_BeginPerfQueryINTEL(GLuint queryHandle)
>   {
>      GET_CURRENT_CONTEXT(ctx);
> -   struct gl_perf_monitor_object *m;
> -
> -   /* The queryHandle is the counterpart to AMD_performance_monitor's monitor
> -    * id.
> -    */
>   
> -   m = lookup_query(ctx, queryHandle);
> +   struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
>   
>      /* The GL_INTEL_performance_query spec says:
>       *
>       *    "If a query handle doesn't reference a previously created performance
>       *    query instance, an INVALID_VALUE error is generated."
>       */
> -   if (m == NULL) {
> +   if (obj == NULL) {
>         _mesa_error(ctx, GL_INVALID_VALUE,
>                     "glBeginPerfQueryINTEL(invalid queryHandle)");
>         return;
> @@ -628,15 +493,24 @@ _mesa_BeginPerfQueryINTEL(GLuint queryHandle)
>       * We also generate an INVALID_OPERATION error if the driver can't begin
>       * a query for its own reasons, and for nesting the same query.
>       */
> -   if (m->Active) {
> +   if (obj->Active) {
>         _mesa_error(ctx, GL_INVALID_OPERATION,
>                     "glBeginPerfQueryINTEL(already active)");
>         return;
>      }
>   
> -   if (ctx->Driver.BeginPerfMonitor(ctx, m)) {
> -      m->Active = true;
> -      m->Ended = false;
> +   /* To avoid complications in the backed we never ask the backend to
> +    * reuse a query object and begin a new query while we are still
> +    * waiting for data on that object. */

What the spec says about that?
My understanding is that with your code we can't abort a query, right?

> +   if (obj->Used && !obj->Ready) {
> +      ctx->Driver.WaitPerfQuery(ctx, obj);
> +      obj->Ready = true;
> +   }
> +
> +   if (ctx->Driver.BeginPerfQuery(ctx, obj)) {
> +      obj->Used = true;
> +      obj->Active = true;
> +      obj->Ready = false;
>      } else {
>         _mesa_error(ctx, GL_INVALID_OPERATION,
>                     "glBeginPerfQueryINTEL(driver unable to begin query)");
> @@ -647,39 +521,32 @@ extern void GLAPIENTRY
>   _mesa_EndPerfQueryINTEL(GLuint queryHandle)
>   {
>      GET_CURRENT_CONTEXT(ctx);
> -   struct gl_perf_monitor_object *m;
>   
> -   /* The queryHandle is the counterpart to AMD_performance_monitor's monitor
> -    * id.
> -    */
> +   struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
>   
> -   m = lookup_query(ctx, queryHandle);
> +   /* Not explicitly covered in the spec, but for consistency... */
> +   if (obj == NULL) {
> +      _mesa_error(ctx, GL_INVALID_VALUE,
> +                  "glEndPerfQueryINTEL(invalid queryHandle)");
> +      return;
> +   }
>   
>      /* The GL_INTEL_performance_query spec says:
>       *
>       *    "If a performance query is not currently started, an
>       *    INVALID_OPERATION error will be generated."
> -    *
> -    * The specification doesn't state that an invalid handle would be an
> -    * INVALID_VALUE error. Regardless, query for such a handle will not be
> -    * started, so we generate an INVALID_OPERATION in that case too.
>       */
> -   if (m == NULL) {
> -      _mesa_error(ctx, GL_INVALID_OPERATION,
> -                  "glEndPerfQueryINTEL(invalid queryHandle)");
> -      return;
> -   }
>   
> -   if (!m->Active) {
> +   if (!obj->Active) {
>         _mesa_error(ctx, GL_INVALID_OPERATION,
>                     "glEndPerfQueryINTEL(not active)");
>         return;
>      }
>   
> -   ctx->Driver.EndPerfMonitor(ctx, m);
> +   ctx->Driver.EndPerfQuery(ctx, obj);
>   
> -   m->Active = false;
> -   m->Ended = true;
> +   obj->Active = false;
> +   obj->Ready = false;

What about obj->Used flag?

>   }
>   
>   extern void GLAPIENTRY
> @@ -687,8 +554,15 @@ _mesa_GetPerfQueryDataINTEL(GLuint queryHandle, GLuint flags,
>                               GLsizei dataSize, void *data, GLuint *bytesWritten)
>   {
>      GET_CURRENT_CONTEXT(ctx);
> -   struct gl_perf_monitor_object *m;
> -   bool result_available;
> +
> +   struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
> +
> +   /* Not explicitly covered in the spec, but for consistency... */
> +   if (obj == NULL) {
> +      _mesa_error(ctx, GL_INVALID_VALUE,
> +                  "glEndPerfQueryINTEL(invalid queryHandle)");
> +      return;
> +   }
>   
>      /* The GL_INTEL_performance_query spec says:
>       *
> @@ -701,66 +575,34 @@ _mesa_GetPerfQueryDataINTEL(GLuint queryHandle, GLuint flags,
>         return;
>      }
>   
> -   /* The queryHandle is the counterpart to AMD_performance_monitor's monitor
> -    * id.
> -    */
> +   /* Just for good measure in case a lazy application is only
> +    * checking this and not checking for errors... */
> +   *bytesWritten = 0;
>   
> -   m = lookup_query(ctx, queryHandle);
> -
> -   /* The specification doesn't state that an invalid handle generates an
> -    * error. We could interpret that to mean the case should be handled as
> -    * "measurement not ready for this query", but what should be done if
> -    * `flags' equals PERFQUERY_WAIT_INTEL?
> -    *
> -    * To resolve this, we just generate an INVALID_VALUE from an invalid query
> -    * handle.
> -    */
> -   if (m == NULL) {
> -      _mesa_error(ctx, GL_INVALID_VALUE,
> -                  "glGetPerfQueryDataINTEL(invalid queryHandle)");
> -      return;
> -   }
> -
> -   /* We need at least enough room for a single value. */
> -   if (dataSize < sizeof(GLuint)) {
> -      *bytesWritten = 0;
> -      return;
> -   }
> -
> -   /* The GL_INTEL_performance_query spec says:
> -    *
> -    *    "The call may end without returning any data if they are not ready
> -    *    for reading as the measurement session is still pending (the
> -    *    EndPerfQueryINTEL() command processing is not finished by
> -    *    hardware). In this case location pointed by the bytesWritten
> -    *    parameter will be set to 0."
> -    *
> -    * If EndPerfQueryINTEL() is not called at all, we follow this.
> +   /* Not explicitly covered in the spec but to be consistent with
> +    * EndPerfQuery which validates that an application only ends an
> +    * active query we also validate that an application doesn't try
> +    * and get the data for a still active query...
>       */
> -   if (!m->Ended) {
> -      *bytesWritten = 0;
> +   if (obj->Active) {
> +      _mesa_error(ctx, GL_INVALID_OPERATION,
> +                  "glGetPerfQueryDataINTEL(query still active)");
>         return;
>      }
>   
> -   result_available = ctx->Driver.IsPerfMonitorResultAvailable(ctx, m);
> +   obj->Ready = ctx->Driver.IsPerfQueryReady(ctx, obj);
>   
> -   if (!result_available) {
> +   if (!obj->Ready) {
>         if (flags == GL_PERFQUERY_FLUSH_INTEL) {
>            ctx->Driver.Flush(ctx);
>         } else if (flags == GL_PERFQUERY_WAIT_INTEL) {
> -         /* Assume Finish() is both enough and not too much to wait for
> -          * results. If results are still not available after Finish(), the
> -          * later code automatically bails out with 0 for bytesWritten.
> -          */
> -         ctx->Driver.Finish(ctx);
> -         result_available =
> -            ctx->Driver.IsPerfMonitorResultAvailable(ctx, m);
> +         ctx->Driver.WaitPerfQuery(ctx, obj);
> +         obj->Ready = true;
>         }
>      }
>   
> -   if (result_available) {
> -      ctx->Driver.GetPerfMonitorResult(ctx, m, dataSize, data, (GLint*)bytesWritten);
> -   } else {
> +   if (obj->Ready)
> +      ctx->Driver.GetPerfQueryData(ctx, obj, dataSize, data, bytesWritten);
> +   else
>         *bytesWritten = 0;

This variable is already initialized above.

> -   }
>   }
> diff --git a/src/mesa/main/performance_query.h b/src/mesa/main/performance_query.h
> index 3fed5ea..8268f0e 100644
> --- a/src/mesa/main/performance_query.h
> +++ b/src/mesa/main/performance_query.h
> @@ -1,5 +1,5 @@
>   /*
> - * Copyright © 2012 Intel Corporation
> + * Copyright © 2012,2015 Intel Corporation
>    *
>    * Permission is hereby granted, free of charge, to any person obtaining a
>    * copy of this software and associated documentation files (the "Software"),
> @@ -38,10 +38,6 @@ _mesa_init_performance_queries(struct gl_context *ctx);
>   extern void
>   _mesa_free_performance_queries(struct gl_context *ctx);
>   
> -unsigned
> -_mesa_perf_query_counter_size(const struct gl_perf_monitor_counter *);
> -
> -
>   extern void GLAPIENTRY
>   _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId);
>