[Mesa-dev] [RFC 3/6] Model INTEL perf query backend after query object BE
Samuel Pitoiset
samuel.pitoiset at gmail.com
Mon May 11 08:11:11 PDT 2015
Patches 1 and 2 look fine to me.
See my comments below for this one.
On 05/06/2015 02:53 AM, Robert Bragg wrote:
> Instead of using the same backend interface as AMD_performance_monitor
> this defines a dedicated INTEL_performance_query interface that is based
> on the ARB_query_buffer_object interface (considering the similarity of
> the extensions) with the addition of vfuncs for enumerating queries and
> their counters.
>
> Compared to the previous backend, some notable differences are:
>
> - The backend is free to represent counters using whatever data
> structures are optimal/convenient since queries and counters are
> enumerated via an iterator api instead of declaring them using
> structures directly shared with the frontend.
>
> This is also done to help us support the full range of data and
> semantic types available with INTEL_performance_query which is awkward
> while using a structure shared with the AMD_performance_monitor
> backend since neither extension's types are a subset of the other.
>
> - The backend must support waiting for a query instead of the frontend
> simply using glFinish().
>
> - Objects go through 'Active' and 'Ready' states consistent with the
> query object backend (hopefully making them more familiar). There is
> no 'Ended' state (which used to show that a query has ended at least
> once for a given object). There is a new 'Used' state similar to the
> 'EverBound' state of query objects, set when a query is first begun
> which implies that we are expecting to get results back for the object
> at some point.
>
> The INTEL_performance_query and AMD_performance_monitor extensions are
> now completely orthogonal within Mesa (though a driver could optionally
> choose to implement both extensions within a unified backend if that
> were convenient for the sake of sharing state/code).
>
> Signed-off-by: Robert Bragg <robert at sixbynine.org>
> ---
> src/mesa/main/dd.h | 39 +++
> src/mesa/main/mtypes.h | 25 +-
> src/mesa/main/performance_query.c | 590 ++++++++++++++------------------------
> src/mesa/main/performance_query.h | 6 +-
> 4 files changed, 275 insertions(+), 385 deletions(-)
>
> diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
> index 0c1a13f..4ba1524 100644
> --- a/src/mesa/main/dd.h
> +++ b/src/mesa/main/dd.h
> @@ -759,6 +759,45 @@ struct dd_function_table {
> GLint *bytesWritten);
> /*@}*/
>
> + /**
> + * \name Performance Query objects
> + */
> + /*@{*/
> + void (*GetPerfQueryInfo)(struct gl_context *ctx,
> + int queryIndex,
> + const char **name,
> + GLuint *dataSize,
> + GLuint *numCounters,
> + GLuint *numActive);
> + void (*GetPerfCounterInfo)(struct gl_context *ctx,
> + int queryIndex,
> + int counterIndex,
> + const char **name,
> + const char **desc,
> + GLuint *offset,
> + GLuint *data_size,
> + GLuint *type_enum,
> + GLuint *data_type_enum,
> + GLuint64 *raw_max);
> + struct gl_perf_query_object * (*NewPerfQueryObject)(struct gl_context *ctx,
> + int queryIndex);
> + void (*DeletePerfQuery)(struct gl_context *ctx,
> + struct gl_perf_query_object *obj);
> + GLboolean (*BeginPerfQuery)(struct gl_context *ctx,
> + struct gl_perf_query_object *obj);
> + void (*EndPerfQuery)(struct gl_context *ctx,
> + struct gl_perf_query_object *obj);
> + void (*WaitPerfQuery)(struct gl_context *ctx,
> + struct gl_perf_query_object *obj);
> + GLboolean (*IsPerfQueryReady)(struct gl_context *ctx,
> + struct gl_perf_query_object *obj);
> + void (*GetPerfQueryData)(struct gl_context *ctx,
> + struct gl_perf_query_object *obj,
> + GLsizei dataSize,
> + GLuint *data,
> + GLuint *bytesWritten);
> + /*@}*/
> +
>
> /**
> * \name Vertex Array objects
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index b1e5fa9..a26109d 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -2014,6 +2014,23 @@ struct gl_perf_monitor_group
>
>
> /**
> + * A query object instance as described in INTEL_performance_query.
> + *
> + * NB: We want to keep this and the corresponding backend structure
> + * relatively lean considering that applications may expect to
> + * allocate enough objects to be able to query around all draw calls
> + * in a frame.
> + */
> +struct gl_perf_query_object
> +{
> + GLuint Id; /**< hash table ID/name */
> + GLuint Used:1; /**< has been used for 1 or more queries */
> + GLuint Active:1; /**< inside Begin/EndPerfQuery */
> + GLuint Ready:1; /**< result is ready? */
> +};
> +
> +
> +/**
> * Context state for AMD_performance_monitor.
> */
> struct gl_perf_monitor_state
> @@ -2032,12 +2049,8 @@ struct gl_perf_monitor_state
> */
> struct gl_perf_query_state
> {
> - /** Array of performance monitor groups (indexed by group ID) */
> - const struct gl_perf_monitor_group *Groups;
> - GLuint NumGroups;
> -
> - /** The table of all performance query objects. */
> - struct _mesa_HashTable *Objects;
> + struct _mesa_HashTable *Objects; /**< The table of all performance query objects */
> + GLuint NumQueries;
> };
>
>
> diff --git a/src/mesa/main/performance_query.c b/src/mesa/main/performance_query.c
> index 355836e..57d6f8e 100644
> --- a/src/mesa/main/performance_query.c
> +++ b/src/mesa/main/performance_query.c
> @@ -1,5 +1,5 @@
> /*
> - * Copyright © 2012 Intel Corporation
> + * Copyright © 2012-2015 Intel Corporation
> *
> * Permission is hereby granted, free of charge, to any person obtaining a
> * copy of this software and associated documentation files (the "Software"),
> @@ -24,14 +24,6 @@
> /**
> * \file performance_query.c
> * Core Mesa support for the INTEL_performance_query extension.
> - *
> - * In order to implement this extension, start by defining two enums:
> - * one for Groups, and one for Counters. These will be used as indexes into
> - * arrays, so they should start at 0 and increment from there.
> - *
> - * Counter IDs need to be globally unique. That is, you can't have counter 7
> - * in group A and counter 7 in group B. A global enum of all available
> - * counters is a convenient way to guarantee this.
> */
>
> #include <stdbool.h>
> @@ -42,66 +34,21 @@
> #include "macros.h"
> #include "mtypes.h"
> #include "performance_query.h"
> -#include "util/bitset.h"
> #include "util/ralloc.h"
>
> void
> _mesa_init_performance_queries(struct gl_context *ctx)
> {
> ctx->PerfQuery.Objects = _mesa_NewHashTable();
> - ctx->PerfQuery.NumGroups = 0;
Why don't you initialize NumQueries here?
> - ctx->PerfQuery.Groups = NULL;
> -}
> -
> -static struct gl_perf_monitor_object *
> -new_performance_query(struct gl_context *ctx, GLuint index)
> -{
> - unsigned i;
> - struct gl_perf_monitor_object *m = ctx->Driver.NewPerfMonitor(ctx);
> -
> - if (m == NULL)
> - return NULL;
> -
> - m->Name = index;
> -
> - m->Active = false;
> -
> - m->ActiveGroups =
> - rzalloc_array(NULL, unsigned, ctx->PerfQuery.NumGroups);
> -
> - m->ActiveCounters =
> - ralloc_array(NULL, BITSET_WORD *, ctx->PerfQuery.NumGroups);
> -
> - if (m->ActiveGroups == NULL || m->ActiveCounters == NULL)
> - goto fail;
> -
> - for (i = 0; i < ctx->PerfQuery.NumGroups; i++) {
> - const struct gl_perf_monitor_group *g = &ctx->PerfQuery.Groups[i];
> -
> - m->ActiveCounters[i] = rzalloc_array(m->ActiveCounters, BITSET_WORD,
> - BITSET_WORDS(g->NumCounters));
> - if (m->ActiveCounters[i] == NULL)
> - goto fail;
> - }
> -
> - return m;
> -
> -fail:
> - ralloc_free(m->ActiveGroups);
> - ralloc_free(m->ActiveCounters);
> - ctx->Driver.DeletePerfMonitor(ctx, m);
> - return NULL;
> }
>
> static void
> free_performance_query(GLuint key, void *data, void *user)
> {
> - struct gl_perf_monitor_object *m = data;
> + struct gl_perf_query_object *m = data;
> struct gl_context *ctx = user;
>
> - ralloc_free(m->ActiveGroups);
> - ralloc_free(m->ActiveCounters);
> - ctx->Driver.DeletePerfMonitor(ctx, m);
> + ctx->Driver.DeletePerfQuery(ctx, m);
> }
>
> void
> @@ -112,34 +59,13 @@ _mesa_free_performance_queries(struct gl_context *ctx)
> _mesa_DeleteHashTable(ctx->PerfQuery.Objects);
> }
>
> -static inline struct gl_perf_monitor_object *
> -lookup_query(struct gl_context *ctx, GLuint id)
> -{
> - return (struct gl_perf_monitor_object *)
> - _mesa_HashLookup(ctx->PerfQuery.Objects, id);
> -}
> -
> -static inline const struct gl_perf_monitor_group *
> -get_group(const struct gl_context *ctx, GLuint id)
> +static inline struct gl_perf_query_object *
> +lookup_object(struct gl_context *ctx, GLuint id)
> {
> - if (id >= ctx->PerfQuery.NumGroups)
> - return NULL;
> -
> - return &ctx->PerfQuery.Groups[id];
> -}
> -
> -static inline const struct gl_perf_monitor_counter *
> -get_counter(const struct gl_perf_monitor_group *group_obj, GLuint id)
> -{
> - if (id >= group_obj->NumCounters)
> - return NULL;
> -
> - return &group_obj->Counters[id];
> + return _mesa_HashLookup(ctx->PerfQuery.Objects, id);
I guess _mesa_HashLookup() returns NULL when it cannot find the element,
right?
> }
>
> -/* For INTEL_performance_query, query id 0 is reserved to be invalid. We use
> - * index to Groups array + 1 as the query id. Same applies to counter id.
> - */
> +/* For INTEL_performance_query, query id 0 is reserved to be invalid. */
> static inline GLuint
> queryid_to_index(GLuint queryid)
> {
> @@ -155,7 +81,8 @@ index_to_queryid(GLuint index)
> static inline bool
> queryid_valid(const struct gl_context *ctx, GLuint queryid)
> {
> - return get_group(ctx, queryid_to_index(queryid)) != NULL;
> + GLuint index = queryid_to_index(queryid);
> + return index >= 0 && index < ctx->PerfQuery.NumQueries;
> }
>
> static inline GLuint
> @@ -164,33 +91,29 @@ counterid_to_index(GLuint counterid)
> return counterid - 1;
> }
>
> -/*****************************************************************************/
> -
> -/**
> - * Returns how many bytes a counter's value takes up.
> - */
> -unsigned
> -_mesa_perf_query_counter_size(const struct gl_perf_monitor_counter *c)
> +static void
> +return_string(char *stringRet, GLuint stringMaxLen, const char *string)
In my opinion, the name of this function is a bit ambiguous. :-)
> {
> - switch (c->Type) {
> - case GL_FLOAT:
> - case GL_PERCENTAGE_AMD:
> - return sizeof(GLfloat);
> - case GL_UNSIGNED_INT:
> - return sizeof(GLuint);
> - case GL_UNSIGNED_INT64_AMD:
> - return sizeof(uint64_t);
> - default:
> - assert(!"Should not get here: invalid counter type");
> - return 0;
> - }
> + if (!stringRet)
> + return;
> +
> + strncpy(stringRet, string ? string : "", stringMaxLen);
> +
> + /* No specification given about whether returned strings needs
> + * to be zero-terminated. Zero-terminate the string always as we
> + * don't otherwise communicate the length of the returned
> + * string.
> + */
> + if (stringMaxLen > 0)
> + stringRet[stringMaxLen - 1] = '\0';
> }
>
> +/*****************************************************************************/
> +
> extern void GLAPIENTRY
> _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId)
> {
> GET_CURRENT_CONTEXT(ctx);
> - unsigned numGroups;
>
> /* The GL_INTEL_performance_query spec says:
> *
> @@ -202,15 +125,13 @@ _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId)
> return;
> }
>
> - numGroups = ctx->PerfQuery.NumGroups;
> -
> /* The GL_INTEL_performance_query spec says:
> *
> * "If the given hardware platform doesn't support any performance
> * queries, then the value of 0 is returned and INVALID_OPERATION error
> * is raised."
> */
> - if (numGroups == 0) {
> + if (ctx->PerfQuery.NumQueries == 0) {
> *queryId = 0;
> _mesa_error(ctx, GL_INVALID_OPERATION,
> "glGetFirstPerfQueryIdINTEL(no queries supported)");
> @@ -242,26 +163,23 @@ _mesa_GetNextPerfQueryIdINTEL(GLuint queryId, GLuint *nextQueryId)
> }
>
> if (!queryid_valid(ctx, queryId)) {
> - *nextQueryId = 0;
> _mesa_error(ctx, GL_INVALID_VALUE,
> "glGetNextPerfQueryIdINTEL(invalid query)");
> return;
> }
>
> - ++queryId;
> -
> - if (!queryid_valid(ctx, queryId)) {
> - *nextQueryId = 0;
> - } else {
> + if (queryid_valid(ctx, ++queryId))
> *nextQueryId = queryId;
> - }
> + else
> + *nextQueryId = 0;
> }
>
> extern void GLAPIENTRY
> _mesa_GetPerfQueryIdByNameINTEL(char *queryName, GLuint *queryId)
> {
> GET_CURRENT_CONTEXT(ctx);
> - unsigned i;
> +
> + int i;
>
> /* The GL_INTEL_performance_query spec says:
> *
> @@ -274,15 +192,22 @@ _mesa_GetPerfQueryIdByNameINTEL(char *queryName, GLuint *queryId)
> return;
> }
>
> - /* The specification does not state that this produces an error. */
> + /* The specification does not state that this produces an error but
> + * to be consistent with glGetFirstPerfQueryIdINTEL we generate an
> + * INVALID_VALUE error */
> if (!queryId) {
> - _mesa_warning(ctx, "glGetPerfQueryIdByNameINTEL(queryId == NULL)");
> + _mesa_error(ctx, GL_INVALID_VALUE,
> + "glGetPerfQueryIdByNameINTEL(queryId == NULL)");
> return;
> }
>
> - for (i = 0; i < ctx->PerfQuery.NumGroups; ++i) {
> - const struct gl_perf_monitor_group *group_obj = get_group(ctx, i);
> - if (strcmp(group_obj->Name, queryName) == 0) {
> + for (i = 0; i < ctx->PerfQuery.NumQueries; ++i) {
> + const GLchar *name;
> + GLuint ignore;
> +
> + ctx->Driver.GetPerfQueryInfo(ctx, i, &name, &ignore, &ignore, &ignore);
Why these parameters are currently ignored?
> +
> + if (strcmp(name, queryName) == 0) {
> *queryId = index_to_queryid(i);
> return;
> }
> @@ -294,18 +219,21 @@ _mesa_GetPerfQueryIdByNameINTEL(char *queryName, GLuint *queryId)
>
> extern void GLAPIENTRY
> _mesa_GetPerfQueryInfoINTEL(GLuint queryId,
> - GLuint queryNameLength, char *queryName,
> - GLuint *dataSize, GLuint *noCounters,
> - GLuint *noActiveInstances,
> + GLuint nameLength, char *name,
> + GLuint *dataSize,
> + GLuint *numCounters,
> + GLuint *numActive,
> GLuint *capsMask)
> {
> GET_CURRENT_CONTEXT(ctx);
> - unsigned i;
>
> - const struct gl_perf_monitor_group *group_obj =
> - get_group(ctx, queryid_to_index(queryId));
> + GLuint queryIndex = queryid_to_index(queryId);
> + const GLchar *queryName;
> + GLuint queryDataSize;
> + GLuint queryNumCounters;
> + GLuint queryNumActive;
>
> - if (group_obj == NULL) {
> + if (!queryid_valid(ctx, queryId)) {
> /* The GL_INTEL_performance_query spec says:
> *
> * "If queryId does not reference a valid query type, an
> @@ -316,32 +244,19 @@ _mesa_GetPerfQueryInfoINTEL(GLuint queryId,
> return;
> }
>
> - if (queryName) {
> - strncpy(queryName, group_obj->Name, queryNameLength);
> + ctx->Driver.GetPerfQueryInfo(ctx, queryIndex,
> + &queryName,
> + &queryDataSize,
> + &queryNumCounters,
> + &queryNumActive);
>
> - /* No specification given about whether the string needs to be
> - * zero-terminated. Zero-terminate the string always as we don't
> - * otherwise communicate the length of the returned string.
> - */
> - if (queryNameLength > 0) {
> - queryName[queryNameLength - 1] = '\0';
> - }
> - }
> + return_string(name, nameLength, queryName);
>
> - if (dataSize) {
> - unsigned size = 0;
> - for (i = 0; i < group_obj->NumCounters; ++i) {
> - /* What we get from the driver is group id (uint32_t) + counter id
> - * (uint32_t) + value.
> - */
> - size += 2 * sizeof(uint32_t) + _mesa_perf_query_counter_size(&group_obj->Counters[i]);
> - }
> - *dataSize = size;
> - }
> + if (dataSize)
> + *dataSize = queryDataSize;
>
> - if (noCounters) {
> - *noCounters = group_obj->NumCounters;
> - }
> + if (numCounters)
> + *numCounters = queryNumCounters;
>
> /* The GL_INTEL_performance_query spec says:
> *
> @@ -352,128 +267,93 @@ _mesa_GetPerfQueryInfoINTEL(GLuint queryId,
> * 2) Another typo in the specification, maxInstances parameter is not listed
> * in the declaration of this function in the list of new functions.
> */
> - if (noActiveInstances) {
> - *noActiveInstances = _mesa_HashNumEntries(ctx->PerfQuery.Objects);
> - }
> + if (numActive)
> + *numActive = queryNumActive;
>
> - if (capsMask) {
> - /* TODO: This information not yet available in the monitor structs. For
> - * now, we hardcode SINGLE_CONTEXT, since that's what the implementation
> - * currently tries very hard to do.
> - */
> + /* Assume for now that all queries are per-context */
> + if (capsMask)
> *capsMask = GL_PERFQUERY_SINGLE_CONTEXT_INTEL;
> - }
What is your plan about this? Are you going to support multiple contexts
in the near future?
> }
>
> extern void GLAPIENTRY
> _mesa_GetPerfCounterInfoINTEL(GLuint queryId, GLuint counterId,
> - GLuint counterNameLength, char *counterName,
> - GLuint counterDescLength, char *counterDesc,
> - GLuint *counterOffset, GLuint *counterDataSize, GLuint *counterTypeEnum,
> - GLuint *counterDataTypeEnum, GLuint64 *rawCounterMaxValue)
> + GLuint nameLength, char *name,
> + GLuint descLength, char *desc,
> + GLuint *offset,
> + GLuint *dataSize,
> + GLuint *typeEnum,
> + GLuint *dataTypeEnum,
> + GLuint64 *rawCounterMaxValue)
> {
> GET_CURRENT_CONTEXT(ctx);
>
> - const struct gl_perf_monitor_group *group_obj;
> - const struct gl_perf_monitor_counter *counter_obj;
> - unsigned counterIndex;
> - unsigned i;
> -
> - group_obj = get_group(ctx, queryid_to_index(queryId));
> + GLuint queryIndex = queryid_to_index(queryId);
> + const GLchar *queryName;
> + GLuint queryDataSize;
> + GLuint queryNumCounters;
> + GLuint queryNumActive;
> + GLuint counterIndex;
> + const GLchar *counterName;
> + const GLchar *counterDesc;
> + GLuint counterOffset;
> + GLuint counterDataSize;
> + GLuint counterTypeEnum;
> + GLuint counterDataTypeEnum;
> + GLuint64 counterRawMax;
>
> - /* The GL_INTEL_performance_query spec says:
> - *
> - * "If the pair of queryId and counterId does not reference a valid
> - * counter, an INVALID_VALUE error is generated."
> - */
> - if (group_obj == NULL) {
> + if (!queryid_valid(ctx, queryId)) {
> + /* The GL_INTEL_performance_query spec says:
> + *
> + * "If the pair of queryId and counterId does not reference a valid
> + * counter, an INVALID_VALUE error is generated."
> + */
> _mesa_error(ctx, GL_INVALID_VALUE,
> "glGetPerfCounterInfoINTEL(invalid queryId)");
> return;
> }
>
> + ctx->Driver.GetPerfQueryInfo(ctx, queryIndex,
> + &queryName,
> + &queryDataSize,
> + &queryNumCounters,
> + &queryNumActive);
> +
> counterIndex = counterid_to_index(counterId);
> - counter_obj = get_counter(group_obj, counterIndex);
>
> - if (counter_obj == NULL) {
> + if (counterIndex < 0 || counterIndex >= queryNumCounters) {
> _mesa_error(ctx, GL_INVALID_VALUE,
> "glGetPerfCounterInfoINTEL(invalid counterId)");
> return;
> }
>
> - if (counterName) {
> - strncpy(counterName, counter_obj->Name, counterNameLength);
> + ctx->Driver.GetPerfCounterInfo(ctx, queryIndex, counterIndex,
> + &counterName,
> + &counterDesc,
> + &counterOffset,
> + &counterDataSize,
> + &counterTypeEnum,
> + &counterDataTypeEnum,
> + &counterRawMax);
>
> - /* No specification given about whether the string needs to be
> - * zero-terminated. Zero-terminate the string always as we don't
> - * otherwise communicate the length of the returned string.
> - */
> - if (counterNameLength > 0) {
> - counterName[counterNameLength - 1] = '\0';
> - }
> - }
> + return_string(name, nameLength, counterName);
> + return_string(desc, descLength, counterDesc);
>
> - if (counterDesc) {
> - /* TODO: No separate description text at the moment. We pass the name
> - * again for the moment.
> - */
> - strncpy(counterDesc, counter_obj->Name, counterDescLength);
> -
> - /* No specification given about whether the string needs to be
> - * zero-terminated. Zero-terminate the string always as we don't
> - * otherwise communicate the length of the returned string.
> - */
> - if (counterDescLength > 0) {
> - counterDesc[counterDescLength - 1] = '\0';
> - }
> - }
> + if (offset)
> + *offset = counterOffset;
>
> - if (counterOffset) {
> - unsigned offset = 0;
> - for (i = 0; i < counterIndex; ++i) {
> - /* What we get from the driver is group id (uint32_t) + counter id
> - * (uint32_t) + value.
> - */
> - offset += 2 * sizeof(uint32_t) + _mesa_perf_query_counter_size(&group_obj->Counters[i]);
> - }
> - *counterOffset = 2 * sizeof(uint32_t) + offset;
> - }
> + if (dataSize)
> + *dataSize = counterDataSize;
>
> - if (counterDataSize) {
> - *counterDataSize = _mesa_perf_query_counter_size(counter_obj);
> - }
> + if (typeEnum)
> + *typeEnum = counterTypeEnum;
>
> - if (counterTypeEnum) {
> - /* TODO: Different counter types (semantic type, not data type) not
> - * supported as of yet.
> - */
> - *counterTypeEnum = GL_PERFQUERY_COUNTER_RAW_INTEL;
> - }
> + if (dataTypeEnum)
> + *dataTypeEnum = counterDataTypeEnum;
>
> - if (counterDataTypeEnum) {
> - switch (counter_obj->Type) {
> - case GL_FLOAT:
> - case GL_PERCENTAGE_AMD:
> - *counterDataTypeEnum = GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL;
> - break;
> - case GL_UNSIGNED_INT:
> - *counterDataTypeEnum = GL_PERFQUERY_COUNTER_DATA_UINT32_INTEL;
> - break;
> - case GL_UNSIGNED_INT64_AMD:
> - *counterDataTypeEnum = GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL;
> - break;
> - default:
> - assert(!"Should not get here: invalid counter type");
> - return;
> - }
> - }
> + if (rawCounterMaxValue)
> + *rawCounterMaxValue = counterRawMax;
>
> if (rawCounterMaxValue) {
> - /* This value is (implicitly) specified to be used only with
> - * GL_PERFQUERY_COUNTER_RAW_INTEL counters. When semantic types for
> - * counters are added, that needs to be checked.
> - */
> -
> /* The GL_INTEL_performance_query spec says:
> *
> * "for some raw counters for which the maximal value is
> @@ -481,10 +361,17 @@ _mesa_GetPerfCounterInfoINTEL(GLuint queryId, GLuint counterId,
> * returned in the location pointed by rawCounterMaxValue, otherwise,
> * the location is written with the value of 0."
> *
> - * The maximum value reported by the driver at the moment is not with
> - * these semantics, so write 0 always to be safe.
> + * Since it's very useful to be able to report a maximum value for
> + * more that just counters using the _COUNTER_RAW_INTEL or
> + * _COUNTER_DURATION_RAW_INTEL enums (e.g. for a _THROUGHPUT tools
> + * want to be able to visualize the absolute throughput with respect
> + * to the theoretical maximum that's possible) and there doesn't seem
> + * to be any reason not to allow _THROUGHPUT counters to also be
> + * considerer "raw" here, we always leave it up to the backend to
> + * decide when it's appropriate to report a maximum counter value or 0
> + * if not.
> */
> - *rawCounterMaxValue = 0;
> + *rawCounterMaxValue = counterRawMax;
> }
> }
>
> @@ -492,44 +379,32 @@ extern void GLAPIENTRY
> _mesa_CreatePerfQueryINTEL(GLuint queryId, GLuint *queryHandle)
> {
> GET_CURRENT_CONTEXT(ctx);
> - GLuint first;
> - GLuint group;
> - const struct gl_perf_monitor_group *group_obj;
> - struct gl_perf_monitor_object *m;
> - unsigned i;
> -
> - /* This is not specified in the extension, but is the only sane thing to
> - * do.
> - */
> - if (queryHandle == NULL) {
> - _mesa_error(ctx, GL_INVALID_VALUE,
> - "glCreatePerfQueryINTEL(queryHandle == NULL)");
> - return;
> - }
>
> - group = queryid_to_index(queryId);
> - group_obj = get_group(ctx, group);
> + GLuint id;
> + struct gl_perf_query_object *obj;
>
> /* The GL_INTEL_performance_query spec says:
> *
> * "If queryId does not reference a valid query type, an INVALID_VALUE
> * error is generated."
> */
> - if (group_obj == NULL) {
> + if (!queryid_valid(ctx, queryId)) {
> _mesa_error(ctx, GL_INVALID_VALUE,
> "glCreatePerfQueryINTEL(invalid queryId)");
> return;
> }
>
> - /* The query object created here is the counterpart of a `monitor' in
> - * AMD_performance_monitor. This call is equivalent to calling
> - * GenPerfMonitorsAMD and SelectPerfMonitorCountersAMD with a list of all
> - * counters in a group.
> + /* This is not specified in the extension, but is the only sane thing to
> + * do.
> */
> + if (queryHandle == NULL) {
> + _mesa_error(ctx, GL_INVALID_VALUE,
> + "glCreatePerfQueryINTEL(queryHandle == NULL)");
> + return;
> + }
>
> - /* We keep the query ids contiguous */
> - first = _mesa_HashFindFreeKeyBlock(ctx->PerfQuery.Objects, 1);
> - if (!first) {
> + id = _mesa_HashFindFreeKeyBlock(ctx->PerfQuery.Objects, 1);
> + if (!id) {
> /* The GL_INTEL_performance_query spec says:
> *
> * "If the query instance cannot be created due to exceeding the
> @@ -537,82 +412,72 @@ _mesa_CreatePerfQueryINTEL(GLuint queryId, GLuint *queryHandle)
> * an insufficient memory reason, an OUT_OF_MEMORY error is
> * generated, and the location pointed by queryHandle returns NULL."
> */
> - _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCreatePerfQueryINTEL");
> + _mesa_error_no_memory(__func__);
> return;
> }
>
> - m = new_performance_query(ctx, first);
> - if (m == NULL) {
> + obj = ctx->Driver.NewPerfQueryObject(ctx, queryid_to_index(queryId));
> + if (obj == NULL) {
> _mesa_error_no_memory(__func__);
> return;
> }
>
> - _mesa_HashInsert(ctx->PerfQuery.Objects, first, m);
> - *queryHandle = first;
> + obj->Id = id;
> + obj->Active = false;
> + obj->Ready = false;
>
> - ctx->Driver.ResetPerfMonitor(ctx, m);
> -
> - for (i = 0; i < group_obj->NumCounters; ++i) {
> - ++m->ActiveGroups[group];
> - /* Counters are a continuous range of integers, 0 to NumCounters (excl),
> - * so i is the counter value to use here.
> - */
> - BITSET_SET(m->ActiveCounters[group], i);
> - }
> + _mesa_HashInsert(ctx->PerfQuery.Objects, id, obj);
> + *queryHandle = id;
> }
>
> extern void GLAPIENTRY
> _mesa_DeletePerfQueryINTEL(GLuint queryHandle)
> {
> GET_CURRENT_CONTEXT(ctx);
> - struct gl_perf_monitor_object *m;
>
> - /* The queryHandle is the counterpart to AMD_performance_monitor's monitor
> - * id.
> - */
> - m = lookup_query(ctx, queryHandle);
> + struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
I think you can directly use _mesa_HashLookup() instead of this somewhat
useless wrapper function.
>
> /* The GL_INTEL_performance_query spec says:
> *
> * "If a query handle doesn't reference a previously created performance
> * query instance, an INVALID_VALUE error is generated."
> */
> - if (m == NULL) {
> + if (obj == NULL) {
> _mesa_error(ctx, GL_INVALID_VALUE,
> "glDeletePerfQueryINTEL(invalid queryHandle)");
> return;
> }
>
> - /* Let the driver stop the query if it's active. */
> - if (m->Active) {
> - ctx->Driver.ResetPerfMonitor(ctx, m);
> - m->Ended = false;
> + /* To avoid complications in the backed we never ask the backend to
Typo: backend
> + * delete an active query or a query object while we are still
> + * waiting for data.
> + */
> +
> + if (obj->Active)
> + _mesa_EndPerfQueryINTEL(queryHandle);
> +
> + if (obj->Used && !obj->Ready) {
> + ctx->Driver.WaitPerfQuery(ctx, obj);
> + obj->Ready = true;
> }
>
> _mesa_HashRemove(ctx->PerfQuery.Objects, queryHandle);
> - ralloc_free(m->ActiveGroups);
> - ralloc_free(m->ActiveCounters);
> - ctx->Driver.DeletePerfMonitor(ctx, m);
> + ctx->Driver.DeletePerfQuery(ctx, obj);
> }
>
> extern void GLAPIENTRY
> _mesa_BeginPerfQueryINTEL(GLuint queryHandle)
> {
> GET_CURRENT_CONTEXT(ctx);
> - struct gl_perf_monitor_object *m;
> -
> - /* The queryHandle is the counterpart to AMD_performance_monitor's monitor
> - * id.
> - */
>
> - m = lookup_query(ctx, queryHandle);
> + struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
>
> /* The GL_INTEL_performance_query spec says:
> *
> * "If a query handle doesn't reference a previously created performance
> * query instance, an INVALID_VALUE error is generated."
> */
> - if (m == NULL) {
> + if (obj == NULL) {
> _mesa_error(ctx, GL_INVALID_VALUE,
> "glBeginPerfQueryINTEL(invalid queryHandle)");
> return;
> @@ -628,15 +493,24 @@ _mesa_BeginPerfQueryINTEL(GLuint queryHandle)
> * We also generate an INVALID_OPERATION error if the driver can't begin
> * a query for its own reasons, and for nesting the same query.
> */
> - if (m->Active) {
> + if (obj->Active) {
> _mesa_error(ctx, GL_INVALID_OPERATION,
> "glBeginPerfQueryINTEL(already active)");
> return;
> }
>
> - if (ctx->Driver.BeginPerfMonitor(ctx, m)) {
> - m->Active = true;
> - m->Ended = false;
> + /* To avoid complications in the backed we never ask the backend to
> + * reuse a query object and begin a new query while we are still
> + * waiting for data on that object. */
What the spec says about that?
My understanding is that with your code we can't abort a query, right?
> + if (obj->Used && !obj->Ready) {
> + ctx->Driver.WaitPerfQuery(ctx, obj);
> + obj->Ready = true;
> + }
> +
> + if (ctx->Driver.BeginPerfQuery(ctx, obj)) {
> + obj->Used = true;
> + obj->Active = true;
> + obj->Ready = false;
> } else {
> _mesa_error(ctx, GL_INVALID_OPERATION,
> "glBeginPerfQueryINTEL(driver unable to begin query)");
> @@ -647,39 +521,32 @@ extern void GLAPIENTRY
> _mesa_EndPerfQueryINTEL(GLuint queryHandle)
> {
> GET_CURRENT_CONTEXT(ctx);
> - struct gl_perf_monitor_object *m;
>
> - /* The queryHandle is the counterpart to AMD_performance_monitor's monitor
> - * id.
> - */
> + struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
>
> - m = lookup_query(ctx, queryHandle);
> + /* Not explicitly covered in the spec, but for consistency... */
> + if (obj == NULL) {
> + _mesa_error(ctx, GL_INVALID_VALUE,
> + "glEndPerfQueryINTEL(invalid queryHandle)");
> + return;
> + }
>
> /* The GL_INTEL_performance_query spec says:
> *
> * "If a performance query is not currently started, an
> * INVALID_OPERATION error will be generated."
> - *
> - * The specification doesn't state that an invalid handle would be an
> - * INVALID_VALUE error. Regardless, query for such a handle will not be
> - * started, so we generate an INVALID_OPERATION in that case too.
> */
> - if (m == NULL) {
> - _mesa_error(ctx, GL_INVALID_OPERATION,
> - "glEndPerfQueryINTEL(invalid queryHandle)");
> - return;
> - }
>
> - if (!m->Active) {
> + if (!obj->Active) {
> _mesa_error(ctx, GL_INVALID_OPERATION,
> "glEndPerfQueryINTEL(not active)");
> return;
> }
>
> - ctx->Driver.EndPerfMonitor(ctx, m);
> + ctx->Driver.EndPerfQuery(ctx, obj);
>
> - m->Active = false;
> - m->Ended = true;
> + obj->Active = false;
> + obj->Ready = false;
What about obj->Used flag?
> }
>
> extern void GLAPIENTRY
> @@ -687,8 +554,15 @@ _mesa_GetPerfQueryDataINTEL(GLuint queryHandle, GLuint flags,
> GLsizei dataSize, void *data, GLuint *bytesWritten)
> {
> GET_CURRENT_CONTEXT(ctx);
> - struct gl_perf_monitor_object *m;
> - bool result_available;
> +
> + struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
> +
> + /* Not explicitly covered in the spec, but for consistency... */
> + if (obj == NULL) {
> + _mesa_error(ctx, GL_INVALID_VALUE,
> + "glEndPerfQueryINTEL(invalid queryHandle)");
> + return;
> + }
>
> /* The GL_INTEL_performance_query spec says:
> *
> @@ -701,66 +575,34 @@ _mesa_GetPerfQueryDataINTEL(GLuint queryHandle, GLuint flags,
> return;
> }
>
> - /* The queryHandle is the counterpart to AMD_performance_monitor's monitor
> - * id.
> - */
> + /* Just for good measure in case a lazy application is only
> + * checking this and not checking for errors... */
> + *bytesWritten = 0;
>
> - m = lookup_query(ctx, queryHandle);
> -
> - /* The specification doesn't state that an invalid handle generates an
> - * error. We could interpret that to mean the case should be handled as
> - * "measurement not ready for this query", but what should be done if
> - * `flags' equals PERFQUERY_WAIT_INTEL?
> - *
> - * To resolve this, we just generate an INVALID_VALUE from an invalid query
> - * handle.
> - */
> - if (m == NULL) {
> - _mesa_error(ctx, GL_INVALID_VALUE,
> - "glGetPerfQueryDataINTEL(invalid queryHandle)");
> - return;
> - }
> -
> - /* We need at least enough room for a single value. */
> - if (dataSize < sizeof(GLuint)) {
> - *bytesWritten = 0;
> - return;
> - }
> -
> - /* The GL_INTEL_performance_query spec says:
> - *
> - * "The call may end without returning any data if they are not ready
> - * for reading as the measurement session is still pending (the
> - * EndPerfQueryINTEL() command processing is not finished by
> - * hardware). In this case location pointed by the bytesWritten
> - * parameter will be set to 0."
> - *
> - * If EndPerfQueryINTEL() is not called at all, we follow this.
> + /* Not explicitly covered in the spec but to be consistent with
> + * EndPerfQuery which validates that an application only ends an
> + * active query we also validate that an application doesn't try
> + * and get the data for a still active query...
> */
> - if (!m->Ended) {
> - *bytesWritten = 0;
> + if (obj->Active) {
> + _mesa_error(ctx, GL_INVALID_OPERATION,
> + "glGetPerfQueryDataINTEL(query still active)");
> return;
> }
>
> - result_available = ctx->Driver.IsPerfMonitorResultAvailable(ctx, m);
> + obj->Ready = ctx->Driver.IsPerfQueryReady(ctx, obj);
>
> - if (!result_available) {
> + if (!obj->Ready) {
> if (flags == GL_PERFQUERY_FLUSH_INTEL) {
> ctx->Driver.Flush(ctx);
> } else if (flags == GL_PERFQUERY_WAIT_INTEL) {
> - /* Assume Finish() is both enough and not too much to wait for
> - * results. If results are still not available after Finish(), the
> - * later code automatically bails out with 0 for bytesWritten.
> - */
> - ctx->Driver.Finish(ctx);
> - result_available =
> - ctx->Driver.IsPerfMonitorResultAvailable(ctx, m);
> + ctx->Driver.WaitPerfQuery(ctx, obj);
> + obj->Ready = true;
> }
> }
>
> - if (result_available) {
> - ctx->Driver.GetPerfMonitorResult(ctx, m, dataSize, data, (GLint*)bytesWritten);
> - } else {
> + if (obj->Ready)
> + ctx->Driver.GetPerfQueryData(ctx, obj, dataSize, data, bytesWritten);
> + else
> *bytesWritten = 0;
This variable is already initialized above.
> - }
> }
> diff --git a/src/mesa/main/performance_query.h b/src/mesa/main/performance_query.h
> index 3fed5ea..8268f0e 100644
> --- a/src/mesa/main/performance_query.h
> +++ b/src/mesa/main/performance_query.h
> @@ -1,5 +1,5 @@
> /*
> - * Copyright © 2012 Intel Corporation
> + * Copyright © 2012,2015 Intel Corporation
> *
> * Permission is hereby granted, free of charge, to any person obtaining a
> * copy of this software and associated documentation files (the "Software"),
> @@ -38,10 +38,6 @@ _mesa_init_performance_queries(struct gl_context *ctx);
> extern void
> _mesa_free_performance_queries(struct gl_context *ctx);
>
> -unsigned
> -_mesa_perf_query_counter_size(const struct gl_perf_monitor_counter *);
> -
> -
> extern void GLAPIENTRY
> _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId);
>
More information about the mesa-dev
mailing list