[Mesa-dev] [RFC 3/6] Model INTEL perf query backend after query object BE

Tue May 12 14:37:34 PDT 2015

On Mon, May 11, 2015 at 4:11 PM, Samuel Pitoiset
<samuel.pitoiset at gmail.com> wrote:
> Patches 1 and 2 look fine to me.
>
> See my comments below for this one.
>
>
> On 05/06/2015 02:53 AM, Robert Bragg wrote:
>>

>> --- a/src/mesa/main/performance_query.c
>> +++ b/src/mesa/main/performance_query.c
>> @@ -1,5 +1,5 @@
>>   /*
>> - * Copyright © 2012 Intel Corporation
>> + * Copyright © 2012-2015 Intel Corporation
>>    *
>>    * Permission is hereby granted, free of charge, to any person obtaining
>> a
>>    * copy of this software and associated documentation files (the
>> "Software"),
>> @@ -24,14 +24,6 @@
>>   /**
>>    * \file performance_query.c
>>    * Core Mesa support for the INTEL_performance_query extension.
>> - *
>> - * In order to implement this extension, start by defining two enums:
>> - * one for Groups, and one for Counters.  These will be used as indexes
>> into
>> - * arrays, so they should start at 0 and increment from there.
>> - *
>> - * Counter IDs need to be globally unique.  That is, you can't have
>> counter 7
>> - * in group A and counter 7 in group B.  A global enum of all available
>> - * counters is a convenient way to guarantee this.
>>    */
>>     #include <stdbool.h>
>> @@ -42,66 +34,21 @@
>>   #include "macros.h"
>>   #include "mtypes.h"
>>   #include "performance_query.h"
>> -#include "util/bitset.h"
>>   #include "util/ralloc.h"
>>     void
>>   _mesa_init_performance_queries(struct gl_context *ctx)
>>   {
>>      ctx->PerfQuery.Objects = _mesa_NewHashTable();
>> -   ctx->PerfQuery.NumGroups = 0;
>
>
> Why don't you initialize NumQueries here?

Hmm, yeah can't think of a good reason. It's initialized in the
backend, but it seems bad to not be initializing it here.

>> @@ -112,34 +59,13 @@ _mesa_free_performance_queries(struct gl_context
>> *ctx)
>>      _mesa_DeleteHashTable(ctx->PerfQuery.Objects);
>>   }
>>   -static inline struct gl_perf_monitor_object *
>> -lookup_query(struct gl_context *ctx, GLuint id)
>> -{
>> -   return (struct gl_perf_monitor_object *)
>> -      _mesa_HashLookup(ctx->PerfQuery.Objects, id);
>> -}
>> -
>> -static inline const struct gl_perf_monitor_group *
>> -get_group(const struct gl_context *ctx, GLuint id)
>> +static inline struct gl_perf_query_object *
>> +lookup_object(struct gl_context *ctx, GLuint id)
>>   {
>> -   if (id >= ctx->PerfQuery.NumGroups)
>> -      return NULL;
>> -
>> -   return &ctx->PerfQuery.Groups[id];
>> -}
>> -
>> -static inline const struct gl_perf_monitor_counter *
>> -get_counter(const struct gl_perf_monitor_group *group_obj, GLuint id)
>> -{
>> -   if (id >= group_obj->NumCounters)
>> -      return NULL;
>> -
>> -   return &group_obj->Counters[id];
>> +   return _mesa_HashLookup(ctx->PerfQuery.Objects, id);
>
>
> I guess _mesa_HashLookup() returns NULL when it cannot find the element,
> right?

I think that's right. The function description says "\return pointer
to user's data or NULL if key not in table",
_mesa_HashLookup_unlocked() seems to imply as much.

>> +static void
>> +return_string(char *stringRet, GLuint stringMaxLen, const char *string)
>
>
> In my opinion, the name of this function is a bit ambiguous. :-)

Right, I'm not very happy with its naming either. Does
output_string_safe() seem clearer, or output_clipped_string() perhaps?

>> @@ -274,15 +192,22 @@ _mesa_GetPerfQueryIdByNameINTEL(char *queryName,
>> GLuint *queryId)
>>         return;
>>      }
>>   -   /* The specification does not state that this produces an error. */
>> +   /* The specification does not state that this produces an error but
>> +    * to be consistent with glGetFirstPerfQueryIdINTEL we generate an
>> +    * INVALID_VALUE error */
>>      if (!queryId) {
>> -      _mesa_warning(ctx, "glGetPerfQueryIdByNameINTEL(queryId == NULL)");
>> +      _mesa_error(ctx, GL_INVALID_VALUE,
>> +                  "glGetPerfQueryIdByNameINTEL(queryId == NULL)");
>>         return;
>>      }
>>   -   for (i = 0; i < ctx->PerfQuery.NumGroups; ++i) {
>> -      const struct gl_perf_monitor_group *group_obj = get_group(ctx, i);
>> -      if (strcmp(group_obj->Name, queryName) == 0) {
>> +   for (i = 0; i < ctx->PerfQuery.NumQueries; ++i) {
>> +      const GLchar *name;
>> +      GLuint ignore;
>> +
>> +      ctx->Driver.GetPerfQueryInfo(ctx, i, &name, &ignore, &ignore,
>> &ignore);
>
>
> Why these parameters are currently ignored?

Driver.GetPerfQueryInfo() is used in two places within the frontend,
and it's mainly geared as the backend for glGetPerfQueryInfo to get a
query's name, report-size, the number of counters and number of active
queries of that type.

In this case though, we're only interested in the name of a query and
so we are going to ignore the extra info that function returns. For
the sake of avoiding the need for the backed to check for NULL
pointers the frontend gives the backend a dummy destination for the
info being ignored.

>> @@ -294,18 +219,21 @@ _mesa_GetPerfQueryIdByNameINTEL(char *queryName,
>> GLuint *queryId)
>>     extern void GLAPIENTRY
>>   _mesa_GetPerfQueryInfoINTEL(GLuint queryId,
>> -                            GLuint queryNameLength, char *queryName,
>> -                            GLuint *dataSize, GLuint *noCounters,
>> -                            GLuint *noActiveInstances,
>> +                            GLuint nameLength, char *name,
>> +                            GLuint *dataSize,
>> +                            GLuint *numCounters,
>> +                            GLuint *numActive,
>>                               GLuint *capsMask)
>>   {
>>      GET_CURRENT_CONTEXT(ctx);
>> -   unsigned i;
>>   -   const struct gl_perf_monitor_group *group_obj =
>> -      get_group(ctx, queryid_to_index(queryId));
>> +   GLuint queryIndex = queryid_to_index(queryId);
>> +   const GLchar *queryName;
>> +   GLuint queryDataSize;
>> +   GLuint queryNumCounters;
>> +   GLuint queryNumActive;
>>   -   if (group_obj == NULL) {
>> +   if (!queryid_valid(ctx, queryId)) {
>>         /* The GL_INTEL_performance_query spec says:
>>          *
>>          *    "If queryId does not reference a valid query type, an
>> @@ -316,32 +244,19 @@ _mesa_GetPerfQueryInfoINTEL(GLuint queryId,
>>         return;
>>      }
>>   -   if (queryName) {
>> -      strncpy(queryName, group_obj->Name, queryNameLength);
>> +   ctx->Driver.GetPerfQueryInfo(ctx, queryIndex,
>> +                                &queryName,
>> +                                &queryDataSize,
>> +                                &queryNumCounters,
>> +                                &queryNumActive);
>>   -      /* No specification given about whether the string needs to be
>> -       * zero-terminated. Zero-terminate the string always as we don't
>> -       * otherwise communicate the length of the returned string.
>> -       */
>> -      if (queryNameLength > 0) {
>> -         queryName[queryNameLength - 1] = '\0';
>> -      }
>> -   }
>> +   return_string(name, nameLength, queryName);
>>   -   if (dataSize) {
>> -      unsigned size = 0;
>> -      for (i = 0; i < group_obj->NumCounters; ++i) {
>> -         /* What we get from the driver is group id (uint32_t) + counter
>> id
>> -          * (uint32_t) + value.
>> -          */
>> -         size += 2 * sizeof(uint32_t) +
>> _mesa_perf_query_counter_size(&group_obj->Counters[i]);
>> -      }
>> -      *dataSize = size;
>> -   }
>> +   if (dataSize)
>> +      *dataSize = queryDataSize;
>>   -   if (noCounters) {
>> -      *noCounters = group_obj->NumCounters;
>> -   }
>> +   if (numCounters)
>> +      *numCounters = queryNumCounters;
>>        /* The GL_INTEL_performance_query spec says:
>>       *
>> @@ -352,128 +267,93 @@ _mesa_GetPerfQueryInfoINTEL(GLuint queryId,
>>       * 2) Another typo in the specification, maxInstances parameter is
>> not listed
>>       *    in the declaration of this function in the list of new
>> functions.
>>       */
>> -   if (noActiveInstances) {
>> -      *noActiveInstances = _mesa_HashNumEntries(ctx->PerfQuery.Objects);
>> -   }
>> +   if (numActive)
>> +      *numActive = queryNumActive;
>>   -   if (capsMask) {
>> -      /* TODO: This information not yet available in the monitor structs.
>> For
>> -       * now, we hardcode SINGLE_CONTEXT, since that's what the
>> implementation
>> -       * currently tries very hard to do.
>> -       */
>> +   /* Assume for now that all queries are per-context */
>> +   if (capsMask)
>>         *capsMask = GL_PERFQUERY_SINGLE_CONTEXT_INTEL;
>> -   }
>
>
> What is your plan about this? Are you going to support multiple contexts in
> the near future?

I guess the GL_PERFQUERY_SINGLE_CONTEXT_INTEL flag is really a spec
left-over from older generations where it wasn't feasible to filter
per-context metrics. I can't really see a usecase for a GL application
to ever be reporting system wide metrics, so we don't even defer to
the backend to determine this.

I'd only imagine changing this as a last resort if we found some
hardware issue that meant we couldn't reliably collect metrics only
for the GL application's context. That said, I am aware of some HW
constraints on single-context filtering for Haswell which could cause
us trouble, but I'm still investigating the details a.t.m. If we did
hit a problem with using HW single-context filtering we should still
be able to filter the metrics manually in the kernel based on the
scheme we're experimenting with a.t.m to help profile media workloads.
Potentially this flag could be used as a stop-gap if we hit a problem
with single-context filtering with some hardware, before we could
implement a manual filtering strategy.

>>     extern void GLAPIENTRY
>>   _mesa_DeletePerfQueryINTEL(GLuint queryHandle)
>>   {
>>      GET_CURRENT_CONTEXT(ctx);
>> -   struct gl_perf_monitor_object *m;
>>   -   /* The queryHandle is the counterpart to AMD_performance_monitor's
>> monitor
>> -    * id.
>> -    */
>> -   m = lookup_query(ctx, queryHandle);
>> +   struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
>
>
> I think you can directly use _mesa_HashLookup() instead of this somewhat
> useless wrapper function.

It looks kinda silly yeah :-) I was going to agree and remove this,
but I noticed when I was checking other uses of _mesa_HashLookup (re:
your question about returning NULL) that there's a pattern in mesa of
having these kinds of lookup wrappers, for the sake of avoiding
casting and to also special case doing a lookup with a handle of 0.
E.g. program.c has a very similar _mesa_lookup_program() with a
comment of "Basically just a wrapper for _mesa_HashLookup() to avoid a
lot of casts elsewhere." Tbh I don't quite understand why casting
would be an issue given that this is C not C++ and _mesa_HashLookup
returns void *, but one thing I should check is whether I also need to
be special casing handles of 0. I would guess it's not though if we're
instead careful to never insert something with handle 0.

I guess I'll either remove this as you suggest, or introduce a special
case for ids of zero.

>>   -   /* Let the driver stop the query if it's active. */
>> -   if (m->Active) {
>> -      ctx->Driver.ResetPerfMonitor(ctx, m);
>> -      m->Ended = false;
>> +   /* To avoid complications in the backed we never ask the backend to
>
>
> Typo: backend

Oops.

>>     extern void GLAPIENTRY
>>   _mesa_BeginPerfQueryINTEL(GLuint queryHandle)
>>   {
>>      GET_CURRENT_CONTEXT(ctx);
>> -   struct gl_perf_monitor_object *m;
>> -
>> -   /* The queryHandle is the counterpart to AMD_performance_monitor's
>> monitor
>> -    * id.
>> -    */
>>   -   m = lookup_query(ctx, queryHandle);
>> +   struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
>>        /* The GL_INTEL_performance_query spec says:
>>       *
>>       *    "If a query handle doesn't reference a previously created
>> performance
>>       *    query instance, an INVALID_VALUE error is generated."
>>       */
>> -   if (m == NULL) {
>> +   if (obj == NULL) {
>>         _mesa_error(ctx, GL_INVALID_VALUE,
>>                     "glBeginPerfQueryINTEL(invalid queryHandle)");
>>         return;
>> @@ -628,15 +493,24 @@ _mesa_BeginPerfQueryINTEL(GLuint queryHandle)
>>       * We also generate an INVALID_OPERATION error if the driver can't
>> begin
>>       * a query for its own reasons, and for nesting the same query.
>>       */
>> -   if (m->Active) {
>> +   if (obj->Active) {
>>         _mesa_error(ctx, GL_INVALID_OPERATION,
>>                     "glBeginPerfQueryINTEL(already active)");
>>         return;
>>      }
>>   -   if (ctx->Driver.BeginPerfMonitor(ctx, m)) {
>> -      m->Active = true;
>> -      m->Ended = false;
>> +   /* To avoid complications in the backed we never ask the backend to
>> +    * reuse a query object and begin a new query while we are still
>> +    * waiting for data on that object. */
>
>
> What the spec says about that?
> My understanding is that with your code we can't abort a query, right?

The extension doesn't provide a way to explicitly abort a query except
that it doesn't disallow applications from re-using an object to begin
a query even though it might still be waiting for results. So it's up
to the implementation to decide whether to interpret this as an abort,
or if that's not possible then it's also conformant to interpret as an
implicit wait, before starting a new query.

The idea here to simplify things for the backend, so it never has to
worry about being asked to re-begin a query that hasn't finished yet.
I don't think applications should really be doing this, so I'd like to
avoid having backends potentially overlooking this corner case or
getting it wrong.

I was certainly nervous of the i965 backend handling this early on
where I didn't have the WaitPerfQuery mechanism and was concerned we'd
mess up our tracking of in-flight MI_RPC commands which might lead to
a CS hang if we disabled the OA unit too soon.

Synchronizing with the gpu could potentially be avoided in this case
with more complex accounting in the backed to ignore reports for
previously aborted queries, and maybe for other backends aborting
would be more practical than it is for us, but my preference a.t.m
would be to go with a simple and hopefully robust approach of handling
this in the frontend and perhaps consider deferring this to the
backend later if we find this is too naive.

>> @@ -647,39 +521,32 @@ extern void GLAPIENTRY
>>   _mesa_EndPerfQueryINTEL(GLuint queryHandle)
>>   {
>>      GET_CURRENT_CONTEXT(ctx);
>> -   struct gl_perf_monitor_object *m;
>>   -   /* The queryHandle is the counterpart to AMD_performance_monitor's
>> monitor
>> -    * id.
>> -    */
>> +   struct gl_perf_query_object *obj = lookup_object(ctx, queryHandle);
>>   -   m = lookup_query(ctx, queryHandle);
>> +   /* Not explicitly covered in the spec, but for consistency... */
>> +   if (obj == NULL) {
>> +      _mesa_error(ctx, GL_INVALID_VALUE,
>> +                  "glEndPerfQueryINTEL(invalid queryHandle)");
>> +      return;
>> +   }
>>        /* The GL_INTEL_performance_query spec says:
>>       *
>>       *    "If a performance query is not currently started, an
>>       *    INVALID_OPERATION error will be generated."
>> -    *
>> -    * The specification doesn't state that an invalid handle would be an
>> -    * INVALID_VALUE error. Regardless, query for such a handle will not
>> be
>> -    * started, so we generate an INVALID_OPERATION in that case too.
>>       */
>> -   if (m == NULL) {
>> -      _mesa_error(ctx, GL_INVALID_OPERATION,
>> -                  "glEndPerfQueryINTEL(invalid queryHandle)");
>> -      return;
>> -   }
>>   -   if (!m->Active) {
>> +   if (!obj->Active) {
>>         _mesa_error(ctx, GL_INVALID_OPERATION,
>>                     "glEndPerfQueryINTEL(not active)");
>>         return;
>>      }
>>   -   ctx->Driver.EndPerfMonitor(ctx, m);
>> +   ctx->Driver.EndPerfQuery(ctx, obj);
>>   -   m->Active = false;
>> -   m->Ended = true;
>> +   obj->Active = false;
>> +   obj->Ready = false;
>
>
> What about obj->Used flag?

Used is needed to uniquely identify the state after ending a query but
before results being ready, so !Active && !Ready but we don't want to
confuse that with a query object that simply hasn't been used yet.

I think Used only needs to be set the first time we begin a query to
remove the above ambiguity and then never needs to be reset.

>> @@ -701,66 +575,34 @@ _mesa_GetPerfQueryDataINTEL(GLuint queryHandle,
>> GLuint flags,
>>         return;
>>      }
>>   -   /* The queryHandle is the counterpart to AMD_performance_monitor's
>> monitor
>> -    * id.
>> -    */
>> +   /* Just for good measure in case a lazy application is only
>> +    * checking this and not checking for errors... */
>> +   *bytesWritten = 0;
>>   -   m = lookup_query(ctx, queryHandle);
>> -
>> -   /* The specification doesn't state that an invalid handle generates an
>> -    * error. We could interpret that to mean the case should be handled
>> as
>> -    * "measurement not ready for this query", but what should be done if
>> -    * `flags' equals PERFQUERY_WAIT_INTEL?
>> -    *
>> -    * To resolve this, we just generate an INVALID_VALUE from an invalid
>> query
>> -    * handle.
>> -    */
>> -   if (m == NULL) {
>> -      _mesa_error(ctx, GL_INVALID_VALUE,
>> -                  "glGetPerfQueryDataINTEL(invalid queryHandle)");
>> -      return;
>> -   }
>> -
>> -   /* We need at least enough room for a single value. */
>> -   if (dataSize < sizeof(GLuint)) {
>> -      *bytesWritten = 0;
>> -      return;
>> -   }
>> -
>> -   /* The GL_INTEL_performance_query spec says:
>> -    *
>> -    *    "The call may end without returning any data if they are not
>> ready
>> -    *    for reading as the measurement session is still pending (the
>> -    *    EndPerfQueryINTEL() command processing is not finished by
>> -    *    hardware). In this case location pointed by the bytesWritten
>> -    *    parameter will be set to 0."
>> -    *
>> -    * If EndPerfQueryINTEL() is not called at all, we follow this.
>> +   /* Not explicitly covered in the spec but to be consistent with
>> +    * EndPerfQuery which validates that an application only ends an
>> +    * active query we also validate that an application doesn't try
>> +    * and get the data for a still active query...
>>       */
>> -   if (!m->Ended) {
>> -      *bytesWritten = 0;
>> +   if (obj->Active) {
>> +      _mesa_error(ctx, GL_INVALID_OPERATION,
>> +                  "glGetPerfQueryDataINTEL(query still active)");
>>         return;
>>      }
>>   -   result_available = ctx->Driver.IsPerfMonitorResultAvailable(ctx, m);
>> +   obj->Ready = ctx->Driver.IsPerfQueryReady(ctx, obj);
>>   -   if (!result_available) {
>> +   if (!obj->Ready) {
>>         if (flags == GL_PERFQUERY_FLUSH_INTEL) {
>>            ctx->Driver.Flush(ctx);
>>         } else if (flags == GL_PERFQUERY_WAIT_INTEL) {
>> -         /* Assume Finish() is both enough and not too much to wait for
>> -          * results. If results are still not available after Finish(),
>> the
>> -          * later code automatically bails out with 0 for bytesWritten.
>> -          */
>> -         ctx->Driver.Finish(ctx);
>> -         result_available =
>> -            ctx->Driver.IsPerfMonitorResultAvailable(ctx, m);
>> +         ctx->Driver.WaitPerfQuery(ctx, obj);
>> +         obj->Ready = true;
>>         }
>>      }
>>   -   if (result_available) {
>> -      ctx->Driver.GetPerfMonitorResult(ctx, m, dataSize, data,
>> (GLint*)bytesWritten);
>> -   } else {
>> +   if (obj->Ready)
>> +      ctx->Driver.GetPerfQueryData(ctx, obj, dataSize, data,
>> bytesWritten);
>> +   else
>>         *bytesWritten = 0;
>
>
> This variable is already initialized above.

Ah yes, can remove this.

>
>
>> -   }
>>   }
>> diff --git a/src/mesa/main/performance_query.h
>> b/src/mesa/main/performance_query.h
>> index 3fed5ea..8268f0e 100644
>> --- a/src/mesa/main/performance_query.h
>> +++ b/src/mesa/main/performance_query.h
>> @@ -1,5 +1,5 @@
>>   /*
>> - * Copyright © 2012 Intel Corporation
>> + * Copyright © 2012,2015 Intel Corporation
>>    *
>>    * Permission is hereby granted, free of charge, to any person obtaining
>> a
>>    * copy of this software and associated documentation files (the
>> "Software"),
>> @@ -38,10 +38,6 @@ _mesa_init_performance_queries(struct gl_context *ctx);
>>   extern void
>>   _mesa_free_performance_queries(struct gl_context *ctx);
>>   -unsigned
>> -_mesa_perf_query_counter_size(const struct gl_perf_monitor_counter *);
>> -
>> -
>>   extern void GLAPIENTRY
>>   _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId);
>>
>
>

Thanks a lot for the comments, I'll look at sending out an updated patch soon.

- Robert