[Mesa-dev] [PATCH 1/3] mesa/main: allow delayed initialization of performance monitors

Wed Nov 25 05:21:02 PST 2015

On 11/25/2015 02:17 PM, Nicolai Hähnle wrote:
> On 25.11.2015 14:10, Samuel Pitoiset wrote:
>> This is definitely a good performance improvement at initialization
>> time. This is not going to affect Nouveau because we don't have as much
>> as performance counters as Radeon, but it's great anyway.
>>
>> One comment below.
>>
>> Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
>>
>> On 11/25/2015 01:00 PM, Nicolai Hähnle wrote:
>>> Most applications never use performance counters, so allow drivers to
>>> skip potentially expensive initialization steps.
>>>
>>> A driver that wants to use this must enable the appropriate extension(s)
>>> at context initialization and set the InitPerfMonitorGroups driver
>>> function
>>> which will be called the first time information about the performance
>>> monitor
>>> groups is actually used.
>>>
>>> The init_groups helper is called for API functions that can be called
>>> before
>>> a monitor object exists. Functions that require an existing monitor
>>> object
>>> can rely on init_groups having been called before.
>>> ---
>>>   src/mesa/main/dd.h                  |  1 +
>>>   src/mesa/main/performance_monitor.c | 39
>>> +++++++++++++++++++++++++++++++++----
>>>   2 files changed, 36 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
>>> index 496a14f..e5281ce 100644
>>> --- a/src/mesa/main/dd.h
>>> +++ b/src/mesa/main/dd.h
>>> @@ -727,6 +727,7 @@ struct dd_function_table {
>>>       * \name Performance monitors
>>>       */
>>>      /*@{*/
>>> +   void (*InitPerfMonitorGroups)(struct gl_context *ctx);
>>
>> Just a minor comment that your are free to accept or reject. :-)
>> Why don't just call that function InitPerfMonitor()?
>
> Mesa has a function _mesa_init_performance_monitors which initializes
> the monitor object hash, and so I chose the name to make it obvious that
> there is a distinction. I don't feel strongly either way, though.

Fine by me. This doesn't matter.

>
> Cheers,
> Nicolai
>
>>>      struct gl_perf_monitor_object * (*NewPerfMonitor)(struct
>>> gl_context *ctx);
>>>      void (*DeletePerfMonitor)(struct gl_context *ctx,
>>>                                struct gl_perf_monitor_object *m);
>>> diff --git a/src/mesa/main/performance_monitor.c
>>> b/src/mesa/main/performance_monitor.c
>>> index 2d740da..98dfbea 100644
>>> --- a/src/mesa/main/performance_monitor.c
>>> +++ b/src/mesa/main/performance_monitor.c
>>> @@ -53,6 +53,13 @@ _mesa_init_performance_monitors(struct gl_context
>>> *ctx)
>>>      ctx->PerfMonitor.Groups = NULL;
>>>   }
>>>
>>> +static inline void
>>> +init_groups(struct gl_context *ctx)
>>> +{
>>> +   if (unlikely(!ctx->PerfMonitor.Groups))
>>> +      ctx->Driver.InitPerfMonitorGroups(ctx);
>>> +}
>>> +
>>>   static struct gl_perf_monitor_object *
>>>   new_performance_monitor(struct gl_context *ctx, GLuint index)
>>>   {
>>> @@ -171,6 +178,7 @@ _mesa_GetPerfMonitorGroupsAMD(GLint *numGroups,
>>> GLsizei groupsSize,
>>>                                 GLuint *groups)
>>>   {
>>>      GET_CURRENT_CONTEXT(ctx);
>>> +   init_groups(ctx);
>>>
>>>      if (numGroups != NULL)
>>>         *numGroups = ctx->PerfMonitor.NumGroups;
>>> @@ -191,7 +199,11 @@ _mesa_GetPerfMonitorCountersAMD(GLuint group,
>>> GLint *numCounters,
>>>                                   GLsizei countersSize, GLuint
>>> *counters)
>>>   {
>>>      GET_CURRENT_CONTEXT(ctx);
>>> -   const struct gl_perf_monitor_group *group_obj = get_group(ctx,
>>> group);
>>> +   const struct gl_perf_monitor_group *group_obj;
>>> +
>>> +   init_groups(ctx);
>>> +
>>> +   group_obj = get_group(ctx, group);
>>>      if (group_obj == NULL) {
>>>         _mesa_error(ctx, GL_INVALID_VALUE,
>>>                     "glGetPerfMonitorCountersAMD(invalid group)");
>>> @@ -219,9 +231,11 @@ _mesa_GetPerfMonitorGroupStringAMD(GLuint group,
>>> GLsizei bufSize,
>>>                                      GLsizei *length, GLchar
>>> *groupString)
>>>   {
>>>      GET_CURRENT_CONTEXT(ctx);
>>> +   const struct gl_perf_monitor_group *group_obj;
>>>
>>> -   const struct gl_perf_monitor_group *group_obj = get_group(ctx,
>>> group);
>>> +   init_groups(ctx);
>>>
>>> +   group_obj = get_group(ctx, group);
>>>      if (group_obj == NULL) {
>>>         _mesa_error(ctx, GL_INVALID_VALUE,
>>> "glGetPerfMonitorGroupStringAMD");
>>>         return;
>>> @@ -251,6 +265,8 @@ _mesa_GetPerfMonitorCounterStringAMD(GLuint group,
>>> GLuint counter,
>>>      const struct gl_perf_monitor_group *group_obj;
>>>      const struct gl_perf_monitor_counter *counter_obj;
>>>
>>> +   init_groups(ctx);
>>> +
>>>      group_obj = get_group(ctx, group);
>>>
>>>      if (group_obj == NULL) {
>>> @@ -290,6 +306,8 @@ _mesa_GetPerfMonitorCounterInfoAMD(GLuint group,
>>> GLuint counter, GLenum pname,
>>>      const struct gl_perf_monitor_group *group_obj;
>>>      const struct gl_perf_monitor_counter *counter_obj;
>>>
>>> +   init_groups(ctx);
>>> +
>>>      group_obj = get_group(ctx, group);
>>>
>>>      if (group_obj == NULL) {
>>> @@ -353,6 +371,8 @@ _mesa_GenPerfMonitorsAMD(GLsizei n, GLuint
>>> *monitors)
>>>      if (MESA_VERBOSE & VERBOSE_API)
>>>         _mesa_debug(ctx, "glGenPerfMonitorsAMD(%d)\n", n);
>>>
>>> +   init_groups(ctx);
>>> +
>>>      if (n < 0) {
>>>         _mesa_error(ctx, GL_INVALID_VALUE, "glGenPerfMonitorsAMD(n <
>>> 0)");
>>>         return;
>>> @@ -673,6 +693,8 @@ _mesa_GetFirstPerfQueryIdINTEL(GLuint *queryId)
>>>      GET_CURRENT_CONTEXT(ctx);
>>>      unsigned numGroups;
>>>
>>> +   init_groups(ctx);
>>> +
>>>      /* The GL_INTEL_performance_query spec says:
>>>       *
>>>       *    "If queryId pointer is equal to 0, INVALID_VALUE error is
>>> generated."
>>> @@ -705,6 +727,7 @@ extern void GLAPIENTRY
>>>   _mesa_GetNextPerfQueryIdINTEL(GLuint queryId, GLuint *nextQueryId)
>>>   {
>>>      GET_CURRENT_CONTEXT(ctx);
>>> +   init_groups(ctx);
>>>
>>>      /* The GL_INTEL_performance_query spec says:
>>>       *
>>> @@ -744,6 +767,8 @@ _mesa_GetPerfQueryIdByNameINTEL(char *queryName,
>>> GLuint *queryId)
>>>      GET_CURRENT_CONTEXT(ctx);
>>>      unsigned i;
>>>
>>> +   init_groups(ctx);
>>> +
>>>      /* The GL_INTEL_performance_query spec says:
>>>       *
>>>       *    "If queryName does not reference a valid query name, an
>>> INVALID_VALUE
>>> @@ -783,9 +808,11 @@ _mesa_GetPerfQueryInfoINTEL(GLuint queryId,
>>>      GET_CURRENT_CONTEXT(ctx);
>>>      unsigned i;
>>>
>>> -   const struct gl_perf_monitor_group *group_obj =
>>> -      get_group(ctx, queryid_to_index(queryId));
>>> +   const struct gl_perf_monitor_group *group_obj;
>>>
>>> +   init_groups(ctx);
>>> +
>>> +   group_obj = get_group(ctx, queryid_to_index(queryId));
>>>      if (group_obj == NULL) {
>>>         /* The GL_INTEL_performance_query spec says:
>>>          *
>>> @@ -860,6 +887,8 @@ _mesa_GetPerfCounterInfoINTEL(GLuint queryId,
>>> GLuint counterId,
>>>      unsigned counterIndex;
>>>      unsigned i;
>>>
>>> +   init_groups(ctx);
>>> +
>>>      group_obj = get_group(ctx, queryid_to_index(queryId));
>>>
>>>      /* The GL_INTEL_performance_query spec says:
>>> @@ -979,6 +1008,8 @@ _mesa_CreatePerfQueryINTEL(GLuint queryId, GLuint
>>> *queryHandle)
>>>      struct gl_perf_monitor_object *m;
>>>      unsigned i;
>>>
>>> +   init_groups(ctx);
>>> +
>>>      /* This is not specified in the extension, but is the only sane
>>> thing to
>>>       * do.
>>>       */
>>>
>>
>

-- 
-Samuel