[Mesa-dev] [PATCH] nvc0: add support for performance monitoring metrics on Fermi
Samuel Pitoiset
samuel.pitoiset at gmail.com
Fri Oct 16 15:09:19 PDT 2015
On 10/16/2015 11:57 PM, Ilia Mirkin wrote:
> On Fri, Oct 16, 2015 at 5:35 PM, Samuel Pitoiset
> <samuel.pitoiset at gmail.com> wrote:
>>
>> On 10/16/2015 11:22 PM, Ilia Mirkin wrote:
>>> On Fri, Oct 16, 2015 at 5:29 PM, Samuel Pitoiset
>>> <samuel.pitoiset at gmail.com> wrote:
>>>> As explained in the CUDA toolkit documentation, "a metric is a
>>>> characteristic of an application that is calculated from one or more
>>>> event values."
>>>>
>>>> Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
>>>> ---
>>>> src/gallium/drivers/nouveau/Makefile.sources | 2 +
>>>> src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c | 19 +-
>>>> .../drivers/nouveau/nvc0/nvc0_query_hw_metric.c | 444
>>>> +++++++++++++++++++++
>>>> .../drivers/nouveau/nvc0/nvc0_query_hw_metric.h | 42 ++
>>>> 4 files changed, 504 insertions(+), 3 deletions(-)
>>>> create mode 100644
>>>> src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
>>>> create mode 100644
>>>> src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.h
>>>>
>>>> diff --git a/src/gallium/drivers/nouveau/Makefile.sources
>>>> b/src/gallium/drivers/nouveau/Makefile.sources
>>>> index edc6cf4..c18e9f5 100644
>>>> --- a/src/gallium/drivers/nouveau/Makefile.sources
>>>> +++ b/src/gallium/drivers/nouveau/Makefile.sources
>>>> @@ -154,6 +154,8 @@ NVC0_C_SOURCES := \
>>>> nvc0/nvc0_query.h \
>>>> nvc0/nvc0_query_hw.c \
>>>> nvc0/nvc0_query_hw.h \
>>>> + nvc0/nvc0_query_hw_metric.c \
>>>> + nvc0/nvc0_query_hw_metric.h \
>>>> nvc0/nvc0_query_hw_sm.c \
>>>> nvc0/nvc0_query_hw_sm.h \
>>>> nvc0/nvc0_query_sw.c \
>>>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
>>>> b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
>>>> index 91254be..90ee82f 100644
>>>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
>>>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
>>>> @@ -25,6 +25,7 @@
>>>>
>>>> #include "nvc0/nvc0_context.h"
>>>> #include "nvc0/nvc0_query_hw.h"
>>>> +#include "nvc0/nvc0_query_hw_metric.h"
>>>> #include "nvc0/nvc0_query_hw_sm.h"
>>>>
>>>> #define NVC0_HW_QUERY_STATE_READY 0
>>>> @@ -371,6 +372,12 @@ nvc0_hw_create_query(struct nvc0_context *nvc0,
>>>> unsigned type, unsigned index)
>>>> return (struct nvc0_query *)hq;
>>>> }
>>>>
>>>> + hq = nvc0_hw_metric_create_query(nvc0, type);
>>>> + if (hq) {
>>>> + hq->base.funcs = &hw_query_funcs;
>>>> + return (struct nvc0_query *)hq;
>>>> + }
>>>> +
>>>> hq = CALLOC_STRUCT(nvc0_hw_query);
>>>> if (!hq)
>>>> return NULL;
>>>> @@ -435,14 +442,20 @@ int
>>>> nvc0_hw_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
>>>> struct pipe_driver_query_info *info)
>>>> {
>>>> - int num_hw_sm_queries = 0;
>>>> + int num_hw_sm_queries = 0, num_hw_metric_queries = 0;
>>>>
>>>> num_hw_sm_queries = nvc0_hw_sm_get_driver_query_info(screen, 0,
>>>> NULL);
>>>> + num_hw_metric_queries =
>>>> + nvc0_hw_metric_get_driver_query_info(screen, 0, NULL);
>>>>
>>>> if (!info)
>>>> - return num_hw_sm_queries;
>>>> + return num_hw_sm_queries + num_hw_metric_queries;
>>>> +
>>>> + if (id < num_hw_sm_queries)
>>>> + return nvc0_hw_sm_get_driver_query_info(screen, id, info);
>>>>
>>>> - return nvc0_hw_sm_get_driver_query_info(screen, id, info);
>>>> + return nvc0_hw_metric_get_driver_query_info(screen,
>>>> + id - num_hw_sm_queries,
>>>> info);
>>>> }
>>>>
>>>> void
>>>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
>>>> b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
>>>> new file mode 100644
>>>> index 0000000..dbe350a
>>>> --- /dev/null
>>>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
>>>> @@ -0,0 +1,444 @@
>>>> +/*
>>>> + * Copyright 2015 Samuel Pitoiset
>>>> + *
>>>> + * Permission is hereby granted, free of charge, to any person obtaining
>>>> a
>>>> + * copy of this software and associated documentation files (the
>>>> "Software"),
>>>> + * to deal in the Software without restriction, including without
>>>> limitation
>>>> + * the rights to use, copy, modify, merge, publish, distribute,
>>>> sublicense,
>>>> + * and/or sell copies of the Software, and to permit persons to whom the
>>>> + * Software is furnished to do so, subject to the following conditions:
>>>> + *
>>>> + * The above copyright notice and this permission notice shall be
>>>> included in
>>>> + * all copies or substantial portions of the Software.
>>>> + *
>>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>>>> EXPRESS OR
>>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>>> MERCHANTABILITY,
>>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
>>>> SHALL
>>>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
>>>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>>>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>>>> + * OTHER DEALINGS IN THE SOFTWARE.
>>>> + */
>>>> +
>>>> +#include "nvc0/nvc0_context.h"
>>>> +#include "nvc0/nvc0_query_hw_metric.h"
>>>> +#include "nvc0/nvc0_query_hw_sm.h"
>>>> +
>>>> +/* === PERFORMANCE MONITORING METRICS for NVC0:NVE4 === */
>>>> +static const char *nvc0_hw_metric_names[] =
>>>> +{
>>>> + "metric-achieved_occupancy",
>>>> + "metric-branch_efficiency",
>>>> + "metric-inst_issued",
>>>> + "metric-inst_per_wrap",
>>>> + "metric-inst_replay_overhead",
>>>> + "metric-issued_ipc",
>>>> + "metric-issue_slots",
>>>> + "metric-issue_slot_utilization",
>>>> + "metric-ipc",
>>>> +};
>>>> +
>>>> +struct nvc0_hw_metric_query_cfg {
>>>> + uint32_t queries[8];
>>>> + uint32_t num_queries;
>>>> +};
>>>> +
>>>> +#define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_ ##n)
>>>> +#define _M(n, c) [NVC0_HW_METRIC_QUERY_##n] = c
>>>> +
>>>> +/* ==== Compute capability 2.0 (GF100/GF110) ==== */
>>>> +static const struct nvc0_hw_metric_query_cfg
>>>> +sm20_achieved_occupancy =
>>>> +{
>>>> + .queries[0] = _SM(ACTIVE_WARPS),
>>>> + .queries[1] = _SM(ACTIVE_CYCLES),
>>>> + .num_queries = 2,
>>>> +};
>>>> +
>>>> +static const struct nvc0_hw_metric_query_cfg
>>>> +sm20_branch_efficiency =
>>>> +{
>>>> + .queries[0] = _SM(BRANCH),
>>>> + .queries[1] = _SM(DIVERGENT_BRANCH),
>>>> + .num_queries = 2,
>>>> +};
>>>> +
>>>> +static const struct nvc0_hw_metric_query_cfg
>>>> +sm20_inst_per_wrap =
>>>> +{
>>>> + .queries[0] = _SM(INST_EXECUTED),
>>>> + .queries[1] = _SM(WARPS_LAUNCHED),
>>>> + .num_queries = 2,
>>>> +};
>>>> +
>>>> +static const struct nvc0_hw_metric_query_cfg
>>>> +sm20_inst_replay_overhead =
>>>> +{
>>>> + .queries[0] = _SM(INST_ISSUED),
>>>> + .queries[1] = _SM(INST_EXECUTED),
>>>> + .num_queries = 2,
>>>> +};
>>>> +
>>>> +static const struct nvc0_hw_metric_query_cfg
>>>> +sm20_issued_ipc =
>>>> +{
>>>> + .queries[0] = _SM(INST_ISSUED),
>>>> + .queries[1] = _SM(ACTIVE_CYCLES),
>>>> + .num_queries = 2,
>>>> +};
>>>> +
>>>> +static const struct nvc0_hw_metric_query_cfg
>>>> +sm20_ipc =
>>>> +{
>>>> + .queries[0] = _SM(INST_EXECUTED),
>>>> + .queries[1] = _SM(ACTIVE_CYCLES),
>>>> + .num_queries = 2,
>>>> +};
>>>> +
>>>> +static const struct nvc0_hw_metric_query_cfg *sm20_hw_metric_queries[] =
>>>> +{
>>>> + _M(ACHIEVED_OCCUPANCY, &sm20_achieved_occupancy),
>>>> + _M(BRANCH_EFFICIENCY, &sm20_branch_efficiency),
>>>> + _M(INST_ISSUED, NULL),
>>>> + _M(INST_PER_WRAP, &sm20_inst_per_wrap),
>>>> + _M(INST_REPLAY_OVERHEAD, &sm20_inst_replay_overhead),
>>>> + _M(ISSUED_IPC, &sm20_issued_ipc),
>>>> + _M(ISSUE_SLOTS, NULL),
>>>> + _M(ISSUE_SLOT_UTILIZATION, &sm20_issued_ipc),
>>>> + _M(IPC, &sm20_ipc),
>>>> +};
>>>> +
>>>> +/* ==== Compute capability 2.1 (GF108+ except GF110) ==== */
>>>> +static const struct nvc0_hw_metric_query_cfg
>>>> +sm21_inst_issued =
>>>> +{
>>>> + .queries[0] = _SM(INST_ISSUED1_0),
>>>> + .queries[1] = _SM(INST_ISSUED1_1),
>>>> + .queries[2] = _SM(INST_ISSUED2_0),
>>>> + .queries[3] = _SM(INST_ISSUED2_1),
>>>> + .num_queries = 4,
>>>> +};
>>>> +
>>>> +static const struct nvc0_hw_metric_query_cfg
>>>> +sm21_inst_replay_overhead =
>>>> +{
>>>> + .queries[0] = _SM(INST_ISSUED1_0),
>>>> + .queries[1] = _SM(INST_ISSUED1_1),
>>>> + .queries[2] = _SM(INST_ISSUED2_0),
>>>> + .queries[3] = _SM(INST_ISSUED2_1),
>>>> + .queries[4] = _SM(INST_EXECUTED),
>>>> + .num_queries = 5,
>>>> +};
>>>> +
>>>> +static const struct nvc0_hw_metric_query_cfg
>>>> +sm21_issued_ipc =
>>>> +{
>>>> + .queries[0] = _SM(INST_ISSUED1_0),
>>>> + .queries[1] = _SM(INST_ISSUED1_1),
>>>> + .queries[2] = _SM(INST_ISSUED2_0),
>>>> + .queries[3] = _SM(INST_ISSUED2_1),
>>>> + .queries[4] = _SM(ACTIVE_CYCLES),
>>>> + .num_queries = 5,
>>>> +};
>>>> +
>>>> +static const struct nvc0_hw_metric_query_cfg *sm21_hw_metric_queries[] =
>>>> +{
>>>> + _M(ACHIEVED_OCCUPANCY, &sm20_achieved_occupancy),
>>>> + _M(BRANCH_EFFICIENCY, &sm20_branch_efficiency),
>>>> + _M(INST_ISSUED, &sm21_inst_issued),
>>>> + _M(INST_PER_WRAP, &sm20_inst_per_wrap),
>>>> + _M(INST_REPLAY_OVERHEAD, &sm21_inst_replay_overhead),
>>>> + _M(ISSUED_IPC, &sm21_issued_ipc),
>>>> + _M(ISSUE_SLOTS, &sm21_inst_issued),
>>>> + _M(ISSUE_SLOT_UTILIZATION, &sm21_issued_ipc),
>>>> + _M(IPC, &sm20_ipc),
>>>> +};
>>>> +
>>>> +#undef _SM
>>>> +#undef _M
>>>> +
>>>> +static inline const struct nvc0_hw_metric_query_cfg **
>>>> +nvc0_hw_metric_get_queries(struct nvc0_screen *screen)
>>>> +{
>>>> + struct nouveau_device *dev = screen->base.device;
>>>> +
>>>> + if (dev->chipset == 0xc0 || dev->chipset == 0xc8)
>>>> + return sm20_hw_metric_queries;
>>>> + return sm21_hw_metric_queries;
>>>> +}
>>>> +
>>>> +static const struct nvc0_hw_metric_query_cfg *
>>>> +nvc0_hw_metric_query_get_cfg(struct nvc0_context *nvc0,
>>>> + struct nvc0_hw_query *hq)
>>>> +{
>>>> + const struct nvc0_hw_metric_query_cfg **queries;
>>>> + struct nvc0_screen *screen = nvc0->screen;
>>>> + struct nvc0_query *q = &hq->base;
>>>> +
>>>> + queries = nvc0_hw_metric_get_queries(screen);
>>>> + return queries[q->type - NVC0_HW_METRIC_QUERY(0)];
>>>> +}
>>>> +
>>>> +static void
>>>> +nvc0_hw_metric_destroy_query(struct nvc0_context *nvc0,
>>>> + struct nvc0_hw_query *hq)
>>>> +{
>>>> + struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
>>>> + unsigned i;
>>>> +
>>>> + for (i = 0; i < hmq->num_queries; i++)
>>>> + hmq->queries[i]->funcs->destroy_query(nvc0, hmq->queries[i]);
>>>> + FREE(hmq);
>>>> +}
>>>> +
>>>> +static boolean
>>>> +nvc0_hw_metric_begin_query(struct nvc0_context *nvc0, struct
>>>> nvc0_hw_query *hq)
>>>> +{
>>>> + struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
>>>> + boolean ret = false;
>>>> + unsigned i;
>>>> +
>>>> + for (i = 0; i < hmq->num_queries; i++) {
>>>> + ret = hmq->queries[i]->funcs->begin_query(nvc0, hmq->queries[i]);
>>>> + if (!ret)
>>>> + return ret;
>>>> + }
>>>> + return ret;
>>>> +}
>>>> +
>>>> +static void
>>>> +nvc0_hw_metric_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query
>>>> *hq)
>>>> +{
>>>> + struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);
>>>> + unsigned i;
>>>> +
>>>> + for (i = 0; i < hmq->num_queries; i++)
>>>> + hmq->queries[i]->funcs->end_query(nvc0, hmq->queries[i]);
>>>> +}
>>>> +
>>>> +static uint64_t
>>>> +sm20_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
>>>> +{
>>>> + uint64_t value = 0;
>>>> +
>>>> + switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {
>>>> + case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
>>>> + /* (active_warps / active_cycles) / max. number of warps on a MP
>>>> */
>>>> + if (res64[1])
>>>> + value = (res64[0] / (float)res64[1]) / 48;
>>> Why isn't this all just "return ...." and then a return 0 at the end?
>>> i.e. why do you have the value variable?
>>
>> Oh yes! It's better indeed.
>>
>>> Also I don't know how big these values get, but you might want to use
>>> doubles instead of floats.
>>
>> The HUD currently only supports 64-bit integers, and float will be enough in
>> my opinion.
>>
> OK. One issue is that float only accurately represents integers up to
> 1<<24 (it has a 24-bit significand), so if you do
> (float)res64[0] / (float)res64[1], and either is outside that range,
> you'll lose accuracy on the division.
Well, I'll use doubles instead.
More information about the mesa-dev
mailing list