[Mesa-dev] [PATCH 08/11] i965: perf: snapshot RPSTAT1 register

Kenneth Graunke kenneth at whitecape.org
Tue Apr 3 18:20:08 UTC 2018


> >> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c
> >> index 98666759d75..7d5b44cf61d 100644
> >> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c
> >> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
> >> @@ -227,6 +227,8 @@ brw_perf_query(struct gl_perf_query_object *o)
> >>   
> >>   #define MI_RPC_BO_SIZE              4096
> >>   #define MI_RPC_BO_END_OFFSET_BYTES  (MI_RPC_BO_SIZE / 2)
> >> +#define MI_FREQ_START_OFFSET_BYTES  (3072)
> >> +#define MI_FREQ_END_OFFSET_BYTES    (3076)
> > Why these?
> 
> That's where I store the RPSTAT copy (before/after the workload).

Yeah, but I mean...why 3072?  Is it some arbtirary number?

> >>   /******************************************************************************/
> >>   
> >> @@ -1150,6 +1152,9 @@ brw_begin_perf_query(struct gl_context *ctx,
> >>         /* Take a starting OA counter snapshot. */
> >>         brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0,
> >>                                             obj->oa.begin_report_id);
> >> +      brw_store_register_mem32(brw, obj->oa.bo, GEN6_RPSTAT1,
> >> +                               MI_FREQ_START_OFFSET_BYTES);
> >> +
> >>         ++brw->perfquery.n_active_oa_queries;
> >>   
> >>         /* No already-buffered samples can possibly be associated with this query
> >> @@ -1233,6 +1238,8 @@ brw_end_perf_query(struct gl_context *ctx,
> >>          */
> >>         if (!obj->oa.results_accumulated) {
> >>            /* Take an ending OA counter snapshot. */
> >> +         brw_store_register_mem32(brw, obj->oa.bo, GEN6_RPSTAT1,
> >> +                                  MI_FREQ_END_OFFSET_BYTES);
> >>            brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo,
> >>                                                MI_RPC_BO_END_OFFSET_BYTES,
> >>                                                obj->oa.begin_report_id + 1);
> >> @@ -1333,6 +1340,43 @@ brw_is_perf_query_ready(struct gl_context *ctx,
> >>      return false;
> >>   }
> >>   
> >> +static void
> >> +read_gt_frequency(struct brw_context *brw,
> >> +                  struct brw_perf_query_object *obj)
> >> +{
> >> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> >> +   uint32_t *start_reg = obj->oa.map + MI_FREQ_START_OFFSET_BYTES,
> >> +      *end_reg = obj->oa.map + MI_FREQ_END_OFFSET_BYTES;
> >> +
> >> +   switch (devinfo->gen) {
> >> +   case 7:
> >> +   case 8:
> >> +      obj->oa.gt_frequency[0] =
> >> +         ((start_reg[0] & GEN6_RPSTAT1_CURR_GT_FREQ_MASK) >>
> >> +          GEN6_RPSTAT1_CURR_GT_FREQ_SHIFT) * 50ULL;
> > You can just do:
> >
> >    GET_FIELD(start_reg[0], GEN6_RPSTAT1_CURR_GT_FREQ)
> >
> > instead of shifting and masking.
> >
> > I think your conversions may be wrong.  In particular, you don't handle
> > Gen9LP and Gen9 differently, while in the kernel, GT_PM_INTERVAL_TO_US
> > does:
> >
> >    Gen9 LP:      0.833 -> usec
> >    Gen9+ non-LP: 1.33  -> usec
> >    other:        1.28  -> usec
> >
> > #define INTERVAL_1_28_TO_US(interval)  (((interval) << 7) / 100)
> > #define INTERVAL_1_33_TO_US(interval)  (((interval) << 2) / 3)
> > #define INTERVAL_0_833_TO_US(interval) (((interval) * 5)  / 6)
> > #define GT_PM_INTERVAL_TO_US(dev_priv, interval) (INTEL_GEN(dev_priv) >= 9 ? \
> >                             (IS_GEN9_LP(dev_priv) ? \
> >                             INTERVAL_0_833_TO_US(interval) : \
> >                             INTERVAL_1_33_TO_US(interval)) : \
> >                             INTERVAL_1_28_TO_US(interval))
> >
> > I could be mistaken, though.
> 
> Actually the kernel reads rpstat1 already and computes the frequency value.
> I think the current code is equivalent to what the kernel does on big 
> cores & small cores >= gen9.

So...we need to multiply by 50, but don't need to convert to usec?
Otherwise I'm struggling to see how this is equivalent.

> On cherryview/valleyview, we need to read another register to figure out 
> the multipliers...
> 
> So I'll just leave it out for those small cores gens for now.

That seems reasonable...
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180403/717fbb36/attachment.sig>


More information about the mesa-dev mailing list