[Mesa-dev] [PATCH 08/11] i965: perf: snapshot RPSTAT1 register

Kenneth Graunke kenneth at whitecape.org
Fri Mar 23 18:00:10 UTC 2018


On Thursday, March 8, 2018 7:42:53 AM PDT Lionel Landwerlin wrote:
> This register contains the frequency of the GT; it's one of the values
> GPA would like to have as part of their queries.
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> ---
>  src/mesa/drivers/dri/i965/brw_defines.h           | 10 +++++
>  src/mesa/drivers/dri/i965/brw_performance_query.c | 45 +++++++++++++++++++++++
>  src/mesa/drivers/dri/i965/brw_performance_query.h |  5 +++
>  3 files changed, 60 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 8bf6f68b67c..ead44ebc5e8 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1656,6 +1656,16 @@ enum brw_pixel_shader_coverage_mask_mode {
>  #define CS_DEBUG_MODE2                     0x20d8 /* Gen9+ */
>  # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4)
>  
> +#define GEN6_RPSTAT1                       0xA01C
> +#define  GEN6_RPSTAT1_CURR_GT_FREQ_SHIFT   7
> +#define  GEN6_RPSTAT1_CURR_GT_FREQ_MASK    INTEL_MASK(13, 7)
> +#define  GEN6_RPSTAT1_PREV_GT_FREQ_SHIFT   0
> +#define  GEN6_RPSTAT1_PREV_GT_FREQ_MASK    INTEL_MASK(6, 0)
> +#define  GEN9_RPSTAT1_CURR_GT_FREQ_SHIFT   23
> +#define  GEN9_RPSTAT1_CURR_GT_FREQ_MASK    INTEL_MASK(31, 23)
> +#define  GEN9_RPSTAT1_PREV_GT_FREQ_SHIFT   0
> +#define  GEN9_RPSTAT1_PREV_GT_FREQ_MASK    INTEL_MASK(8, 0)
> +

I can confirm that Haswell->Broadwell use 13:7 and 6:0, while
Skylake and Cannonlake use 31:23 and 8:0.  They apparently call this
RPSTAT1 on Haswell and RP_STATUS0 on Gen8+.

These are the wrong masks for Sandybridge, so I would not call them
GEN6_*.  The kernel has code for Sandybridge if we wanted to handle it,
but it looks like we don't expose OA on Sandybridge anyway, so there's
likely little point.

Baytrail and Cherryview should both be excluded, as you have to read the
current frequency from the PUnit.  Broxton and all others should work.

>  #define SLICE_COMMON_ECO_CHICKEN1          0x731c /* Gen9+ */
>  # define GLK_SCEC_BARRIER_MODE_GPGPU       (0 << 7)
>  # define GLK_SCEC_BARRIER_MODE_3D_HULL     (1 << 7)
> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c
> index 98666759d75..7d5b44cf61d 100644
> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c
> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
> @@ -227,6 +227,8 @@ brw_perf_query(struct gl_perf_query_object *o)
>  
>  #define MI_RPC_BO_SIZE              4096
>  #define MI_RPC_BO_END_OFFSET_BYTES  (MI_RPC_BO_SIZE / 2)
> +#define MI_FREQ_START_OFFSET_BYTES  (3072)
> +#define MI_FREQ_END_OFFSET_BYTES    (3076)

Why these particular offsets (3072 and 3076)?

>  /******************************************************************************/
>  
> @@ -1150,6 +1152,9 @@ brw_begin_perf_query(struct gl_context *ctx,
>        /* Take a starting OA counter snapshot. */
>        brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0,
>                                            obj->oa.begin_report_id);
> +      brw_store_register_mem32(brw, obj->oa.bo, GEN6_RPSTAT1,
> +                               MI_FREQ_START_OFFSET_BYTES);
> +
>        ++brw->perfquery.n_active_oa_queries;
>  
>        /* No already-buffered samples can possibly be associated with this query
> @@ -1233,6 +1238,8 @@ brw_end_perf_query(struct gl_context *ctx,
>         */
>        if (!obj->oa.results_accumulated) {
>           /* Take an ending OA counter snapshot. */
> +         brw_store_register_mem32(brw, obj->oa.bo, GEN6_RPSTAT1,
> +                                  MI_FREQ_END_OFFSET_BYTES);
>           brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo,
>                                               MI_RPC_BO_END_OFFSET_BYTES,
>                                               obj->oa.begin_report_id + 1);
> @@ -1333,6 +1340,43 @@ brw_is_perf_query_ready(struct gl_context *ctx,
>     return false;
>  }
>  
> +static void
> +read_gt_frequency(struct brw_context *brw,
> +                  struct brw_perf_query_object *obj)
> +{
> +   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +   uint32_t *start_reg = obj->oa.map + MI_FREQ_START_OFFSET_BYTES,
> +      *end_reg = obj->oa.map + MI_FREQ_END_OFFSET_BYTES;
> +
> +   switch (devinfo->gen) {
> +   case 7:
> +   case 8:
> +      obj->oa.gt_frequency[0] =
> +         ((start_reg[0] & GEN6_RPSTAT1_CURR_GT_FREQ_MASK) >>
> +          GEN6_RPSTAT1_CURR_GT_FREQ_SHIFT) * 50ULL;

You can just do:

  GET_FIELD(start_reg[0], GEN6_RPSTAT1_CURR_GT_FREQ)

instead of shifting and masking.

I think your conversions may be wrong.  In particular, you don't handle
Gen9 LP and non-LP Gen9 differently, whereas the kernel's
GT_PM_INTERVAL_TO_US does:

  Gen9 LP:      0.833 -> usec
  Gen9+ non-LP: 1.33  -> usec
  other:        1.28  -> usec

#define INTERVAL_1_28_TO_US(interval)  (((interval) << 7) / 100)
#define INTERVAL_1_33_TO_US(interval)  (((interval) << 2) / 3)
#define INTERVAL_0_833_TO_US(interval) (((interval) * 5)  / 6)
#define GT_PM_INTERVAL_TO_US(dev_priv, interval) (INTEL_GEN(dev_priv) >= 9 ? \
                           (IS_GEN9_LP(dev_priv) ? \
                           INTERVAL_0_833_TO_US(interval) : \
                           INTERVAL_1_33_TO_US(interval)) : \
                           INTERVAL_1_28_TO_US(interval))

I could be mistaken, though.

> +      obj->oa.gt_frequency[1] =
> +         ((end_reg[0] & GEN6_RPSTAT1_CURR_GT_FREQ_MASK) >>
> +          GEN6_RPSTAT1_CURR_GT_FREQ_SHIFT) * 50ULL;
> +      break;
> +   case 9:
> +   case 10:
> +   case 11:
> +      obj->oa.gt_frequency[0] =
> +         ((start_reg[0] & GEN9_RPSTAT1_CURR_GT_FREQ_MASK) >>
> +          GEN9_RPSTAT1_CURR_GT_FREQ_SHIFT) * 100ULL / 6ULL;
> +      obj->oa.gt_frequency[1] =
> +         ((end_reg[0] & GEN9_RPSTAT1_CURR_GT_FREQ_MASK) >>
> +          GEN9_RPSTAT1_CURR_GT_FREQ_SHIFT) * 100ULL / 6ULL;
> +      break;
> +   default:
> +      unreachable("unexpected gen");
> +   }
> +
> +   /* Put the numbers into Hz. */
> +   obj->oa.gt_frequency[0] *= 1000000ULL;
> +   obj->oa.gt_frequency[1] *= 1000000ULL;
> +}
> +
>  static int
>  get_oa_counter_data(struct brw_context *brw,
>                      struct brw_perf_query_object *obj,
> @@ -1344,6 +1388,7 @@ get_oa_counter_data(struct brw_context *brw,
>     int written = 0;
>  
>     if (!obj->oa.results_accumulated) {
> +      read_gt_frequency(brw, obj);
>        accumulate_oa_reports(brw, obj);
>        assert(obj->oa.results_accumulated);
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.h b/src/mesa/drivers/dri/i965/brw_performance_query.h
> index f62786f7f1c..f8732738b4e 100644
> --- a/src/mesa/drivers/dri/i965/brw_performance_query.h
> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.h
> @@ -113,6 +113,11 @@ struct brw_perf_query_object
>            * Number of reports accumulated to produce the results.
>            */
>           uint32_t reports_accumulated;
> +
> +         /**
> +          * Frequency of the GT at begin and end of the query.
> +          */
> +         uint64_t gt_frequency[2];
>        } oa;
>  
>        struct {
> 

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180323/28fec2ad/attachment.sig>


More information about the mesa-dev mailing list