[Mesa-dev] [PATCH v2 2/5] i965: perf: snapshot RPSTAT register
Kenneth Graunke
kenneth at whitecape.org
Tue Apr 17 06:03:53 UTC 2018
On Tuesday, April 3, 2018 7:48:10 AM PDT Lionel Landwerlin wrote:
> This register contains the current/previous frequency of the GT; it's
> one of the values GPA would like to have as part of their queries.
>
> v2: Don't use this register on baytrail/cherryview (Ken)
> Use GET_FIELD() macro (Ken)
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> ---
> src/mesa/drivers/dri/i965/brw_defines.h | 12 ++++++
> src/mesa/drivers/dri/i965/brw_performance_query.c | 50 +++++++++++++++++++++++
> src/mesa/drivers/dri/i965/brw_performance_query.h | 5 +++
> 3 files changed, 67 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 8bf6f68b67c..855f1c7d744 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1656,6 +1656,18 @@ enum brw_pixel_shader_coverage_mask_mode {
> #define CS_DEBUG_MODE2 0x20d8 /* Gen9+ */
> # define CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE (1 << 4)
>
> +#define GEN7_RPSTAT1 0xA01C
> +#define GEN7_RPSTAT1_CURR_GT_FREQ_SHIFT 7
> +#define GEN7_RPSTAT1_CURR_GT_FREQ_MASK INTEL_MASK(13, 7)
> +#define GEN7_RPSTAT1_PREV_GT_FREQ_SHIFT 0
> +#define GEN7_RPSTAT1_PREV_GT_FREQ_MASK INTEL_MASK(6, 0)
> +
> +#define GEN9_RPSTAT0 0xA01C
> +#define GEN9_RPSTAT0_CURR_GT_FREQ_SHIFT 23
> +#define GEN9_RPSTAT0_CURR_GT_FREQ_MASK INTEL_MASK(31, 23)
> +#define GEN9_RPSTAT0_PREV_GT_FREQ_SHIFT 0
> +#define GEN9_RPSTAT0_PREV_GT_FREQ_MASK INTEL_MASK(8, 0)
> +
> #define SLICE_COMMON_ECO_CHICKEN1 0x731c /* Gen9+ */
> # define GLK_SCEC_BARRIER_MODE_GPGPU (0 << 7)
> # define GLK_SCEC_BARRIER_MODE_3D_HULL (1 << 7)
> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c
> index 44cac85c6e6..32cf96a333d 100644
> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c
> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
> @@ -216,6 +216,8 @@ brw_perf_query(struct gl_perf_query_object *o)
>
> #define MI_RPC_BO_SIZE 4096
> #define MI_RPC_BO_END_OFFSET_BYTES (MI_RPC_BO_SIZE / 2)
> +#define MI_FREQ_START_OFFSET_BYTES (3072)
> +#define MI_FREQ_END_OFFSET_BYTES (3076)
>
> /******************************************************************************/
>
> @@ -946,6 +948,21 @@ close_perf(struct brw_context *brw)
> }
> }
>
> +static void
> +capture_frequency_stat_register(struct brw_context *brw,
> + struct brw_bo *bo,
> + uint32_t bo_offset)
> +{
> + const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +
> + if (devinfo->gen >= 7 && devinfo->gen <= 8 &&
> + !devinfo->is_baytrail && !devinfo->is_cherryview) {
> + brw_store_register_mem32(brw, bo, GEN7_RPSTAT1, bo_offset);
> + } else if (devinfo->gen >= 9) {
> + brw_store_register_mem32(brw, bo, GEN9_RPSTAT0, bo_offset);
> + }
> +}
> +
> /**
> * Driver hook for glBeginPerfQueryINTEL().
> */
> @@ -1138,6 +1155,8 @@ brw_begin_perf_query(struct gl_context *ctx,
> /* Take a starting OA counter snapshot. */
> brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0,
> obj->oa.begin_report_id);
> + capture_frequency_stat_register(brw, obj->oa.bo, MI_FREQ_START_OFFSET_BYTES);
> +
> ++brw->perfquery.n_active_oa_queries;
>
> /* No already-buffered samples can possibly be associated with this query
> @@ -1221,6 +1240,7 @@ brw_end_perf_query(struct gl_context *ctx,
> */
> if (!obj->oa.results_accumulated) {
> /* Take an ending OA counter snapshot. */
> + capture_frequency_stat_register(brw, obj->oa.bo, MI_FREQ_END_OFFSET_BYTES);
> brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo,
> MI_RPC_BO_END_OFFSET_BYTES,
> obj->oa.begin_report_id + 1);
> @@ -1321,6 +1341,35 @@ brw_is_perf_query_ready(struct gl_context *ctx,
> return false;
> }
>
> +static void
> +read_gt_frequency(struct brw_context *brw,
> + struct brw_perf_query_object *obj)
> +{
> + const struct gen_device_info *devinfo = &brw->screen->devinfo;
> + uint32_t start = *((uint32_t *)(obj->oa.map + MI_FREQ_START_OFFSET_BYTES)),
> + end = *((uint32_t *)(obj->oa.map + MI_FREQ_END_OFFSET_BYTES));
> +
> + switch (devinfo->gen) {
> + case 7:
> + case 8:
> + obj->oa.gt_frequency[0] = GET_FIELD(start, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
> + obj->oa.gt_frequency[1] = GET_FIELD(end, GEN7_RPSTAT1_CURR_GT_FREQ) * 50ULL;
> + break;
> + case 9:
> + case 10:
> + case 11:
> + obj->oa.gt_frequency[0] = GET_FIELD(start, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
> + obj->oa.gt_frequency[1] = GET_FIELD(end, GEN9_RPSTAT0_CURR_GT_FREQ) * 50ULL / 3ULL;
Thanks for the pointers to the other kernel code in your reply to v1.
This looks right. One thing I noticed is that the kernel rounds to the
closest value, while this code will truncate, but I don't think that's too crucial.
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180416/fbef2693/attachment-0001.sig>
More information about the mesa-dev
mailing list