[PATCH v2 2/3] drm/xe/oa: Make OA buffer size configurable
Dixit, Ashutosh
ashutosh.dixit at intel.com
Tue Dec 3 21:24:11 UTC 2024
On Tue, 26 Nov 2024 08:35:52 -0800, Sai Teja Pottumuttu wrote:
>
Hi Sai Teja,
Mostly looks good, I just have a few nit-picks below.
I am thinking that since this patch mostly re-writes the content of Patch
1, and Patch 3 is sort of trivial, we should just squash the 3 patches into
a single patch. That would be the cleanest and least confusing I
think. Unless you have other ideas for splitting the patches, but the
splitting of patches here is not making a lot of sense to me.
Since I authored the first patch, all we need to do is add my Signed-off-by
on the squashed patch, if I merge it that will automatically happen anyway,
so you can skip that too.
> Add a new property called DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE to
> allow OA buffer size to be configurable from userspace.
>
> With this OA buffer size can be configured to any power of 2
> size between 128KB and 128MB and it would default to 16MB in case
> the size is not supplied.
>
> Note that we also set the respective bits in OAG_OABUFFER and
> OAG_OA_DEBUG to enable up to 128MB sizing.
Delete this last paragraph, commit message should just say the why or the
reason for the patch, no need to describe how it is done.
> BSpec: 61100, 61228
> Signed-off-by: Sai Teja Pottumuttu <sai.teja.pottumuttu at intel.com>
> ---
> drivers/gpu/drm/xe/xe_oa.c | 49 +++++++++++++++++++++++++-------
> drivers/gpu/drm/xe/xe_oa_types.h | 2 +-
> include/uapi/drm/xe_drm.h | 7 +++++
> 3 files changed, 47 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
> index 0e1049079905..7e5da7fef6ad 100644
> --- a/drivers/gpu/drm/xe/xe_oa.c
> +++ b/drivers/gpu/drm/xe/xe_oa.c
> @@ -96,6 +96,7 @@ struct xe_oa_open_param {
> struct drm_xe_sync __user *syncs_user;
> int num_syncs;
> struct xe_sync_entry *syncs;
> + size_t oa_buffer_size;
> };
>
> struct xe_oa_config_bo {
> @@ -405,9 +406,17 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
> {
> struct xe_mmio *mmio = &stream->gt->mmio;
> u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
> - /* For 128M OA buffer size, x8 is done in xe_oa_enable_metric_set() */
> - u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT;
> + u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT;
> unsigned long flags;
> + int size_exponent = __ffs(stream->oa_buffer.bo->size);
Hmm, there seem to be several other options here, such as ffs(), ilog2(),
order_base_2() etc. But I think __ffs() should be fine, it's used elsewhere
too in xe.
But just a nit: please move this line above the "u32 oa_buf" line (there is
some requirement for a reverse x'mas tree line length in declarations:
basically longest line on top and the shortest at the bottom, maybe pull
down the mmio line to the right place too).
> +
> + /*
> + * If oa buffer size is more than 16MB (exponent greater than 24), the
> + * oa buffer size field is multiplied by 8 in xe_oa_enable_metric_set by
> + * setting OAG_OA_DEBUG_BUFFER_SIZE_SELECT bit in OAG DEBUG register
nit but change last line to "setting OAG_OA_DEBUG_BUF_SIZE_SELECT bit in
oa_debug register".
It is OAG_OA_DEBUG_BUF_SIZE_SELECT not OAG_OA_DEBUG_BUFFER_SIZE_SELECT.
Also this is also applicable to OAM, not just OAG, so let's avoid
mentioning "OAG DEBUG" register.
> + */
> + oa_buf |= REG_FIELD_PREP(OABUFFER_SIZE_MASK,
> + size_exponent > 24 ? size_exponent - 20 : size_exponent - 17);
While at it, get rid of the OABUFFER_SIZE_* #defines in regs/xe_oa_regs.h
too, they will not be used after this patch I think.
>
> spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
>
> @@ -902,14 +911,14 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
> xe_file_put(stream->xef);
> }
>
> -static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream)
> +static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
> {
> struct xe_bo *bo;
>
> BUILD_BUG_ON_NOT_POWER_OF_2(XE_OA_BUFFER_SIZE);
>
> bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL,
> - XE_OA_BUFFER_SIZE, ttm_bo_type_kernel,
> + size, ttm_bo_type_kernel,
> XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT);
> if (IS_ERR(bo))
> return PTR_ERR(bo);
> @@ -1087,11 +1096,12 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream)
> 0 : OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
> }
>
> -static u32 oag_buf_size_select(void)
> +static u32 oag_buf_size_select(const struct xe_oa_stream *stream)
> {
> BUILD_BUG_ON(XE_OA_BUFFER_SIZE != SZ_16M && XE_OA_BUFFER_SIZE != SZ_128M);
> return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT,
> - (XE_OA_BUFFER_SIZE == SZ_128M) ? OAG_OA_DEBUG_BUF_SIZE_SELECT : 0);
> + (stream->oa_buffer.bo->size > SZ_16M) ?
() brackets are not needed here.
> + OAG_OA_DEBUG_BUF_SIZE_SELECT : 0);
> }
>
> static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
> @@ -1126,7 +1136,7 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
> xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug,
> _MASKED_BIT_ENABLE(oa_debug) |
> oag_report_ctx_switches(stream) |
> - oag_buf_size_select() |
> + oag_buf_size_select(stream) |
> oag_configure_mmio_trigger(stream, true));
>
> xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ?
> @@ -1268,6 +1278,17 @@ static int xe_oa_set_prop_syncs_user(struct xe_oa *oa, u64 value,
> return 0;
> }
>
> +static int xe_oa_set_prop_oa_buffer_size(struct xe_oa *oa, u64 value,
> + struct xe_oa_open_param *param)
> +{
> + if (!is_power_of_2(value) || value < SZ_128K || value > SZ_128M) {
> + DRM_DEBUG("OA buffer size invalid %llu\n", value);
Use drm_dbg since that's what is used in this file.
> + return -EINVAL;
> + }
> + param->oa_buffer_size = value;
> + return 0;
> +}
> +
> static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value,
> struct xe_oa_open_param *param)
> {
> @@ -1288,6 +1309,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = {
> [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt,
> [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
> [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
> + [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_oa_buffer_size,
> };
>
> static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = {
> @@ -1302,6 +1324,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = {
> [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_prop_ret_inval,
> [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
> [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
> + [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_ret_inval,
> };
>
> static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from,
> @@ -1791,9 +1814,10 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
> if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
> stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
> stream->oa_buffer.circ_size =
> - XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size;
> + param->oa_buffer_size -
> + param->oa_buffer_size % stream->oa_buffer.format->size;
> else
> - stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE;
> + stream->oa_buffer.circ_size = param->oa_buffer_size;
>
> if (stream->exec_q && engine_supports_mi_query(stream->hwe)) {
> /* If we don't find the context offset, just return error */
> @@ -1836,7 +1860,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
> goto err_fw_put;
> }
>
> - ret = xe_oa_alloc_oa_buffer(stream);
> + ret = xe_oa_alloc_oa_buffer(stream, param->oa_buffer_size);
> if (ret)
> goto err_fw_put;
>
> @@ -2133,6 +2157,11 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
> drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz);
> }
>
> + if (!param.oa_buffer_size) {
> + /* If the oa buffer size is not provided, default to 16MB*/
Delete the comment, we want to avoid unnecessary comments.
> + param.oa_buffer_size = XE_OA_BUFFER_SIZE;
Also, bring the change renaming this to DEFAULT_XE_OA_BUFFER_SIZE into this patch.
> + }
> +
> ret = xe_oa_parse_syncs(oa, ¶m);
> if (ret)
> goto err_exec_q;
> diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
> index 539c75f57c09..fea9d981e414 100644
> --- a/drivers/gpu/drm/xe/xe_oa_types.h
> +++ b/drivers/gpu/drm/xe/xe_oa_types.h
> @@ -15,7 +15,7 @@
> #include "regs/xe_reg_defs.h"
> #include "xe_hw_engine_types.h"
>
> -#define XE_OA_BUFFER_SIZE SZ_128M
> +#define XE_OA_BUFFER_SIZE SZ_16M
>
> enum xe_oa_report_header {
> HDR_32_BIT = 0,
> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> index 4a8a4a63e99c..aeb8a0f0741c 100644
> --- a/include/uapi/drm/xe_drm.h
> +++ b/include/uapi/drm/xe_drm.h
> @@ -1651,6 +1651,13 @@ enum drm_xe_oa_property_id {
> * to the VM bind case.
> */
> DRM_XE_OA_PROPERTY_SYNCS,
> +
> + /**
> + * @DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE: Specifies the size of OA buffer
> + * allocated by the driver in bytes. The default size would be 16MB and the
> + * supported sizes are powers of 2 from 128KB to 128MB.
nit, but slightly reword as follows:
* @DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE: Size of OA buffer to be
* allocated by the driver in bytes. Supported sizes are powers of
* 2 from 128 KiB to 128 MiB. When not specified, a 16 MiB OA
* buffer is allocated by default.
> + */
> + DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE,
> };
Also:
1. Because we are changing the uapi, the patch title needs contain "uapi",
so has to be "drm/xe/oa/uapi: Make OA buffer size configurable"
2. This also needs to be added to 'capabilities' in 'struct
drm_xe_oa_unit'. So lets add DRM_XE_OA_CAPS_OA_BUFFER_SIZE, similar to
DRM_XE_OA_CAPS_SYNCS.
>
> /**
> --
> 2.34.1
>
Thanks.
--
Ashutosh
More information about the Intel-xe
mailing list