[Intel-xe] [PATCH v2 1/2] drm/xe/mmio: Move xe_mmio_wait32() to xe_mmio.c

Lucas De Marchi lucas.demarchi at intel.com
Thu Nov 16 18:12:59 UTC 2023


On Thu, Nov 16, 2023 at 12:39:01PM -0300, Gustavo Sousa wrote:
>This function is big enough, let's move it to a shared compilation unit.
>
>While at it, document it.

14.7k may seem a small gain, but IMO there's no benefit to inline this
function.

	$ ./scripts/bloat-o-meter build64/drivers/gpu/drm/xe/xe.ko{.old,}
	add/remove: 2/0 grow/shrink: 0/58 up/down: 554/-15645 (-15091)
	Function                                     old     new   delta
	xe_mmio_wait32                                 -     538    +538
	__pfx_xe_mmio_wait32                           -      16     +16
	_intel_hdcp_enable                          1390    1327     -63
	xe_guc_reset                                1339    1258     -81
	__xe_guc_upload                             2122    1984    -138
	xe_uc_fw_upload                             2815    2662    -153
	intel_mpllb_enable                          1986    1813    -173
	intel_dp_aux_xfer                           3028    2852    -176
	xe_huc_auth                                 1294    1116    -178
	_intel_hdcp_disable                         1850    1672    -178
	intel_ddi_set_idle_link_train                666     482    -184
	icl_pll_power_enable                         724     537    -187
	icl_pll_enable                               724     537    -187
	chv_dpio_cmn_power_well_enable               884     697    -187
	skl_ddi_pll_enable                          1197    1009    -188
	intel_write_sha_text                         423     234    -189
	i8xx_fbc_deactivate                          575     385    -190
	xe2lpd_pica_power_well_enable                517     323    -194
	intel_mpllb_disable                          924     729    -195
	intel_cx0_bus_reset                          556     360    -196
	xe2lpd_pica_power_well_disable               514     317    -197
	intel_cx0_wait_for_ack                       901     702    -199
	mcr_lock                                     414     213    -201
	gt_reset_worker                             1475    1271    -204
	intel_disable_transcoder                    1690    1485    -205
	wait_for_act_sent                            365     159    -206
	vlv_wait_port_ready                          785     576    -209
	gen11_dsi_enable                            1607    1398    -209
	assert_chv_phy_status                       1255    1045    -210
	intel_psr_work                              1105     894    -211
	_intel_hdcp2_disable                        2323    2112    -211
	hsw_wait_for_power_well_enable               764     552    -212
	gen11_dsi_post_disable                      4765    4551    -214
	intel_psr_wait_exit_locked                   596     380    -216
	chv_enable_pll                              2212    1996    -216
	gen9_wait_for_power_well_fuses               382     164    -218
	skl_set_cdclk                               3455    3235    -220
	intel_snps_phy_wait_for_calibration          405     183    -222
	intel_dp_mst_hdcp_stream_encryption          932     700    -232
	intel_vrr_disable                            926     690    -236
	pcode_mailbox_rw                            1376    1138    -238
	vlv_enable_pll                              1784    1543    -241
	intel_dp_mst_hdcp2_stream_encryption        1169     927    -242
	_intel_hdcp2_enable                         7533    7281    -252
	intel_mtl_pll_disable                       2785    2531    -254
	wait_panel_status                           2347    2090    -257
	gmbus_wait_idle                              929     672    -257
	__intel_cx0_read                            1236     965    -271
	xe_force_wake_get                            999     707    -292
	xe_force_wake_put                            981     676    -305
	intel_pmdemand_check_prev_transaction        520     132    -388
	icl_pll_disable                             1340     944    -396
	xe_guc_mmio_send_recv                       4920    4522    -398
	intel_cx0_powerdown_change_sequence.constprop    1281     877    -404
	__intel_cx0_write                           1564    1104    -460
	intel_psr_wait_for_idle_locked              1304     836    -468
	xe_driver_flr_fini                          1431     885    -546
	_bxt_set_cdclk.isra                         4301    3668    -633
	intel_hdcp_auth                            10233    9523    -710
	intel_mtl_pll_enable                        9114    7766   -1348
	Total: Before=2181322, After=2166231, chg -0.69%

What could make sense if we are worried about the extra function call
would be to inline the first part and let the wait logic in a function
in this compilation unit... because if we are going to spin/sleep, then
the extra call is negligible. However looking at the timeouts being
passed, I think we can just leave it here.

Maybe  add the "add/remove" and "Total" lines to the commit message?


>
>Signed-off-by: Gustavo Sousa <gustavo.sousa at intel.com>
>---
> drivers/gpu/drm/xe/xe_mmio.c | 53 ++++++++++++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_mmio.h | 39 ++------------------------
> 2 files changed, 55 insertions(+), 37 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
>index d8f9fabf715e..12df698f6764 100644
>--- a/drivers/gpu/drm/xe/xe_mmio.c
>+++ b/drivers/gpu/drm/xe/xe_mmio.c
>@@ -488,3 +488,56 @@ u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
> 	return (u64)udw << 32 | ldw;
> }
>
>+/**
>+ * xe_mmio_wait32() - Wait for a register to match the desired masked value
>+ * @gt: MMIO target GT
>+ * @reg: register to read value from
>+ * @mask: mask to be applied to the value read from the register
>+ * @val: desired value after applying the mask
>+ * @timeout_us: time out after this period of time

@timeout_us: time out after this period of time. Wait logic tries to be
smart, applying a exponential backoff until @timeout_us is reached.

besides the nits above, this looks good.

Reviewed-by: Lucas De Marchi <lucas.demarchi at intel.com>

thanks
Lucas De Marchi


>+ * @out_val: if not NULL, points where to store the last unmasked value
>+ * @atomic: needs to be true if calling from an atomic context
>+ *
>+ * This function polls for the desired masked value and returns zero on success
>+ * or -ETIMEDOUT if timed out.
>+ *
>+ * Note that @timeout_us represents the minimum amount of time to wait before
>+ * giving up. The actual time taken by this function can be a little more than
>+ * @timeout_us for different reasons, specially in non-atomic contexts. Thus,
>+ * it is possible that this function succeeds even after @timeout_us has passed.
>+ */
>+int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
>+		   u32 *out_val, bool atomic)
>+{
>+	ktime_t cur = ktime_get_raw();
>+	const ktime_t end = ktime_add_us(cur, timeout_us);
>+	int ret = -ETIMEDOUT;
>+	s64 wait = 10;
>+	u32 read;
>+
>+	for (;;) {
>+		read = xe_mmio_read32(gt, reg);
>+		if ((read & mask) == val) {
>+			ret = 0;
>+			break;
>+		}
>+
>+		cur = ktime_get_raw();
>+		if (!ktime_before(cur, end))
>+			break;
>+
>+		if (ktime_after(ktime_add_us(cur, wait), end))
>+			wait = ktime_us_delta(end, cur);
>+
>+		if (atomic)
>+			udelay(wait);
>+		else
>+			usleep_range(wait, wait << 1);
>+		wait <<= 1;
>+	}
>+
>+	if (out_val)
>+		*out_val = read;
>+
>+	return ret;
>+}
>diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
>index ae09f777d711..51905aff600c 100644
>--- a/drivers/gpu/drm/xe/xe_mmio.h
>+++ b/drivers/gpu/drm/xe/xe_mmio.h
>@@ -87,43 +87,6 @@ static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
> 	return (reg_val & mask) != eval ? -EINVAL : 0;
> }
>
>-static inline int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask,
>-				 u32 val, u32 timeout_us, u32 *out_val,
>-				 bool atomic)
>-{
>-	ktime_t cur = ktime_get_raw();
>-	const ktime_t end = ktime_add_us(cur, timeout_us);
>-	int ret = -ETIMEDOUT;
>-	s64 wait = 10;
>-	u32 read;
>-
>-	for (;;) {
>-		read = xe_mmio_read32(gt, reg);
>-		if ((read & mask) == val) {
>-			ret = 0;
>-			break;
>-		}
>-
>-		cur = ktime_get_raw();
>-		if (!ktime_before(cur, end))
>-			break;
>-
>-		if (ktime_after(ktime_add_us(cur, wait), end))
>-			wait = ktime_us_delta(end, cur);
>-
>-		if (atomic)
>-			udelay(wait);
>-		else
>-			usleep_range(wait, wait << 1);
>-		wait <<= 1;
>-	}
>-
>-	if (out_val)
>-		*out_val = read;
>-
>-	return ret;
>-}
>-
> int xe_mmio_ioctl(struct drm_device *dev, void *data,
> 		  struct drm_file *file);
>
>@@ -140,5 +103,7 @@ static inline bool xe_mmio_in_range(const struct xe_gt *gt,
> int xe_mmio_probe_vram(struct xe_device *xe);
> int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, u64 *tile_size, u64 *tile_base);
> u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg);
>+int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
>+		   u32 *out_val, bool atomic);
>
> #endif
>-- 
>2.42.0
>


More information about the Intel-xe mailing list