[Intel-xe] [PATCH v2 1/2] drm/xe/mmio: Move xe_mmio_wait32() to xe_mmio.c
Lucas De Marchi
lucas.demarchi at intel.com
Thu Nov 16 18:12:59 UTC 2023
On Thu, Nov 16, 2023 at 12:39:01PM -0300, Gustavo Sousa wrote:
>This function is big enough, let's move it to a shared compilation unit.
>
>While at it, document it.
14.7k may seem like a small gain, but IMO there's no benefit to inlining
this function.
$ ./scripts/bloat-o-meter build64/drivers/gpu/drm/xe/xe.ko{.old,}
add/remove: 2/0 grow/shrink: 0/58 up/down: 554/-15645 (-15091)
Function old new delta
xe_mmio_wait32 - 538 +538
__pfx_xe_mmio_wait32 - 16 +16
_intel_hdcp_enable 1390 1327 -63
xe_guc_reset 1339 1258 -81
__xe_guc_upload 2122 1984 -138
xe_uc_fw_upload 2815 2662 -153
intel_mpllb_enable 1986 1813 -173
intel_dp_aux_xfer 3028 2852 -176
xe_huc_auth 1294 1116 -178
_intel_hdcp_disable 1850 1672 -178
intel_ddi_set_idle_link_train 666 482 -184
icl_pll_power_enable 724 537 -187
icl_pll_enable 724 537 -187
chv_dpio_cmn_power_well_enable 884 697 -187
skl_ddi_pll_enable 1197 1009 -188
intel_write_sha_text 423 234 -189
i8xx_fbc_deactivate 575 385 -190
xe2lpd_pica_power_well_enable 517 323 -194
intel_mpllb_disable 924 729 -195
intel_cx0_bus_reset 556 360 -196
xe2lpd_pica_power_well_disable 514 317 -197
intel_cx0_wait_for_ack 901 702 -199
mcr_lock 414 213 -201
gt_reset_worker 1475 1271 -204
intel_disable_transcoder 1690 1485 -205
wait_for_act_sent 365 159 -206
vlv_wait_port_ready 785 576 -209
gen11_dsi_enable 1607 1398 -209
assert_chv_phy_status 1255 1045 -210
intel_psr_work 1105 894 -211
_intel_hdcp2_disable 2323 2112 -211
hsw_wait_for_power_well_enable 764 552 -212
gen11_dsi_post_disable 4765 4551 -214
intel_psr_wait_exit_locked 596 380 -216
chv_enable_pll 2212 1996 -216
gen9_wait_for_power_well_fuses 382 164 -218
skl_set_cdclk 3455 3235 -220
intel_snps_phy_wait_for_calibration 405 183 -222
intel_dp_mst_hdcp_stream_encryption 932 700 -232
intel_vrr_disable 926 690 -236
pcode_mailbox_rw 1376 1138 -238
vlv_enable_pll 1784 1543 -241
intel_dp_mst_hdcp2_stream_encryption 1169 927 -242
_intel_hdcp2_enable 7533 7281 -252
intel_mtl_pll_disable 2785 2531 -254
wait_panel_status 2347 2090 -257
gmbus_wait_idle 929 672 -257
__intel_cx0_read 1236 965 -271
xe_force_wake_get 999 707 -292
xe_force_wake_put 981 676 -305
intel_pmdemand_check_prev_transaction 520 132 -388
icl_pll_disable 1340 944 -396
xe_guc_mmio_send_recv 4920 4522 -398
intel_cx0_powerdown_change_sequence.constprop 1281 877 -404
__intel_cx0_write 1564 1104 -460
intel_psr_wait_for_idle_locked 1304 836 -468
xe_driver_flr_fini 1431 885 -546
_bxt_set_cdclk.isra 4301 3668 -633
intel_hdcp_auth 10233 9523 -710
intel_mtl_pll_enable 9114 7766 -1348
Total: Before=2181322, After=2166231, chg -0.69%
What could make sense, if we are worried about the extra function call,
would be to inline the first part and leave the wait logic in a function
in this compilation unit (see the sketch below)... because if we are
going to spin/sleep, then the extra call is negligible. However, looking
at the timeouts being passed, I think we can just leave it here.
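Something like this is what I mean (completely untested sketch;
__xe_mmio_wait32_slow is a made-up name for the out-of-line part):

	static inline int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg,
					 u32 mask, u32 val, u32 timeout_us,
					 u32 *out_val, bool atomic)
	{
		/* fast path: keep the first read inline, no call overhead */
		u32 read = xe_mmio_read32(gt, reg);

		if ((read & mask) == val) {
			if (out_val)
				*out_val = read;
			return 0;
		}

		/* cold path: the backoff loop stays out of line in xe_mmio.c */
		return __xe_mmio_wait32_slow(gt, reg, mask, val, timeout_us,
					     out_val, atomic);
	}

That would keep the common already-signaled case call-free.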
Maybe add the "add/remove" and "Total" lines to the commit message?
>
>Signed-off-by: Gustavo Sousa <gustavo.sousa at intel.com>
>---
> drivers/gpu/drm/xe/xe_mmio.c | 53 ++++++++++++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_mmio.h | 39 ++------------------------
> 2 files changed, 55 insertions(+), 37 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
>index d8f9fabf715e..12df698f6764 100644
>--- a/drivers/gpu/drm/xe/xe_mmio.c
>+++ b/drivers/gpu/drm/xe/xe_mmio.c
>@@ -488,3 +488,56 @@ u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
> return (u64)udw << 32 | ldw;
> }
>
>+/**
>+ * xe_mmio_wait32() - Wait for a register to match the desired masked value
>+ * @gt: MMIO target GT
>+ * @reg: register to read value from
>+ * @mask: mask to be applied to the value read from the register
>+ * @val: desired value after applying the mask
>+ * @timeout_us: time out after this period of time
@timeout_us: time out after this period of time. Wait logic tries to be
smart, applying an exponential backoff until @timeout_us is reached.
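For example (going by the loop below, and ignoring the extra slack
usleep_range() is allowed to add), with timeout_us=100 the successive
waits would be:

	wait (us):    10   20   40   30   <- last one clamped to the deadline
	elapsed (us): 10   30   70  100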
besides the nits above, this looks good.
Reviewed-by: Lucas De Marchi <lucas.demarchi at intel.com>
thanks
Lucas De Marchi
>+ * @out_val: if not NULL, points where to store the last unmasked value
>+ * @atomic: needs to be true if calling from an atomic context
>+ *
>+ * This function polls for the desired masked value and returns zero on success
>+ * or -ETIMEDOUT if timed out.
>+ *
>+ * Note that @timeout_us represents the minimum amount of time to wait before
>+ * giving up. The actual time taken by this function can be a little more than
>+ * @timeout_us for different reasons, especially in non-atomic contexts. Thus,
>+ * it is possible that this function succeeds even after @timeout_us has passed.
>+ */
>+int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
>+ u32 *out_val, bool atomic)
>+{
>+ ktime_t cur = ktime_get_raw();
>+ const ktime_t end = ktime_add_us(cur, timeout_us);
>+ int ret = -ETIMEDOUT;
>+ s64 wait = 10;
>+ u32 read;
>+
>+ for (;;) {
>+ read = xe_mmio_read32(gt, reg);
>+ if ((read & mask) == val) {
>+ ret = 0;
>+ break;
>+ }
>+
>+ cur = ktime_get_raw();
>+ if (!ktime_before(cur, end))
>+ break;
>+
>+ if (ktime_after(ktime_add_us(cur, wait), end))
>+ wait = ktime_us_delta(end, cur);
>+
>+ if (atomic)
>+ udelay(wait);
>+ else
>+ usleep_range(wait, wait << 1);
>+ wait <<= 1;
>+ }
>+
>+ if (out_val)
>+ *out_val = read;
>+
>+ return ret;
>+}
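Just to illustrate the calling convention (register and mask names made
up), a call site looks like:

	u32 val;
	int ret;

	/* poll for up to 50us; on timeout, val holds the last raw read */
	ret = xe_mmio_wait32(gt, STATUS_REG, DONE_MASK, DONE_MASK,
			     50, &val, false);
	if (ret == -ETIMEDOUT) {
		/* here (val & DONE_MASK) != DONE_MASK */
	}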
>diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
>index ae09f777d711..51905aff600c 100644
>--- a/drivers/gpu/drm/xe/xe_mmio.h
>+++ b/drivers/gpu/drm/xe/xe_mmio.h
>@@ -87,43 +87,6 @@ static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
> return (reg_val & mask) != eval ? -EINVAL : 0;
> }
>
>-static inline int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask,
>- u32 val, u32 timeout_us, u32 *out_val,
>- bool atomic)
>-{
>- ktime_t cur = ktime_get_raw();
>- const ktime_t end = ktime_add_us(cur, timeout_us);
>- int ret = -ETIMEDOUT;
>- s64 wait = 10;
>- u32 read;
>-
>- for (;;) {
>- read = xe_mmio_read32(gt, reg);
>- if ((read & mask) == val) {
>- ret = 0;
>- break;
>- }
>-
>- cur = ktime_get_raw();
>- if (!ktime_before(cur, end))
>- break;
>-
>- if (ktime_after(ktime_add_us(cur, wait), end))
>- wait = ktime_us_delta(end, cur);
>-
>- if (atomic)
>- udelay(wait);
>- else
>- usleep_range(wait, wait << 1);
>- wait <<= 1;
>- }
>-
>- if (out_val)
>- *out_val = read;
>-
>- return ret;
>-}
>-
> int xe_mmio_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file);
>
>@@ -140,5 +103,7 @@ static inline bool xe_mmio_in_range(const struct xe_gt *gt,
> int xe_mmio_probe_vram(struct xe_device *xe);
> int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, u64 *tile_size, u64 *tile_base);
> u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg);
>+int xe_mmio_wait32(struct xe_gt *gt, struct xe_reg reg, u32 mask, u32 val, u32 timeout_us,
>+ u32 *out_val, bool atomic);
>
> #endif
>--
>2.42.0
>