[Intel-xe] [PATCH v4 5/5] drm/xe/pm: Init pcode and restore vram on power lost
Rodrigo Vivi
rodrigo.vivi at intel.com
Fri Jul 7 22:00:21 UTC 2023
On Thu, Jul 06, 2023 at 05:32:08PM +0530, Anshuman Gupta wrote:
> Don't init pcode and restore VRAM objects in vain.
> We can rely on primary GT GUC_STATUS to detect whether
> card has really lost power even when d3cold is allowed by xe.
> Adding d3cold.lost_power flag to avoid pcode init and vram
> restoration.
Overall, the approach looks great!
> Also cleaning up the TODO code comment.
>
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Signed-off-by: Anshuman Gupta <anshuman.gupta at intel.com>
> ---
> drivers/gpu/drm/xe/xe_device_types.h | 3 +++
> drivers/gpu/drm/xe/xe_guc.c | 17 +++++++++++++++++
> drivers/gpu/drm/xe/xe_guc.h | 1 +
> drivers/gpu/drm/xe/xe_pci.c | 2 --
> drivers/gpu/drm/xe/xe_pm.c | 15 ++++++++++++---
> 5 files changed, 33 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index 8e69a6d4627a..cb4298b83373 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -354,6 +354,9 @@ struct xe_device {
> /** @allowed: Indicates if d3cold is a valid device state */
> bool allowed;
>
> + /** @power_lost Indicates if card has really lost power. */
> + bool power_lost;
> +
> /**
> * @vram_threshold is the permissible threshold(in megabytes)
> * for vram save/restore. d3cold will be disallowed,
> diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
> index 8245bbc58770..0c20ffd7db98 100644
> --- a/drivers/gpu/drm/xe/xe_guc.c
> +++ b/drivers/gpu/drm/xe/xe_guc.c
> @@ -844,3 +844,20 @@ void xe_guc_print_info(struct xe_guc *guc, struct drm_printer *p)
> xe_guc_ct_print(&guc->ct, p, false);
> xe_guc_submit_print(guc, p);
> }
> +
> +/* Srtictly to be called from RPM resume handler */
There's a typo here, but could you please make this a kernel
doc instead of the comment here?
> +bool xe_guc_has_lost_power(struct xe_guc *guc)
I wonder if there would be a better name for this function,
but I'm not that good with names, so I don't have anything
better to suggest.
> +{
> + struct xe_gt *gt = guc_to_gt(guc);
> + u32 status;
> + int err;
> +
> + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
> + if (err)
> + return false;
> +
> + status = xe_mmio_read32(gt, GUC_STATUS);
> + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
> +
> + return status & GS_MIA_IN_RESET;
> +}
> diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
> index 74a74051f354..3e628bbbe69e 100644
> --- a/drivers/gpu/drm/xe/xe_guc.h
> +++ b/drivers/gpu/drm/xe/xe_guc.h
> @@ -35,6 +35,7 @@ void xe_guc_reset_wait(struct xe_guc *guc);
> void xe_guc_stop_prepare(struct xe_guc *guc);
> int xe_guc_stop(struct xe_guc *guc);
> int xe_guc_start(struct xe_guc *guc);
> +bool xe_guc_has_lost_power(struct xe_guc *guc);
>
> static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class)
> {
> diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
> index 8585b090ff0e..40f03a9d6fb7 100644
> --- a/drivers/gpu/drm/xe/xe_pci.c
> +++ b/drivers/gpu/drm/xe/xe_pci.c
> @@ -837,8 +837,6 @@ static int xe_pci_runtime_idle(struct device *dev)
> * but maybe include some other conditions. So, before
> * we can re-enable the D3cold, we need to:
> * 1. rewrite the VRAM save / restore to avoid buffer object locks
> - * 2. at resume, detect if we really lost power and avoid memory
> - * restoration if we were only up to d3cold
> */
> xe->d3cold.allowed = false;
> }
> diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
> index 74a9bccb78c7..761d0e3e31ad 100644
> --- a/drivers/gpu/drm/xe/xe_pm.c
> +++ b/drivers/gpu/drm/xe/xe_pm.c
> @@ -17,6 +17,7 @@
> #include "xe_display.h"
> #include "xe_ggtt.h"
> #include "xe_gt.h"
> +#include "xe_guc.h"
> #include "xe_irq.h"
> #include "xe_pcode.h"
>
> @@ -193,11 +194,19 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
>
> int xe_pm_runtime_resume(struct xe_device *xe)
> {
> - struct xe_gt *gt;
> + struct xe_gt *gt, *gt0;
I think we should just use the existent 'gt' instead of adding a new variable.
> u8 id;
> int err;
>
> - if (xe->d3cold.allowed) {
> + /*
> + * It can be possible that xe has allowed d3cold but other pcie devices
> + * in gfx card soc would have blocked d3cold, therefore card has not
> + * really lost power. Detecting primary Gt power is sufficient.
> + */
> + gt0 = xe_device_get_gt(xe, 0);
> + xe->d3cold.power_lost = xe_guc_has_lost_power(>0->uc.guc);
> +
> + if (xe->d3cold.allowed && xe->d3cold.power_lost) {
> for_each_gt(gt, xe, id) {
> err = xe_pcode_init(gt);
> if (err)
> @@ -218,7 +227,7 @@ int xe_pm_runtime_resume(struct xe_device *xe)
> for_each_gt(gt, xe, id)
> xe_gt_resume(gt);
>
> - if (xe->d3cold.allowed) {
> + if (xe->d3cold.allowed && xe->d3cold.power_lost) {
> err = xe_bo_restore_user(xe);
> if (err)
> return err;
> --
> 2.38.0
>
More information about the Intel-xe
mailing list