[PATCH 06/17] drm/amdgpu: Add enforce_isolation sysfs attribute
Alex Deucher
alexander.deucher at amd.com
Thu Aug 15 00:04:50 UTC 2024
From: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
This commit adds a new sysfs attribute 'enforce_isolation' to control
the 'enforce_isolation' setting per GPU. The attribute can be read and
written, and accepts values 0 (disabled) and 1 (enabled).
When 'enforce_isolation' is enabled, reserved VMIDs are allocated for
each ring. When it's disabled, the reserved VMIDs are freed.
The set function locks a mutex before changing the 'enforce_isolation'
flag and the VMIDs, and unlocks it afterwards. This ensures that these
operations are atomic and prevents race conditions and other concurrency
issues.
Cc: Christian König <christian.koenig at amd.com>
Cc: Alex Deucher <alexander.deucher at amd.com>
Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
Suggested-by: Alex Deucher <alexander.deucher at amd.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 101 +++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 +
4 files changed, 107 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ae3e827da5ec..fac632986866 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1164,6 +1164,8 @@ struct amdgpu_device {
bool debug_enable_ras_aca;
bool enforce_isolation[MAX_XCP];
+ /* Added this mutex for cleaner shader isolation between GFX and compute processes */
+ struct mutex enforce_isolation_mutex;
};
static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 95953c028ca5..a6e714d1fe4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4064,6 +4064,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->notifier_lock);
mutex_init(&adev->pm.stable_pstate_ctx_lock);
mutex_init(&adev->benchmark_mutex);
+ /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */
+ mutex_init(&adev->enforce_isolation_mutex);
amdgpu_device_init_apu_flags(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 4ed69fcfe9c1..2e35fc2577f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1391,6 +1391,88 @@ static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
return sysfs_emit(buf, "%s\n", supported_partition);
}
+static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ int i;
+ ssize_t size = 0;
+
+ if (adev->xcp_mgr) {
+ for (i = 0; i < adev->xcp_mgr->num_xcps; i++) {
+ size += sysfs_emit_at(buf, size, "%u", adev->enforce_isolation[i]);
+ if (i < (adev->xcp_mgr->num_xcps - 1))
+ size += sysfs_emit_at(buf, size, " ");
+ }
+ buf[size++] = '\n';
+ } else {
+ size = sysfs_emit_at(buf, 0, "%u\n", adev->enforce_isolation[0]);
+ }
+
+ return size;
+}
+
+static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ long partition_values[MAX_XCP] = {0};
+ int ret, i, num_partitions;
+ const char *input_buf = buf;
+
+ for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) {
+ ret = sscanf(input_buf, "%ld", &partition_values[i]);
+ if (ret <= 0)
+ break;
+
+ /* Move the pointer to the next value in the string */
+ input_buf = strchr(input_buf, ' ');
+ if (input_buf) {
+ input_buf++;
+ } else {
+ i++;
+ break;
+ }
+ }
+ num_partitions = i;
+
+ if (adev->xcp_mgr && num_partitions != adev->xcp_mgr->num_xcps)
+ return -EINVAL;
+
+ if (!adev->xcp_mgr && num_partitions != 1)
+ return -EINVAL;
+
+ for (i = 0; i < num_partitions; i++) {
+ if (partition_values[i] != 0 && partition_values[i] != 1)
+ return -EINVAL;
+ }
+
+ mutex_lock(&adev->enforce_isolation_mutex);
+
+ for (i = 0; i < num_partitions; i++) {
+ if (adev->enforce_isolation[i] && !partition_values[i]) {
+ /* Going from enabled to disabled */
+ amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i));
+ } else if (!adev->enforce_isolation[i] && partition_values[i]) {
+ /* Going from disabled to enabled */
+ amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i));
+ }
+ adev->enforce_isolation[i] = partition_values[i];
+ }
+
+ mutex_unlock(&adev->enforce_isolation_mutex);
+
+ return count;
+}
+
+static DEVICE_ATTR(enforce_isolation, 0644,
+ amdgpu_gfx_get_enforce_isolation,
+ amdgpu_gfx_set_enforce_isolation);
+
static DEVICE_ATTR(current_compute_partition, 0644,
amdgpu_gfx_get_current_compute_partition,
amdgpu_gfx_set_compute_partition);
@@ -1417,6 +1499,25 @@ void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev)
device_remove_file(adev->dev, &dev_attr_available_compute_partition);
}
+int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev)
+{
+ int r;
+
+ if (!amdgpu_sriov_vf(adev)) {
+ r = device_create_file(adev->dev, &dev_attr_enforce_isolation);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev)
+{
+ if (!amdgpu_sriov_vf(adev))
+ device_remove_file(adev->dev, &dev_attr_enforce_isolation);
+}
+
int amdgpu_gfx_cleaner_shader_sw_init(struct amdgpu_device *adev,
unsigned int cleaner_shader_size)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 5ff3ab7d429a..cb83b66aba89 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -559,6 +559,8 @@ void amdgpu_gfx_cleaner_shader_sw_fini(struct amdgpu_device *adev);
void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev,
unsigned int cleaner_shader_size,
const void *cleaner_shader_ptr);
+int amdgpu_gfx_sysfs_isolation_shader_init(struct amdgpu_device *adev);
+void amdgpu_gfx_sysfs_isolation_shader_fini(struct amdgpu_device *adev);
static inline const char *amdgpu_gfx_compute_mode_desc(int mode)
{
--
2.46.0
More information about the amd-gfx
mailing list