Move the multi-lrc guc_id from the lower allocation partition (0 to number of multi-lrc guc_ids) to the upper allocation partition (number of single-lrc to max guc_ids).
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 57 ++++++++++++++-----
 1 file changed, 42 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 9989d121127df..1bacc9621cea8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -147,6 +147,8 @@ guc_create_parallel(struct intel_engine_cs **engines,
  */
 #define NUMBER_MULTI_LRC_GUC_ID(guc)	\
 	((guc)->submission_state.num_guc_ids / 16)
+#define NUMBER_SINGLE_LRC_GUC_ID(guc)	\
+	((guc)->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc))
 
 /*
  * Below is a set of functions which control the GuC scheduling state which
@@ -1776,11 +1778,6 @@ int intel_guc_submission_init(struct intel_guc *guc)
 	INIT_WORK(&guc->submission_state.destroyed_worker,
 		  destroyed_worker_func);
 
-	guc->submission_state.guc_ids_bitmap =
-		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
-	if (!guc->submission_state.guc_ids_bitmap)
-		return -ENOMEM;
-
 	spin_lock_init(&guc->timestamp.lock);
 	INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
 	guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
@@ -1796,7 +1793,8 @@ void intel_guc_submission_fini(struct intel_guc *guc)
 	guc_flush_destroyed_contexts(guc);
 	guc_lrc_desc_pool_destroy(guc);
 	i915_sched_engine_put(guc->sched_engine);
-	bitmap_free(guc->submission_state.guc_ids_bitmap);
+	if (guc->submission_state.guc_ids_bitmap)
+		bitmap_free(guc->submission_state.guc_ids_bitmap);
 }
 
 static inline void queue_request(struct i915_sched_engine *sched_engine,
@@ -1863,6 +1861,33 @@ static void guc_submit_request(struct i915_request *rq)
 	spin_unlock_irqrestore(&sched_engine->lock, flags);
 }
 
+static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
+{
+	int ret;
+
+	GEM_BUG_ON(!intel_context_is_parent(ce));
+	GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap);
+
+	ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
+				      NUMBER_MULTI_LRC_GUC_ID(guc),
+				      order_base_2(ce->parallel.number_children
+						   + 1));
+	if (likely(!(ret < 0)))
+		ret += NUMBER_SINGLE_LRC_GUC_ID(guc);
+
+	return ret;
+}
+
+static int new_slrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
+{
+	GEM_BUG_ON(intel_context_is_parent(ce));
+
+	return ida_simple_get(&guc->submission_state.guc_ids,
+			      0, NUMBER_SINGLE_LRC_GUC_ID(guc),
+			      GFP_KERNEL | __GFP_RETRY_MAYFAIL |
+			      __GFP_NOWARN);
+}
+
 static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
 {
 	int ret;
@@ -1870,16 +1895,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
 	GEM_BUG_ON(intel_context_is_child(ce));
 
 	if (intel_context_is_parent(ce))
-		ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
-					      NUMBER_MULTI_LRC_GUC_ID(guc),
-					      order_base_2(ce->parallel.number_children
-							   + 1));
+		ret = new_mlrc_guc_id(guc, ce);
 	else
-		ret = ida_simple_get(&guc->submission_state.guc_ids,
-				     NUMBER_MULTI_LRC_GUC_ID(guc),
-				     guc->submission_state.num_guc_ids,
-				     GFP_KERNEL | __GFP_RETRY_MAYFAIL |
-				     __GFP_NOWARN);
+		ret = new_slrc_guc_id(guc, ce);
+
 	if (unlikely(ret < 0))
 		return ret;
 
@@ -1989,6 +2008,14 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
 
 	GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
 
+	if (unlikely(intel_context_is_parent(ce) &&
+		     !guc->submission_state.guc_ids_bitmap)) {
+		guc->submission_state.guc_ids_bitmap =
+			bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
+		if (!guc->submission_state.guc_ids_bitmap)
+			return -ENOMEM;
+	}
+
 try_again:
 	spin_lock_irqsave(&guc->submission_state.lock, flags);
On 11/01/2022 16:30, Matthew Brost wrote:
> Move the multi-lrc guc_id from the lower allocation partition (0 to number of multi-lrc guc_ids) to the upper allocation partition (number of single-lrc to max guc_ids).
Just a reminder that best practice for commit messages is to include the "why" as well.
Regards,
Tvrtko
Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> wrote on Wed [2022-Jan-12 08:54:19 +0000]:
> On 11/01/2022 16:30, Matthew Brost wrote:
>> Move the multi-lrc guc_id from the lower allocation partition (0 to number of multi-lrc guc_ids) to the upper allocation partition (number of single-lrc to max guc_ids).
>
> Just a reminder that best practice for commit messages is to include the "why" as well.
>
> Regards,
>
> Tvrtko
In my opinion this patch is a good step forward. Lazy allocation of the bitmap for MLRC and moving the MLRC pool to the end of the id space will allow easier development of contexts for SR-IOV. Introduction of the two new helpers (new_mlrc_guc_id and new_slrc_guc_id) cleans up the code.
I agree with Tvrtko's comment that you should expand your commit message.
One thing I personally don't like is the NUMBER_SINGLE_LRC_GUC_ID definition (same for MLRC). In my opinion it should be an inline function, and the 1/16 value defined as a constant.
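Something along these lines, perhaps (an untested sketch; the constant and helper names here are only suggestions):

#define GUC_ID_MLRC_DIVISOR	16

static inline int number_mlrc_guc_ids(struct intel_guc *guc)
{
	/* 1/16 of the guc_id space is reserved for multi-lrc contexts */
	return guc->submission_state.num_guc_ids / GUC_ID_MLRC_DIVISOR;
}

static inline int number_slrc_guc_ids(struct intel_guc *guc)
{
	return guc->submission_state.num_guc_ids - number_mlrc_guc_ids(guc);
}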
- Piotr
On Wed, Jan 12, 2022 at 06:09:06PM +0100, Piotr Piórkowski wrote:
> Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> wrote on Wed [2022-Jan-12 08:54:19 +0000]:
>> On 11/01/2022 16:30, Matthew Brost wrote:
>>> Move the multi-lrc guc_id from the lower allocation partition (0 to number of multi-lrc guc_ids) to the upper allocation partition (number of single-lrc to max guc_ids).
>>
>> Just a reminder that best practice for commit messages is to include the "why" as well.
>>
>> Regards,
>>
>> Tvrtko
>
> In my opinion this patch is a good step forward. Lazy allocation of the bitmap for MLRC and moving the MLRC pool to the end of the id space will allow easier development of contexts for SR-IOV. Introduction of the two new helpers (new_mlrc_guc_id and new_slrc_guc_id) cleans up the code.
>
> I agree with Tvrtko's comment that you should expand your commit message.
Agree. Didn't know if I could talk about SR-IOV publicly but clearly can, so I will add an explanation in the next rev.
> One thing I personally don't like is the NUMBER_SINGLE_LRC_GUC_ID definition (same for MLRC). In my opinion it should be an inline function, and the 1/16 value defined as a constant.
Agree. I'll move these to functions in next rev.
Matt
On 11.01.2022 17:30, Matthew Brost wrote:
> Move the multi-lrc guc_id from the lower allocation partition (0 to number of multi-lrc guc_ids) to the upper allocation partition (number of single-lrc to max guc_ids).
>
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> ---
>  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 57 ++++++++++++++-----
>  1 file changed, 42 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 9989d121127df..1bacc9621cea8 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -147,6 +147,8 @@ guc_create_parallel(struct intel_engine_cs **engines,
>   */
>  #define NUMBER_MULTI_LRC_GUC_ID(guc)	\
>  	((guc)->submission_state.num_guc_ids / 16)
> +#define NUMBER_SINGLE_LRC_GUC_ID(guc)	\
> +	((guc)->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc))
above two will likely look better if converted into inline functions, or even better if we explicitly store slrc/mlrc upper/lower id limits under guc submission state
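something like this, roughly (untested; num_mlrc_guc_ids and mlrc_base here are hypothetical new submission_state fields):

	/* compute the partition split once, e.g. at submission init time */
	guc->submission_state.num_mlrc_guc_ids =
		guc->submission_state.num_guc_ids / 16;
	guc->submission_state.mlrc_base =
		guc->submission_state.num_guc_ids -
		guc->submission_state.num_mlrc_guc_ids;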
>
>  /*
>   * Below is a set of functions which control the GuC scheduling state which
> @@ -1776,11 +1778,6 @@ int intel_guc_submission_init(struct intel_guc *guc)
>  	INIT_WORK(&guc->submission_state.destroyed_worker,
>  		  destroyed_worker_func);
>
> -	guc->submission_state.guc_ids_bitmap =
> -		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
> -	if (!guc->submission_state.guc_ids_bitmap)
> -		return -ENOMEM;
> -
>  	spin_lock_init(&guc->timestamp.lock);
>  	INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
>  	guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
> @@ -1796,7 +1793,8 @@ void intel_guc_submission_fini(struct intel_guc *guc)
>  	guc_flush_destroyed_contexts(guc);
>  	guc_lrc_desc_pool_destroy(guc);
>  	i915_sched_engine_put(guc->sched_engine);
> -	bitmap_free(guc->submission_state.guc_ids_bitmap);
> +	if (guc->submission_state.guc_ids_bitmap)
> +		bitmap_free(guc->submission_state.guc_ids_bitmap);
it should be fine to pass NULL to bitmap_free, no?
>  }
>
>  static inline void queue_request(struct i915_sched_engine *sched_engine,
> @@ -1863,6 +1861,33 @@ static void guc_submit_request(struct i915_request *rq)
>  	spin_unlock_irqrestore(&sched_engine->lock, flags);
>  }
>
> +static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
> +{
> +	int ret;
> +
> +	GEM_BUG_ON(!intel_context_is_parent(ce));
> +	GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap);
> +
> +	ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
> +				      NUMBER_MULTI_LRC_GUC_ID(guc),
> +				      order_base_2(ce->parallel.number_children
> +						   + 1));
btw, is there any requirement (GuC ABI?) that ids need to be allocated with power of 2 alignment? I don't think that we must optimize that hard and in some cases waste extra ids (as we might be limited on some configs)
> +	if (likely(!(ret < 0)))
> +		ret += NUMBER_SINGLE_LRC_GUC_ID(guc);
nit: more readable would be

	if (unlikely(ret < 0))
		return ret;

	return ret + guc->submission_state.mlrc_base;
> +
> +	return ret;
> +}
> +
> +static int new_slrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
> +{
> +	GEM_BUG_ON(intel_context_is_parent(ce));
do we really need ce here?
> +
> +	return ida_simple_get(&guc->submission_state.guc_ids,
> +			      0, NUMBER_SINGLE_LRC_GUC_ID(guc),
if we change the logic of the NUMBER_SINGLE/MULTI_LRC_GUC_ID macros from a static split into something more dynamic, then we could likely implement lazy increase of the available slrc/mlrc id limits on demand, within the available range, without deciding upfront on the hardcoded 15:1 split

but this can be done next time ;)
> +			      GFP_KERNEL | __GFP_RETRY_MAYFAIL |
> +			      __GFP_NOWARN);
> +}
> +
>  static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
>  {
>  	int ret;
> @@ -1870,16 +1895,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
>  	GEM_BUG_ON(intel_context_is_child(ce));
>
>  	if (intel_context_is_parent(ce))
> -		ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
> -					      NUMBER_MULTI_LRC_GUC_ID(guc),
> -					      order_base_2(ce->parallel.number_children
> -							   + 1));
> +		ret = new_mlrc_guc_id(guc, ce);
>  	else
> -		ret = ida_simple_get(&guc->submission_state.guc_ids,
> -				     NUMBER_MULTI_LRC_GUC_ID(guc),
> -				     guc->submission_state.num_guc_ids,
> -				     GFP_KERNEL | __GFP_RETRY_MAYFAIL |
> -				     __GFP_NOWARN);
> +		ret = new_slrc_guc_id(guc, ce);
with the above helpers introduced, shouldn't we move this code from new_guc_id() to assign_guc_id()?
> +
>  	if (unlikely(ret < 0))
>  		return ret;
>
> @@ -1989,6 +2008,14 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
>
>  	GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
>
> +	if (unlikely(intel_context_is_parent(ce) &&
> +		     !guc->submission_state.guc_ids_bitmap)) {
> +		guc->submission_state.guc_ids_bitmap =
> +			bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
> +		if (!guc->submission_state.guc_ids_bitmap)
> +			return -ENOMEM;
> +	}
maybe move this chunk to new_mlrc_guc_id()? or we can't due to the spin_lock below? but then how do you protect the guc_ids_bitmap pointer itself?
-Michal
> +
>  try_again:
>  	spin_lock_irqsave(&guc->submission_state.lock, flags);
On Thu, Jan 13, 2022 at 12:21:17AM +0100, Michal Wajdeczko wrote:
> On 11.01.2022 17:30, Matthew Brost wrote:
>> Move the multi-lrc guc_id from the lower allocation partition (0 to number of multi-lrc guc_ids) to the upper allocation partition (number of single-lrc to max guc_ids).
>>
>> Signed-off-by: Matthew Brost <matthew.brost@intel.com>
>> ---
>>  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 57 ++++++++++++++-----
>>  1 file changed, 42 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>> index 9989d121127df..1bacc9621cea8 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>> @@ -147,6 +147,8 @@ guc_create_parallel(struct intel_engine_cs **engines,
>>   */
>>  #define NUMBER_MULTI_LRC_GUC_ID(guc)	\
>>  	((guc)->submission_state.num_guc_ids / 16)
>> +#define NUMBER_SINGLE_LRC_GUC_ID(guc)	\
>> +	((guc)->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc))
>
> above two will likely look better if converted into inline functions, or even better if we explicitly store slrc/mlrc upper/lower id limits under guc submission state
Definitely inline functions, or I guess variables work too but that might be overkill. Let me play around with this and see how it looks.
>>
>>  /*
>>   * Below is a set of functions which control the GuC scheduling state which
>> @@ -1776,11 +1778,6 @@ int intel_guc_submission_init(struct intel_guc *guc)
>>  	INIT_WORK(&guc->submission_state.destroyed_worker,
>>  		  destroyed_worker_func);
>>
>> -	guc->submission_state.guc_ids_bitmap =
>> -		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
>> -	if (!guc->submission_state.guc_ids_bitmap)
>> -		return -ENOMEM;
>> -
>>  	spin_lock_init(&guc->timestamp.lock);
>>  	INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping);
>>  	guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
>> @@ -1796,7 +1793,8 @@ void intel_guc_submission_fini(struct intel_guc *guc)
>>  	guc_flush_destroyed_contexts(guc);
>>  	guc_lrc_desc_pool_destroy(guc);
>>  	i915_sched_engine_put(guc->sched_engine);
>> -	bitmap_free(guc->submission_state.guc_ids_bitmap);
>> +	if (guc->submission_state.guc_ids_bitmap)
>> +		bitmap_free(guc->submission_state.guc_ids_bitmap);
>
> it should be fine to pass NULL to bitmap_free, no?
Probably? I'll double check on this.
>>  }
>>
>>  static inline void queue_request(struct i915_sched_engine *sched_engine,
>> @@ -1863,6 +1861,33 @@ static void guc_submit_request(struct i915_request *rq)
>>  	spin_unlock_irqrestore(&sched_engine->lock, flags);
>>  }
>>
>> +static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
>> +{
>> +	int ret;
>> +
>> +	GEM_BUG_ON(!intel_context_is_parent(ce));
>> +	GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap);
>> +
>> +	ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
>> +				      NUMBER_MULTI_LRC_GUC_ID(guc),
>> +				      order_base_2(ce->parallel.number_children
>> +						   + 1));
>
> btw, is there any requirement (GuC ABI?) that ids need to be allocated with power of 2 alignment? I don't think that we must optimize that hard and in some cases waste extra ids (as we might be limited on some configs)
No pow2 requirement in the GuC ABI; bitmap regions only work with pow2 alignment and I didn't optimize this.
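For example (just illustrating the waste): a parent with 4 children needs 5 ids, but order_base_2(5) is 3, so bitmap_find_free_region() carves out a 2^3 = 8 id region and 3 ids sit unused.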
>> +	if (likely(!(ret < 0)))
>> +		ret += NUMBER_SINGLE_LRC_GUC_ID(guc);
>
> nit: more readable would be
>
> 	if (unlikely(ret < 0))
> 		return ret;
>
> 	return ret + guc->submission_state.mlrc_base;
Sure.
>> +
>> +	return ret;
>> +}
>> +
>> +static int new_slrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
>> +{
>> +	GEM_BUG_ON(intel_context_is_parent(ce));
>
> do we really need ce here?
Just for the GEM_BUG_ON... Can remove if it is a big deal.
>> +
>> +	return ida_simple_get(&guc->submission_state.guc_ids,
>> +			      0, NUMBER_SINGLE_LRC_GUC_ID(guc),
>
> if we change the logic of the NUMBER_SINGLE/MULTI_LRC_GUC_ID macros from a static split into something more dynamic, then we could likely implement lazy increase of the available slrc/mlrc id limits on demand, within the available range, without deciding upfront on the hardcoded 15:1 split
>
> but this can be done next time ;)
Yea I guess. Doubt we need anything beyond a static split tho.
>> +			      GFP_KERNEL | __GFP_RETRY_MAYFAIL |
>> +			      __GFP_NOWARN);
>> +}
>> +
>>  static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
>>  {
>>  	int ret;
>> @@ -1870,16 +1895,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce)
>>  	GEM_BUG_ON(intel_context_is_child(ce));
>>
>>  	if (intel_context_is_parent(ce))
>> -		ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
>> -					      NUMBER_MULTI_LRC_GUC_ID(guc),
>> -					      order_base_2(ce->parallel.number_children
>> -							   + 1));
>> +		ret = new_mlrc_guc_id(guc, ce);
>>  	else
>> -		ret = ida_simple_get(&guc->submission_state.guc_ids,
>> -				     NUMBER_MULTI_LRC_GUC_ID(guc),
>> -				     guc->submission_state.num_guc_ids,
>> -				     GFP_KERNEL | __GFP_RETRY_MAYFAIL |
>> -				     __GFP_NOWARN);
>> +		ret = new_slrc_guc_id(guc, ce);
>
> with the above helpers introduced, shouldn't we move this code from new_guc_id() to assign_guc_id()?
Why add inline code to assign_guc_id?
>> +
>>  	if (unlikely(ret < 0))
>>  		return ret;
>>
>> @@ -1989,6 +2008,14 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
>>
>>  	GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
>>
>> +	if (unlikely(intel_context_is_parent(ce) &&
>> +		     !guc->submission_state.guc_ids_bitmap)) {
>> +		guc->submission_state.guc_ids_bitmap =
>> +			bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
>> +		if (!guc->submission_state.guc_ids_bitmap)
>> +			return -ENOMEM;
>> +	}
>
> maybe move this chunk to new_mlrc_guc_id()? or we can't due to the spin_lock below? but then how do you protect the guc_ids_bitmap pointer itself?
Can't use GFP_KERNEL inside a spin lock...
Matt
> -Michal
>
>> +
>>  try_again:
>>  	spin_lock_irqsave(&guc->submission_state.lock, flags);
On 13.01.2022 00:26, Matthew Brost wrote:
> On Thu, Jan 13, 2022 at 12:21:17AM +0100, Michal Wajdeczko wrote:
>> On 11.01.2022 17:30, Matthew Brost wrote:

...

>>> @@ -1863,6 +1861,33 @@ static void guc_submit_request(struct i915_request *rq)
>>>  	spin_unlock_irqrestore(&sched_engine->lock, flags);
>>>  }
>>>
>>> +static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
>>> +{
>>> +	int ret;
>>> +
>>> +	GEM_BUG_ON(!intel_context_is_parent(ce));
>>> +	GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap);
>>> +
>>> +	ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
>>> +				      NUMBER_MULTI_LRC_GUC_ID(guc),
>>> +				      order_base_2(ce->parallel.number_children
>>> +						   + 1));
>>
>> btw, is there any requirement (GuC ABI?) that ids need to be allocated with power of 2 alignment? I don't think that we must optimize that hard and in some cases waste extra ids (as we might be limited on some configs)
>
> No pow2 requirement in the GuC ABI; bitmap regions only work with pow2 alignment and I didn't optimize this.
there is a slower variant of the "find" function:

  bitmap_find_next_zero_area() - find a contiguous aligned zero area

that does not have this limitation
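usage would be roughly (an untested sketch; note that, unlike bitmap_find_free_region(), the caller marks the found region busy itself):

	unsigned int nr = ce->parallel.number_children + 1;
	unsigned long offset;

	/* align_mask of 0 means no alignment requirement on the region */
	offset = bitmap_find_next_zero_area(guc->submission_state.guc_ids_bitmap,
					    NUMBER_MULTI_LRC_GUC_ID(guc),
					    0, nr, 0);
	if (offset >= NUMBER_MULTI_LRC_GUC_ID(guc))
		return -ENOSPC;
	bitmap_set(guc->submission_state.guc_ids_bitmap, offset, nr);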
...

>>> @@ -1989,6 +2008,14 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
>>>
>>>  	GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
>>>
>>> +	if (unlikely(intel_context_is_parent(ce) &&
>>> +		     !guc->submission_state.guc_ids_bitmap)) {
>>> +		guc->submission_state.guc_ids_bitmap =
>>> +			bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
>>> +		if (!guc->submission_state.guc_ids_bitmap)
>>> +			return -ENOMEM;
>>> +	}
>>
>> maybe move this chunk to new_mlrc_guc_id()? or we can't due to the spin_lock below? but then how do you protect the guc_ids_bitmap pointer itself?
>
> Can't use GFP_KERNEL inside a spin lock...
ok, but what if there are two or more parallel calls to pin_guc_id(), all being a first parent context? each will see a NULL guc_ids_bitmap... or is there another layer of synchronization?
-Michal
On Thu, Jan 13, 2022 at 03:18:14PM +0100, Michal Wajdeczko wrote:
> On 13.01.2022 00:26, Matthew Brost wrote:
>> On Thu, Jan 13, 2022 at 12:21:17AM +0100, Michal Wajdeczko wrote:
>>> On 11.01.2022 17:30, Matthew Brost wrote:
>
> ...
>
>>>> @@ -1863,6 +1861,33 @@ static void guc_submit_request(struct i915_request *rq)
>>>>  	spin_unlock_irqrestore(&sched_engine->lock, flags);
>>>>  }
>>>>
>>>> +static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce)
>>>> +{
>>>> +	int ret;
>>>> +
>>>> +	GEM_BUG_ON(!intel_context_is_parent(ce));
>>>> +	GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap);
>>>> +
>>>> +	ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap,
>>>> +				      NUMBER_MULTI_LRC_GUC_ID(guc),
>>>> +				      order_base_2(ce->parallel.number_children
>>>> +						   + 1));
>>>
>>> btw, is there any requirement (GuC ABI?) that ids need to be allocated with power of 2 alignment? I don't think that we must optimize that hard and in some cases waste extra ids (as we might be limited on some configs)
>>
>> No pow2 requirement in the GuC ABI; bitmap regions only work with pow2 alignment and I didn't optimize this.
>
> there is a slower variant of the "find" function:
>
>   bitmap_find_next_zero_area() - find a contiguous aligned zero area
>
> that does not have this limitation
Ah, wasn't aware of this. If this becomes an issue (running out of multi-lrc ids) for customers, I suppose this is the first thing we can do to try to address it. For now, I think we leave it as is.
> ...
>
>>>> @@ -1989,6 +2008,14 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce)
>>>>
>>>>  	GEM_BUG_ON(atomic_read(&ce->guc_id.ref));
>>>>
>>>> +	if (unlikely(intel_context_is_parent(ce) &&
>>>> +		     !guc->submission_state.guc_ids_bitmap)) {
>>>> +		guc->submission_state.guc_ids_bitmap =
>>>> +			bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
>>>> +		if (!guc->submission_state.guc_ids_bitmap)
>>>> +			return -ENOMEM;
>>>> +	}
>>>
>>> maybe move this chunk to new_mlrc_guc_id()? or we can't due to the spin_lock below? but then how do you protect the guc_ids_bitmap pointer itself?
>>
>> Can't use GFP_KERNEL inside a spin lock...
>
> ok, but what if there are two or more parallel calls to pin_guc_id(), all being a first parent context? each will see a NULL guc_ids_bitmap... or is there another layer of synchronization?
Good catch. Yes, it is technically possible for two multi-lrc contexts to try to allocate at the same time. I guess I should just do this at driver load time + allocate the maximum number of multi-lrc ids and possibly waste a bit of memory on a PF or VF.
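i.e. roughly restoring what this patch dropped from intel_guc_submission_init() (untested):

	/* allocate the mlrc bitmap once at init, before any contexts can race */
	guc->submission_state.guc_ids_bitmap =
		bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
	if (!guc->submission_state.guc_ids_bitmap)
		return -ENOMEM;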
Matt
> -Michal