[PATCH v3 8/8] drm/xe: Migrate OOB WAs to OR rules
Gustavo Sousa
gustavo.sousa at intel.com
Mon Jul 29 17:03:27 UTC 2024
Quoting Lucas De Marchi (2024-07-26 22:59:07-03:00)
>Now that rtp has OR rules, it no longer needs to be extended to process
>OOB WAs. Previously, if an entry had no name, it was considered "a set of
>rules OR'ed with the last named entry".
>
>Instead of generating new entries, add OR rules. The syntax for
>xe_wa_oob.rules remains the same, with xe_gen_wa_oob generating a
>slightly different table. The object size delta is negligible, but having
>just one logic makes it easier to maintain:
>
> add/remove: 0/0 grow/shrink: 1/2 up/down: 160/-269 (-109)
> Function old new delta
> __compound_literal 6104 6264 +160
> xe_wa_dump 1839 1810 -29
> oob_was 816 576 -240
> Total: Before=17257, After=17148, chg -0.63%
>
>Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
Reviewed-by: Gustavo Sousa <gustavo.sousa at intel.com>
>---
> drivers/gpu/drm/xe/tests/xe_rtp_test.c | 34 +++++++++++-------------
> drivers/gpu/drm/xe/xe_gen_wa_oob.c | 16 +++++++++---
> drivers/gpu/drm/xe/xe_rtp.c | 36 +++++++-------------------
> 3 files changed, 37 insertions(+), 49 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>index 9ab3d5950d59..36a3b5420fef 100644
>--- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>+++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>@@ -401,16 +401,15 @@ static const struct rtp_test_case rtp_cases[] = {
> },
> {
> .name = "inactive-1st_or_active-inactive",
>- .expected_active = BIT(1) | BIT(2) | BIT(3),
>+ .expected_active = BIT(1),
> .entries = (const struct xe_rtp_entry[]) {
> { XE_RTP_NAME("r1"),
> XE_RTP_RULES(FUNC(match_no)),
> },
> { XE_RTP_NAME("r2_or_conditions"),
>- XE_RTP_RULES(FUNC(match_yes)),
>- },
>- { XE_RTP_RULES(FUNC(match_no)) },
>- { XE_RTP_RULES(FUNC(match_no)) },
>+ XE_RTP_RULES(FUNC(match_yes), OR,
>+ FUNC(match_no), OR,
>+ FUNC(match_no)) },
> { XE_RTP_NAME("r3"),
> XE_RTP_RULES(FUNC(match_no)),
> },
>@@ -419,16 +418,15 @@ static const struct rtp_test_case rtp_cases[] = {
> },
> {
> .name = "inactive-2nd_or_active-inactive",
>- .expected_active = BIT(1) | BIT(2) | BIT(3),
>+ .expected_active = BIT(1),
> .entries = (const struct xe_rtp_entry[]) {
> { XE_RTP_NAME("r1"),
> XE_RTP_RULES(FUNC(match_no)),
> },
> { XE_RTP_NAME("r2_or_conditions"),
>- XE_RTP_RULES(FUNC(match_no)),
>- },
>- { XE_RTP_RULES(FUNC(match_yes)) },
>- { XE_RTP_RULES(FUNC(match_no)) },
>+ XE_RTP_RULES(FUNC(match_no), OR,
>+ FUNC(match_yes), OR,
>+ FUNC(match_no)) },
> { XE_RTP_NAME("r3"),
> XE_RTP_RULES(FUNC(match_no)),
> },
>@@ -437,16 +435,15 @@ static const struct rtp_test_case rtp_cases[] = {
> },
> {
> .name = "inactive-last_or_active-inactive",
>- .expected_active = BIT(1) | BIT(2) | BIT(3),
>+ .expected_active = BIT(1),
> .entries = (const struct xe_rtp_entry[]) {
> { XE_RTP_NAME("r1"),
> XE_RTP_RULES(FUNC(match_no)),
> },
> { XE_RTP_NAME("r2_or_conditions"),
>- XE_RTP_RULES(FUNC(match_no)),
>- },
>- { XE_RTP_RULES(FUNC(match_no)) },
>- { XE_RTP_RULES(FUNC(match_yes)) },
>+ XE_RTP_RULES(FUNC(match_no), OR,
>+ FUNC(match_no), OR,
>+ FUNC(match_yes)) },
> { XE_RTP_NAME("r3"),
> XE_RTP_RULES(FUNC(match_no)),
> },
>@@ -461,10 +458,9 @@ static const struct rtp_test_case rtp_cases[] = {
> XE_RTP_RULES(FUNC(match_no)),
> },
> { XE_RTP_NAME("r2_or_conditions"),
>- XE_RTP_RULES(FUNC(match_no)),
>- },
>- { XE_RTP_RULES(FUNC(match_no)) },
>- { XE_RTP_RULES(FUNC(match_no)) },
>+ XE_RTP_RULES(FUNC(match_no), OR,
>+ FUNC(match_no), OR,
>+ FUNC(match_no)) },
> { XE_RTP_NAME("r3"),
> XE_RTP_RULES(FUNC(match_no)),
> },
>diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c
>index 106ee2b027f0..904cf47925aa 100644
>--- a/drivers/gpu/drm/xe/xe_gen_wa_oob.c
>+++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c
>@@ -97,19 +97,27 @@ static int parse(FILE *input, FILE *csource, FILE *cheader)
>
> if (name) {
> fprintf(cheader, "\tXE_WA_OOB_%s = %u,\n", name, idx);
>- fprintf(csource, "{ XE_RTP_NAME(\"%s\"), XE_RTP_RULES(%s) },\n",
>+
>+ /* Close previous entry before starting a new one */
>+ if (idx)
>+ fprintf(csource, ") },\n");
>+
>+ fprintf(csource, "{ XE_RTP_NAME(\"%s\"),\n XE_RTP_RULES(%s",
> name, rules);
>+ idx++;
> } else {
>- fprintf(csource, "{ XE_RTP_NAME(NULL), XE_RTP_RULES(%s) },\n",
>- rules);
>+ fprintf(csource, ", OR,\n\t%s", rules);
> }
>
>- idx++;
> lineno++;
> if (!is_continuation)
> prev_name = name;
> }
>
>+ /* Close last entry */
>+ if (idx)
>+ fprintf(csource, ") },\n");
>+
> fprintf(cheader, "\t_XE_WA_OOB_COUNT = %u\n", idx);
>
> return 0;
>diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
>index 86727f34ca25..e78ba324dd18 100644
>--- a/drivers/gpu/drm/xe/xe_rtp.c
>+++ b/drivers/gpu/drm/xe/xe_rtp.c
>@@ -221,15 +221,15 @@ EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_ctx_enable_active_tracking);
>
> static void rtp_mark_active(struct xe_device *xe,
> struct xe_rtp_process_ctx *ctx,
>- unsigned int first, unsigned int n_entries)
>+ unsigned int idx)
> {
> if (!ctx->active_entries)
> return;
>
>- if (drm_WARN_ON(&xe->drm, first + n_entries > ctx->n_entries))
>+ if (drm_WARN_ON(&xe->drm, idx >= ctx->n_entries))
> return;
>
>- bitmap_set(ctx->active_entries, first, n_entries);
>+ bitmap_set(ctx->active_entries, idx, 1);
> }
>
> /**
>@@ -274,7 +274,7 @@ void xe_rtp_process_to_sr(struct xe_rtp_process_ctx *ctx,
> }
>
> if (match)
>- rtp_mark_active(xe, ctx, entry - entries, 1);
>+ rtp_mark_active(xe, ctx, entry - entries);
> }
> }
> EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_to_sr);
>@@ -285,42 +285,26 @@ EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process_to_sr);
> * @entries: Table with RTP definitions
> *
> * Walk the table pointed by @entries (with an empty sentinel), executing the
>- * rules. A few differences from xe_rtp_process_to_sr():
>- *
>- * 1. There is no action associated with each entry since this uses
>- * struct xe_rtp_entry. Its main use is for marking active workarounds via
>- * xe_rtp_process_ctx_enable_active_tracking().
>- * 2. There is support for OR operations by having entries with no name.
>+ * rules. One difference from xe_rtp_process_to_sr(): there is no action
>+ * associated with each entry since this uses struct xe_rtp_entry. Its main use
>+ * is for marking active workarounds via
>+ * xe_rtp_process_ctx_enable_active_tracking().
> */
> void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
> const struct xe_rtp_entry *entries)
> {
>- const struct xe_rtp_entry *entry, *first_entry;
>+ const struct xe_rtp_entry *entry;
> struct xe_hw_engine *hwe;
> struct xe_gt *gt;
> struct xe_device *xe;
>
> rtp_get_context(ctx, &hwe, &gt, &xe);
>
>- first_entry = entries;
>- if (drm_WARN_ON(&xe->drm, !first_entry->name))
>- return;
>-
> for (entry = entries; entry && entry->rules; entry++) {
>- if (entry->name)
>- first_entry = entry;
>-
> if (!rule_matches(xe, gt, hwe, entry->rules, entry->n_rules))
> continue;
>
>- /* Fast-forward entry, eliminating the OR'ed entries */
>- for (entry++; entry && entry->rules; entry++)
>- if (entry->name)
>- break;
>- entry--;
>-
>- rtp_mark_active(xe, ctx, first_entry - entries,
>- entry - first_entry + 1);
>+ rtp_mark_active(xe, ctx, entry - entries);
> }
> }
> EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process);
>--
>2.43.0
>
More information about the Intel-xe
mailing list