[Intel-xe] [PATCH v2 12/17] drm/xe/rtp: Improve magic macros for RTP tables
Lucas De Marchi
lucas.demarchi at intel.com
Wed Apr 26 18:15:36 UTC 2023
On Tue, Apr 25, 2023 at 05:22:33PM -0700, Lucas De Marchi wrote:
>On Tue, Apr 25, 2023 at 04:13:10PM -0700, Lucas De Marchi wrote:
>>On Tue, Apr 25, 2023 at 07:00:31PM +0200, Michal Wajdeczko wrote:
>>>
>>>
>>>On 25.04.2023 15:17, Lucas De Marchi wrote:
>>>>On Tue, Apr 25, 2023 at 01:03:54PM +0200, Michal Wajdeczko wrote:
>>>>>
>>>>>
>>>>>On 22.04.2023 00:32, Lucas De Marchi wrote:
>>>>>>The macros for writing the rtp tables have some downsides:
>>>>>>
>>>>>> 1) They are fragile, and when making a typo it's hard to know
>>>>>> where it's failing for someone not used to them
>>>>>> 2) They polute the global scope. The CONCAT/COUNT_ARGS/etc only
>>>>>> work because they are defined exactly the same as the ones in
>>>>>> kernel.h.
>>>>>
>>>>>hmm, I wonder why they were defined locally in first place ?
>>>>>Can't you always use definitions from kernel.h ?
>>>>
>>>>I didn't want to include kernel.h for the 2 macros it would use
>>>>when people are actually trying to reduce the size of kernel.h due to
>>>>being kind of what our i915_reg.h became. Also the count args to 12 is
>>>>not exactly what we want.
>>>
>>>sorry, but I still don't follow
>>>
>>>if a ready to use macro is defined, I see no reason to define own copy
>>>just because we don't like to include header where it was added/defined
>>>
>>>if size of the kernel.h is a real problem, then I would expect moving
>>>CONCAT/COUNT macros to linux/concat.h/count.h rather then creating its
>>>own macros in private header (unless they are really non-generic)
>>
>>my thought was: it *is* happening. Slowly. Over time. Let's not add one
>>more dependency on it.
>>
>>At the same time, there is no good reason to block the rest of the
>>changes (and actually the entire xe module being merged upstream)
>>by attempting to create those headers. Yes, that could happen in
>>parallel and go through the right upstream branch. That branch is not
>>drm-next though.
>>
>>We have only 3 macros that were typed over and over in the last 30 years
>>in slightly different forms. Once we have a concat.h / count.h we can
>>easily migrate to them. Example: `git log --grep str_yes_no`: such
>>common infra takes time and synchronization between several trees.
>>
>>See:
>>d2a8ebbf8192 ("kernel.h: split out container_of() and typeof_member() macros")
>>
>>And the disclaimer in the header itself:
>>
>>/*
>>* NOTE:
>>*
>>* This header has combined a lot of unrelated to each other stuff.
>>* The process of splitting its content is in progress while keeping
>>* backward compatibility. That's why it's highly recommended NOT to
>>* include this header inside another header file, especially under
>>* generic or architectural include/ directory.
>>*/
>>
>>
>>
>>>
>>>>
>>>>>
>>>>>> 3) They are not compatible with rules/actions with registers
>>>>>> passing additional options
>>>>>>
>>>>>>This new implementation fixes (2) and (3).
>>>>>>
>>>>>>For (2), prefix everything with "_XE", so there is less chance of
>>>>>>clashes. Also, the maximum number of rules and actions is 4, so there is
>>>>>>no need to make the helper macros go until 12: reduce that to 5, so if
>>>>>>more arguments is passed, the compiler gives an error that there is not
>>>>>>definition with _5 suffix.
>>>>>
>>>>>I'm not convinced that adding local definitions with XE_ prefix that
>>>>>mimics functionality already defined in kernel.h is the right way to go
>>>>
>>>>see above
>>>>
>>>>>
>>>>>>
>>>>>>For (3), change the implementation to concentrate on "pasting a prefix
>>>>>>to each argument" rather than the more general "call any macro for each
>>>>>>argument". Hopefully this will avoid trying to extending this infra and
>>>>>>making it more complex. The new changes to how the arguments are
>>>>>>constructed using tuples will allow extending the interface where it's
>>>>>>needed: passing additional register fields to allow creating our own
>>>>>>xe_reg_t.
>>>>>>
>>>>>>Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
>>>>>>---
>>>>>> drivers/gpu/drm/xe/xe_rtp.h | 68 ++++++++++++++++++-------------------
>>>>>> 1 file changed, 33 insertions(+), 35 deletions(-)
>>>>>>
>>>>>>diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
>>>>>>index a3be7c77753a..53650f09efe9 100644
>>>>>>--- a/drivers/gpu/drm/xe/xe_rtp.h
>>>>>>+++ b/drivers/gpu/drm/xe/xe_rtp.h
>>>>>>@@ -22,43 +22,42 @@ struct xe_reg_sr;
>>>>>> /*
>>>>>> * Helper macros - not to be used outside this header.
>>>>>> */
>>>>>>-/* This counts to 12. Any more, it will return 13th argument. */
>>>>>>-#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10,
>>>>>>_11, _12, _n, X...) _n
>>>>>>-#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6,
>>>>>>5, 4, 3, 2, 1, 0)
>>>>>>+#define _XE_ESC(...) __VA_ARGS__
>>>>>>+#define _XE_COUNT_ARGS(...)
>>>>>>_XE_ESC(__XE_COUNT_ARGS(__VA_ARGS__,5,4,3,2,1,))
>>>>>>+#define __XE_COUNT_ARGS(_,_5,_4,_3,_2,X_,...) X_
>>>>>>
>>>>>>-#define __CONCAT(a, b) a ## b
>>>>>>-#define CONCATENATE(a, b) __CONCAT(a, b)
>>>>>>+#define _XE_CONCAT(a, b) __XE_CONCAT(a, b)
>>>>>>+#define __XE_CONCAT(a, b) a ## b
>>>>>>
>>>>>>-#define __CALL_FOR_EACH_1(MACRO_, x, ...) MACRO_(x)
>>>>>>-#define __CALL_FOR_EACH_2(MACRO_, x, ...) \
>>>>>>- MACRO_(x) __CALL_FOR_EACH_1(MACRO_, ##__VA_ARGS__)
>>>>>>-#define __CALL_FOR_EACH_3(MACRO_, x, ...) \
>>>>>>- MACRO_(x) __CALL_FOR_EACH_2(MACRO_, ##__VA_ARGS__)
>>>>>>-#define __CALL_FOR_EACH_4(MACRO_, x, ...) \
>>>>>>- MACRO_(x) __CALL_FOR_EACH_3(MACRO_, ##__VA_ARGS__)
>>>>>>+#define _XE_FIRST(...) _XE_ESC(__XE_FIRST(__VA_ARGS__,))
>>>>>>+#define __XE_FIRST(x_,...) x_
>>>>>>+#define _XE_TUPLE_TAIL(...) _XE_ESC(__XE_TUPLE_TAIL(__VA_ARGS__))
>>>>>>+#define __XE_TUPLE_TAIL(x_,...) (__VA_ARGS__)
>>>>>>
>>>>>>-#define _CALL_FOR_EACH(NARGS_, MACRO_, x, ...) \
>>>>>>- CONCATENATE(__CALL_FOR_EACH_, NARGS_)(MACRO_, x, ##__VA_ARGS__)
>>>>>>-#define CALL_FOR_EACH(MACRO_, x, ...) \
>>>>>>- _CALL_FOR_EACH(COUNT_ARGS(x, ##__VA_ARGS__), MACRO_, x,
>>>>>>##__VA_ARGS__)
>>>>>>+#define __XE_PASTE_SEP_COMMA ,
>>>>>>+#define __XE_PASTE_SEP_BITWISE_OR |
>>>>>>+#define __XE_PASTE(prefix_, sep_, args_)
>>>>>>_XE_ESC(_XE_CONCAT(__XE_PASTE_,_XE_COUNT_ARGS args_)(prefix_, sep_,
>>>>>>args_))
>>>>>>+#define __XE_PASTE_1(prefix_, sep_, args_) prefix_ args_
>>>>>>+#define __XE_PASTE_2(prefix_, sep_, args_) prefix_(_XE_FIRST args_)
>>>>>>__XE_PASTE_SEP_ ## sep_ __XE_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL
>>>>>>args_)
>>>>>>+#define __XE_PASTE_3(prefix_, sep_, args_) prefix_(_XE_FIRST args_)
>>>>>>__XE_PASTE_SEP_ ## sep_ __XE_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL
>>>>>>args_)
>>>>>>+#define __XE_PASTE_4(prefix_, sep_, args_) prefix_(_XE_FIRST args_)
>>>>>>__XE_PASTE_SEP_ ## sep_ __XE_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL
>>>>>>args_)
>>>>>>
>>>>>>-#define _XE_RTP_REG(x_) (x_), XE_RTP_REG_REGULAR
>>>>>>-#define _XE_RTP_MCR_REG(x_) (x_), XE_RTP_REG_MCR
>>>>>>+#define _XE_RTP_REG(x_) (x_), XE_RTP_REG_REGULAR
>>>>>>+#define _XE_RTP_MCR_REG(x_) (x_), XE_RTP_REG_MCR
>>>>>>
>>>>>> /*
>>>>>> * Helper macros for concatenating prefix - do not use them directly
>>>>>>outside
>>>>>> * this header
>>>>>> */
>>>>>>-#define __ADD_XE_RTP_ENTRY_FLAG_PREFIX(x)
>>>>>>CONCATENATE(XE_RTP_ENTRY_FLAG_, x) |
>>>>>>-#define __ADD_XE_RTP_ACTION_FLAG_PREFIX(x)
>>>>>>CONCATENATE(XE_RTP_ACTION_FLAG_, x) |
>>>>>>-#define __ADD_XE_RTP_RULE_PREFIX(x) CONCATENATE(XE_RTP_RULE_, x) ,
>>>>>>-#define __ADD_XE_RTP_ACTION_PREFIX(x) CONCATENATE(XE_RTP_ACTION_, x) ,
>>>>>>+#define __XE_PASTE_XE_RTP_ENTRY_FLAG_(x_)
>>>>>>_XE_CONCAT(XE_RTP_ENTRY_FLAG_, x_)
>>>>>>+#define __XE_PASTE_XE_RTP_ACTION_FLAG_(x_)
>>>>>>_XE_CONCAT(XE_RTP_ACTION_FLAG_, x_)
>>>>>>+#define __XE_PASTE_XE_RTP_ACTION_(x_) _XE_CONCAT(XE_RTP_ACTION_, x_)
>>>>>>+#define __XE_PASTE_XE_RTP_RULE_(x_) _XE_CONCAT(XE_RTP_RULE_, x_)
>>>>>
>>>>>maybe local macros should be defined with full XE_RTP prefix and be
>>>>>tailored for your specific RTP usecases:
>>>>>
>>>>>#define _XE_RTP_CONCAT(a, b) __XE_RTP_CONCAT(a, b)
>>>>>#define __XE_RTP_CONCAT(a, b) XE_RTP_ ## a ## _ ## b
>>>>
>>>>these are not to be used throughout the source code. The only difference
>>>>I see from this and what I did is the prefix being *XE_RTP_ rather than
>>>
>>>also the actual outcome of the macro includes "XE_RTP" prefix being
>>>added automatically (making this a non-generic concat macro)
>>>
>>>>*XE_. Since this is not to be used outside the header, I'm not seeing
>>>>much value in make it even longer.
>>>>
>>>>>
>>>>>#define __XE_PASTE_XE_RTP_ACTION_(x_) _XE_RTP_CONCAT(ACTION, x_)
>>>>>#define __XE_PASTE_XE_RTP_RULE_(x_) _XE_RTP_CONCAT(RULE, x_)
>>>>
>>>>this contradicts what you said and following that would make it very
>>>>ugly:
>>>>
>>>>#define __XE_RTP_PASTE_XE_RTP_ACTION_(x_) _XE_RTP_CONCAT(ACTION, x_)
>>>>#define __XE_RTP_PASTE_XE_RTP_RULE_(x_) _XE_RTP_CONCAT(RULE, x_)
>>>
>>>or
>>>
>>>#define __XE_RTP_PASTE_ACTION(a) _XE_RTP_CONCAT(ACTION, a)
>>>#define __XE_RTP_PASTE_RULE(r) _XE_RTP_CONCAT(RULE, r)
>>
>>humn... if this works, then it seems to be cleaner. My previous attempt
>>on trying a similar thing ended up with the preprocessor not pasting it
>>on all args, but I think that was still when I was using the
>>CALL_FOR_EACH macro. I will try with this one.
>
>there's big caveat here that the macros are being joined together to
>form the macro that will be called... we can't have it called
>__XE_RTP_PASTE_ACTION since the rename cascades from the other macro,
>called by the actions/rules setup.
>
>There is also this other user without the prefix:
>
>#define __XE_PASTE(prefix_, sep_, args_) _XE_ESC(_XE_CONCAT(__XE_PASTE_,_XE_COUNT_ARGS args_)(prefix_, sep_, args_))
>
>with a lot of changes the renames could probably be made to work,
>but I'm not sure the small gain of using XE_RTP_ vs XE_ prefix really
>justifies it. It won't be simpler.
>
>I will think a little more, but if you have it done, please feel free to
>send it here.
ok, after some changes I think I could both simplify it a little bit by
removing one indirection and make it use the XE_RTP_ prefix.
$ cat a.c
#include <xe_rtp.h>
XE_RTP_PASTE_FOREACH(TEST_, COMMA, (FOO, BAR))
XE_RTP_PASTE_FOREACH(TEST2_, COMMA, (FOO))
XE_RTP_PASTE_FOREACH(TEST3_, BITWISE_OR, (FOO, BAR))
$ gcc -I <...> -D__KERNEL__ -I../drivers/gpu/drm/xe -E -P a.c
...
XE_RTP_TEST_FOO , XE_RTP_TEST_BAR
XE_RTP_TEST2_FOO
XE_RTP_TEST3_FOO | XE_RTP_TEST3_BAR
Also, the end result for xe_wa.i compared to the previous output is the
same:
$ git stash
$ make O=build64 drivers/gpu/drm/xe/xe_wa.i
$ cp drivers/gpu/drm/xe/xe_wa.i{,.old}
$ git stash pop
$ make O=build64 drivers/gpu/drm/xe/xe_wa.i
$ git diff --no-index drivers/gpu/drm/xe/xe_wa.i{.old,}
diff --git a/build64/drivers/gpu/drm/xe/xe_wa.i.old b/build64/drivers/gpu/drm/xe/xe_wa.i
index 5f08d005c8f2..d2908d6b1993 100644
--- a/build64/drivers/gpu/drm/xe/xe_wa.i.old
+++ b/build64/drivers/gpu/drm/xe/xe_wa.i
@@ -64681,16 +64681,16 @@ struct xe_rtp_entry {
struct xe_hw_engine;
struct xe_gt;
struct xe_reg_sr;
-# 391 "../drivers/gpu/drm/xe/xe_rtp.h"
+# 404 "../drivers/gpu/drm/xe/xe_rtp.h"
void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
struct xe_gt *gt, struct xe_hw_engine *hwe);
-# 403 "../drivers/gpu/drm/xe/xe_rtp.h"
+# 416 "../drivers/gpu/drm/xe/xe_rtp.h"
bool xe_rtp_match_even_instance(const struct xe_gt *gt,
const struct xe_hw_engine *hwe);
-# 421 "../drivers/gpu/drm/xe/xe_rtp.h"
+# 434 "../drivers/gpu/drm/xe/xe_rtp.h"
bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt,
const struct xe_hw_engine *hwe);
-# 432 "../drivers/gpu/drm/xe/xe_rtp.h"
+# 445 "../drivers/gpu/drm/xe/xe_rtp.h"
bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt,
const struct xe_hw_engine *hwe);
# 21 "../drivers/gpu/drm/xe/xe_wa.c" 2
This is the diff to the new implementation I will include in next
version:
--------8<-------------
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index 85b107967eeb..be3481d16825 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -26,33 +26,47 @@ struct xe_reg_sr;
#define _XE_COUNT_ARGS(...) _XE_ESC(__XE_COUNT_ARGS(__VA_ARGS__,5,4,3,2,1,))
#define __XE_COUNT_ARGS(_,_5,_4,_3,_2,X_,...) X_
-#define _XE_CONCAT(a, b) __XE_CONCAT(a, b)
-#define __XE_CONCAT(a, b) a ## b
-
#define _XE_FIRST(...) _XE_ESC(__XE_FIRST(__VA_ARGS__,))
#define __XE_FIRST(x_,...) x_
#define _XE_TUPLE_TAIL(...) _XE_ESC(__XE_TUPLE_TAIL(__VA_ARGS__))
#define __XE_TUPLE_TAIL(x_,...) (__VA_ARGS__)
-#define __XE_PASTE_SEP_COMMA ,
-#define __XE_PASTE_SEP_BITWISE_OR |
-#define __XE_PASTE(prefix_, sep_, args_) _XE_ESC(_XE_CONCAT(__XE_PASTE_,_XE_COUNT_ARGS args_)(prefix_, sep_, args_))
-#define __XE_PASTE_1(prefix_, sep_, args_) prefix_ args_
-#define __XE_PASTE_2(prefix_, sep_, args_) prefix_(_XE_FIRST args_) __XE_PASTE_SEP_ ## sep_ __XE_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_)
-#define __XE_PASTE_3(prefix_, sep_, args_) prefix_(_XE_FIRST args_) __XE_PASTE_SEP_ ## sep_ __XE_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_)
-#define __XE_PASTE_4(prefix_, sep_, args_) prefix_(_XE_FIRST args_) __XE_PASTE_SEP_ ## sep_ __XE_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_)
-
#define _XE_DROP_FIRST(x_, ...) __VA_ARGS__
#define _XE_DROP_CAST(...) _XE_ESC(_XE_DROP_FIRST _XE_ESC __VA_ARGS__)
+#define _XE_RTP_CONCAT(a, b) __XE_RTP_CONCAT(a, b)
+#define __XE_RTP_CONCAT(a, b) XE_RTP_ ## a ## b
+
+#define __XE_RTP_PASTE_SEP_COMMA ,
+#define __XE_RTP_PASTE_SEP_BITWISE_OR |
+
/*
- * Helper macros for concatenating prefix - do not use them directly outside
- * this header
+ * XE_RTP_PASTE_FOREACH - Paste XE_RTP_<@prefix_> on each element of the tuple
+ * @args, with the end result separated by @sep_. @sep must be one of the
+ * previously declared macros __XE_RTP_PASTE_SEP_*
+ *
+ * Examples:
+ *
+ * 1) XE_RTP_PASTE_FOREACH(TEST_, COMMA, (FOO, BAR))
+ * expands to:
+ *
+ * XE_RTP_TEST_FOO , XE_RTP_TEST_BAR
+ *
+ * 2) XE_RTP_PASTE_FOREACH(TEST2_, COMMA, (FOO))
+ * expands to:
+ *
+ * XE_RTP_TEST2_FOO
+ *
+ * 2) XE_RTP_PASTE_FOREACH(TEST3, BITWISE_OR, (FOO, BAR))
+ * expands to:
+ *
+ * XE_RTP_TEST3_FOO | XE_RTP_TEST3_BAR
*/
-#define __XE_PASTE_XE_RTP_ENTRY_FLAG_(x_) _XE_CONCAT(XE_RTP_ENTRY_FLAG_, x_)
-#define __XE_PASTE_XE_RTP_ACTION_FLAG_(x_) _XE_CONCAT(XE_RTP_ACTION_FLAG_, x_)
-#define __XE_PASTE_XE_RTP_ACTION_(x_) _XE_CONCAT(XE_RTP_ACTION_, x_)
-#define __XE_PASTE_XE_RTP_RULE_(x_) _XE_CONCAT(XE_RTP_RULE_, x_)
+#define XE_RTP_PASTE_FOREACH(prefix_, sep_, args_) _XE_ESC(_XE_RTP_CONCAT(PASTE_,_XE_COUNT_ARGS args_)(prefix_, sep_, args_))
+#define XE_RTP_PASTE_1(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_)
+#define XE_RTP_PASTE_2(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_1(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_3(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_2(prefix_, sep_, _XE_TUPLE_TAIL args_)
+#define XE_RTP_PASTE_4(prefix_, sep_, args_) _XE_RTP_CONCAT(prefix_, _XE_FIRST args_) __XE_RTP_PASTE_SEP_ ## sep_ XE_RTP_PASTE_3(prefix_, sep_, _XE_TUPLE_TAIL args_)
/*
* Macros to encode rules to match against platform, IP version, stepping, etc.
@@ -311,7 +325,7 @@ struct xe_reg_sr;
* };
*/
#define XE_RTP_ENTRY_FLAG(...) \
- .flags = (__XE_PASTE(__XE_PASTE_XE_RTP_ENTRY_FLAG_, BITWISE_OR, (__VA_ARGS__)))
+ .flags = (XE_RTP_PASTE_FOREACH(ENTRY_FLAG_, BITWISE_OR, (__VA_ARGS__)))
/**
* XE_RTP_ACTION_FLAG - Helper to add multiple flags to a struct xe_rtp_action
@@ -334,7 +348,7 @@ struct xe_reg_sr;
* };
*/
#define XE_RTP_ACTION_FLAG(...) \
- .flags = (__XE_PASTE(__XE_PASTE_XE_RTP_ACTION_FLAG_, BITWISE_OR, (__VA_ARGS__)))
+ .flags = (XE_RTP_PASTE_FOREACH(ACTION_FLAG_, BITWISE_OR, (__VA_ARGS__)))
/**
* XE_RTP_RULES - Helper to set multiple rules to a struct xe_rtp_entry entry
@@ -359,7 +373,7 @@ struct xe_reg_sr;
#define XE_RTP_RULES(...) \
.n_rules = _XE_COUNT_ARGS(__VA_ARGS__), \
.rules = (const struct xe_rtp_rule[]) { \
- __XE_PASTE(__XE_PASTE_XE_RTP_RULE_, COMMA, (__VA_ARGS__)) \
+ XE_RTP_PASTE_FOREACH(RULE_, COMMA, (__VA_ARGS__)) \
}
/**
@@ -385,7 +399,7 @@ struct xe_reg_sr;
#define XE_RTP_ACTIONS(...) \
.n_actions = _XE_COUNT_ARGS(__VA_ARGS__), \
.actions = (const struct xe_rtp_action[]) { \
- __XE_PASTE(__XE_PASTE_XE_RTP_ACTION_, COMMA, (__VA_ARGS__)) \
+ XE_RTP_PASTE_FOREACH(ACTION_, COMMA, (__VA_ARGS__)) \
}
void xe_rtp_process(const struct xe_rtp_entry *entries, struct xe_reg_sr *sr,
More information about the Intel-xe
mailing list