[Mesa-dev] [PATCH 4/7] i965/disasm: Split opcode tables by the generation they were introduced in.
Francisco Jerez
currojerez at riseup.net
Fri Apr 29 21:48:23 UTC 2016
Matt Turner <mattst88 at gmail.com> writes:
> On Thu, Apr 28, 2016 at 12:46 PM, Francisco Jerez <currojerez at riseup.net> wrote:
>> Matt Turner <mattst88 at gmail.com> writes:
>>
>>> On Thu, Apr 28, 2016 at 12:19 AM, Francisco Jerez <currojerez at riseup.net> wrote:
>>>> ---
>>>> src/mesa/drivers/dri/i965/brw_disasm.c | 90 ++++++++++++++++++++++++++--------
>>>> 1 file changed, 69 insertions(+), 21 deletions(-)
>>>>
>>>> diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
>>>> index 15d9383..0125434 100644
>>>> --- a/src/mesa/drivers/dri/i965/brw_disasm.c
>>>> +++ b/src/mesa/drivers/dri/i965/brw_disasm.c
>>>> @@ -30,9 +30,8 @@
>>>> #include "brw_inst.h"
>>>> #include "brw_eu.h"
>>>>
>>>> -static const struct opcode_desc opcode_descs[128] = {
>>>> +static const struct opcode_desc gen4_opcode_descs[128] = {
>>>> [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
>>>> - [BRW_OPCODE_MOVI] = { .name = "movi", .nsrc = 2, .ndst = 1 },
>>>> [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
>>>> [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
>>>> [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
>>>> @@ -40,27 +39,17 @@ static const struct opcode_desc opcode_descs[128] = {
>>>> [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
>>>> [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
>>>> [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
>>>> - [BRW_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 },
>>>> - [BRW_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 },
>>>> - [BRW_OPCODE_BFREV] = { .name = "bfrev", .nsrc = 1, .ndst = 1 },
>>>> - [BRW_OPCODE_FBH] = { .name = "fbh", .nsrc = 1, .ndst = 1 },
>>>> - [BRW_OPCODE_FBL] = { .name = "fbl", .nsrc = 1, .ndst = 1 },
>>>> - [BRW_OPCODE_CBIT] = { .name = "cbit", .nsrc = 1, .ndst = 1 },
>>>>
>>>> [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
>>>> [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
>>>> [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
>>>> [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
>>>> - [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
>>>> - [BRW_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 },
>>>> - [BRW_OPCODE_LRP] = { .name = "lrp", .nsrc = 3, .ndst = 1 },
>>>> [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
>>>> [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
>>>> [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
>>>> [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
>>>> [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
>>>> [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
>>>> - [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
>>>>
>>>> [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
>>>> [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
>>>> @@ -73,17 +62,9 @@ static const struct opcode_desc opcode_descs[128] = {
>>>> [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
>>>> [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
>>>> [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
>>>> - [BRW_OPCODE_CSEL] = { .name = "csel", .nsrc = 3, .ndst = 1 },
>>>> - [BRW_OPCODE_BFE] = { .name = "bfe", .nsrc = 3, .ndst = 1 },
>>>> - [BRW_OPCODE_BFI1] = { .name = "bfi1", .nsrc = 2, .ndst = 1 },
>>>> - [BRW_OPCODE_BFI2] = { .name = "bfi2", .nsrc = 3, .ndst = 1 },
>>>> - [BRW_OPCODE_ADDC] = { .name = "addc", .nsrc = 2, .ndst = 1 },
>>>> - [BRW_OPCODE_SUBB] = { .name = "subb", .nsrc = 2, .ndst = 1 },
>>>>
>>>> [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
>>>> [BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 },
>>>> - [BRW_OPCODE_SENDS] = { .name = "sends", .nsrc = 2, .ndst = 1 },
>>>> - [BRW_OPCODE_SENDSC] = { .name = "sendsc", .nsrc = 2, .ndst = 1 },
>>>> [BRW_OPCODE_ILLEGAL] = { .name = "illegal", .nsrc = 0, .ndst = 0 },
>>>> [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
>>>> [BRW_OPCODE_NENOP] = { .name = "nenop", .nsrc = 0, .ndst = 0 },
>>>> @@ -104,6 +85,70 @@ static const struct opcode_desc opcode_descs[128] = {
>>>> [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 0, .ndst = 0 },
>>>> };
>>>>
>>>> +static const struct opcode_desc g45_opcode_descs[128] = {
>>>> + [BRW_OPCODE_MOVI] = { .name = "movi", .nsrc = 2, .ndst = 1 },
>>>> + [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
>>>> +};
>>>> +
>>>> +static const struct opcode_desc gen6_opcode_descs[128] = {
>>>> + [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
>>>> + [BRW_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 },
>>>> + [BRW_OPCODE_LRP] = { .name = "lrp", .nsrc = 3, .ndst = 1 },
>>>> +};
>>>> +
>>>> +static const struct opcode_desc gen7_opcode_descs[128] = {
>>>> + [BRW_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 },
>>>> + [BRW_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 },
>>>> + [BRW_OPCODE_BFREV] = { .name = "bfrev", .nsrc = 1, .ndst = 1 },
>>>> + [BRW_OPCODE_BFE] = { .name = "bfe", .nsrc = 3, .ndst = 1 },
>>>> + [BRW_OPCODE_BFI1] = { .name = "bfi1", .nsrc = 2, .ndst = 1 },
>>>> + [BRW_OPCODE_BFI2] = { .name = "bfi2", .nsrc = 3, .ndst = 1 },
>>>> + [BRW_OPCODE_FBH] = { .name = "fbh", .nsrc = 1, .ndst = 1 },
>>>> + [BRW_OPCODE_FBL] = { .name = "fbl", .nsrc = 1, .ndst = 1 },
>>>> + [BRW_OPCODE_CBIT] = { .name = "cbit", .nsrc = 1, .ndst = 1 },
>>>> + [BRW_OPCODE_ADDC] = { .name = "addc", .nsrc = 2, .ndst = 1 },
>>>> + [BRW_OPCODE_SUBB] = { .name = "subb", .nsrc = 2, .ndst = 1 },
>>>> +};
>>>> +
>>>> +static const struct opcode_desc gen8_opcode_descs[128] = {
>>>> + [BRW_OPCODE_CSEL] = { .name = "csel", .nsrc = 3, .ndst = 1 },
>>>> +};
>>>> +
>>>> +static const struct opcode_desc gen9_opcode_descs[128] = {
>>>> + [BRW_OPCODE_SENDS] = { .name = "sends", .nsrc = 2, .ndst = 1 },
>>>> + [BRW_OPCODE_SENDSC] = { .name = "sendsc", .nsrc = 2, .ndst = 1 },
>>>
>>> A 128*16-byte array for each differing generation seems really bad to
>>> me, especially when they add 1-3 opcodes. A half a page for... one
>>> opcode.
>>>
>> No, it's not, an array bounded by 128*16B for each major hardware
>
> I think that's exactly what I said.
>
>> generation is a tiny amount of memory by (even not so) modern standards.
>> I have the suspicion you're trying to optimize prematurely, or do you
>> have any evidence that your suggestion can substantially improve
>> performance?
>
> It's not about performance, it's about taste. I don't think it's
> premature optimization. It's simply good craftsmanship.
>
I was astonished to read this -- All the alternatives you and Ken have
suggested so far have -- with all due respect -- some drawback
especially on the "good craftsmanship" end (although obviously what is
perceived as a hack and what is perceived as good craftsmanship is
highly subjective so I don't expect you to share the same view).
The solution I've put on the table is ridiculously simple and robust --
No mixing of conceptually different paradigms based on the opcode, no
inconsistent mixing of code-driven and data-driven programming depending
on the opcode, no separate tables with algorithmically different look-up
strategies. No heuristic arguments needed to improve the runtime of a
linear look-up which still gives you a rather uncertain worst-case
complexity dependent on the location and size of the gaps in the opcode
space. No ordering sensitivity -- which I think most alternatives
suggested so far suffer from and means that you need to carefully review
that any change in the table preserves the ordering (double ordering in
fact -- by opcode and generation), *but* you cannot tell from the opcode
name alone what the right ordering is, so reviewing future changes would
involve cross-checking. Instead just have an opcode table per
generation and use plain array look-up, period. It can hardly get
simpler than this. Simplicity is IMO the metric a good engineer will
know how to value the most especially in such a situation where all
alternatives are likely to be equivalent in practice -- That's what I
consider good craftsmanship, and it's more than worth the ridiculous
price of 11kB of storage.
> But performance does seem to be something you care about -- you
> mentioned on IRC that one suggested solution wouldn't allow
> constant-time array look ups. That's why I suggested two different
> solutions that avoid multiplying the storage requirements by six,
> while still allowing likely better performance in the common case.
>
I definitely care about performance, that's why I don't think it's worth
spending time (time I was BTW planning to spend on actually trying to
improve performance) on a task whose potential practical benefit is
insignificant, and whose non-practical "taste" benefit feels completely
backwards to me.
> Would would it take to convince you? How many people would have to
> tell you they dislike this solution?
To convince me of what? Of respinning the series? Sure, I could spend
another workday on the disassembler trying to come up with something we
both consider of "good taste" if you ask me to, but I think I have
higher priority stuff to take care of right now so it will take a while.
How would you like the approach of using binary search to look-up a
single ordered table? -- Which seems like the second-least baroque one
to me.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 212 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160429/f08c9ce7/attachment-0001.sig>
More information about the mesa-dev
mailing list