[Mesa-dev] [PATCH 4/7] i965/disasm: Split opcode tables by the generation they were introduced in.
Matt Turner
mattst88 at gmail.com
Fri Apr 29 19:15:37 UTC 2016
On Thu, Apr 28, 2016 at 12:46 PM, Francisco Jerez <currojerez at riseup.net> wrote:
> Matt Turner <mattst88 at gmail.com> writes:
>
>> On Thu, Apr 28, 2016 at 12:19 AM, Francisco Jerez <currojerez at riseup.net> wrote:
>>> ---
>>> src/mesa/drivers/dri/i965/brw_disasm.c | 90 ++++++++++++++++++++++++++--------
>>> 1 file changed, 69 insertions(+), 21 deletions(-)
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
>>> index 15d9383..0125434 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_disasm.c
>>> +++ b/src/mesa/drivers/dri/i965/brw_disasm.c
>>> @@ -30,9 +30,8 @@
>>> #include "brw_inst.h"
>>> #include "brw_eu.h"
>>>
>>> -static const struct opcode_desc opcode_descs[128] = {
>>> +static const struct opcode_desc gen4_opcode_descs[128] = {
>>> [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
>>> - [BRW_OPCODE_MOVI] = { .name = "movi", .nsrc = 2, .ndst = 1 },
>>> [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
>>> [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
>>> [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
>>> @@ -40,27 +39,17 @@ static const struct opcode_desc opcode_descs[128] = {
>>> [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
>>> [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
>>> [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
>>> - [BRW_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 },
>>> - [BRW_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 },
>>> - [BRW_OPCODE_BFREV] = { .name = "bfrev", .nsrc = 1, .ndst = 1 },
>>> - [BRW_OPCODE_FBH] = { .name = "fbh", .nsrc = 1, .ndst = 1 },
>>> - [BRW_OPCODE_FBL] = { .name = "fbl", .nsrc = 1, .ndst = 1 },
>>> - [BRW_OPCODE_CBIT] = { .name = "cbit", .nsrc = 1, .ndst = 1 },
>>>
>>> [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
>>> [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
>>> [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
>>> [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
>>> - [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
>>> - [BRW_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 },
>>> - [BRW_OPCODE_LRP] = { .name = "lrp", .nsrc = 3, .ndst = 1 },
>>> [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
>>> [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
>>> [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
>>> [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
>>> [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
>>> [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
>>> - [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
>>>
>>> [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
>>> [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
>>> @@ -73,17 +62,9 @@ static const struct opcode_desc opcode_descs[128] = {
>>> [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
>>> [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
>>> [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
>>> - [BRW_OPCODE_CSEL] = { .name = "csel", .nsrc = 3, .ndst = 1 },
>>> - [BRW_OPCODE_BFE] = { .name = "bfe", .nsrc = 3, .ndst = 1 },
>>> - [BRW_OPCODE_BFI1] = { .name = "bfi1", .nsrc = 2, .ndst = 1 },
>>> - [BRW_OPCODE_BFI2] = { .name = "bfi2", .nsrc = 3, .ndst = 1 },
>>> - [BRW_OPCODE_ADDC] = { .name = "addc", .nsrc = 2, .ndst = 1 },
>>> - [BRW_OPCODE_SUBB] = { .name = "subb", .nsrc = 2, .ndst = 1 },
>>>
>>> [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
>>> [BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 },
>>> - [BRW_OPCODE_SENDS] = { .name = "sends", .nsrc = 2, .ndst = 1 },
>>> - [BRW_OPCODE_SENDSC] = { .name = "sendsc", .nsrc = 2, .ndst = 1 },
>>> [BRW_OPCODE_ILLEGAL] = { .name = "illegal", .nsrc = 0, .ndst = 0 },
>>> [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
>>> [BRW_OPCODE_NENOP] = { .name = "nenop", .nsrc = 0, .ndst = 0 },
>>> @@ -104,6 +85,70 @@ static const struct opcode_desc opcode_descs[128] = {
>>> [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 0, .ndst = 0 },
>>> };
>>>
>>> +static const struct opcode_desc g45_opcode_descs[128] = {
>>> + [BRW_OPCODE_MOVI] = { .name = "movi", .nsrc = 2, .ndst = 1 },
>>> + [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
>>> +};
>>> +
>>> +static const struct opcode_desc gen6_opcode_descs[128] = {
>>> + [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
>>> + [BRW_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 },
>>> + [BRW_OPCODE_LRP] = { .name = "lrp", .nsrc = 3, .ndst = 1 },
>>> +};
>>> +
>>> +static const struct opcode_desc gen7_opcode_descs[128] = {
>>> + [BRW_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 },
>>> + [BRW_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 },
>>> + [BRW_OPCODE_BFREV] = { .name = "bfrev", .nsrc = 1, .ndst = 1 },
>>> + [BRW_OPCODE_BFE] = { .name = "bfe", .nsrc = 3, .ndst = 1 },
>>> + [BRW_OPCODE_BFI1] = { .name = "bfi1", .nsrc = 2, .ndst = 1 },
>>> + [BRW_OPCODE_BFI2] = { .name = "bfi2", .nsrc = 3, .ndst = 1 },
>>> + [BRW_OPCODE_FBH] = { .name = "fbh", .nsrc = 1, .ndst = 1 },
>>> + [BRW_OPCODE_FBL] = { .name = "fbl", .nsrc = 1, .ndst = 1 },
>>> + [BRW_OPCODE_CBIT] = { .name = "cbit", .nsrc = 1, .ndst = 1 },
>>> + [BRW_OPCODE_ADDC] = { .name = "addc", .nsrc = 2, .ndst = 1 },
>>> + [BRW_OPCODE_SUBB] = { .name = "subb", .nsrc = 2, .ndst = 1 },
>>> +};
>>> +
>>> +static const struct opcode_desc gen8_opcode_descs[128] = {
>>> + [BRW_OPCODE_CSEL] = { .name = "csel", .nsrc = 3, .ndst = 1 },
>>> +};
>>> +
>>> +static const struct opcode_desc gen9_opcode_descs[128] = {
>>> + [BRW_OPCODE_SENDS] = { .name = "sends", .nsrc = 2, .ndst = 1 },
>>> + [BRW_OPCODE_SENDSC] = { .name = "sendsc", .nsrc = 2, .ndst = 1 },
>>
>> A 128*16-byte array for each differing generation seems really bad to
>> me, especially when they add 1-3 opcodes. A half a page for... one
>> opcode.
>>
> No, it's not, an array bounded by 128*16B for each major hardware
I think that's exactly what I said.
> generation is a tiny amount of memory by (even not so) modern standards.
> I have the suspicion you're trying to optimize prematurely, or do you
> have any evidence that your suggestion can substantially improve
> performance?
It's not about performance, it's about taste. I don't think it's
premature optimization. It's simply good craftsmanship.
But performance does seem to be something you care about -- you
mentioned on IRC that one suggested solution wouldn't allow
constant-time array lookups. That's why I suggested two different
solutions that avoid multiplying the storage requirements by six,
while still allowing likely better performance in the common case.
What would it take to convince you? How many people would have to
tell you they dislike this solution?
More information about the mesa-dev
mailing list