[Mesa-dev] [PATCH 3/5] gallium: add opcodes/cap for fine derivative support
Roland Scheidegger
sroland at vmware.com
Thu Aug 14 09:21:59 PDT 2014
Am 14.08.2014 16:39, schrieb Ilia Mirkin:
> I guess a question is whether we should even bother with the fine
> version at all then? Just map everything to DDX/DDY... Although I
> guess if llvmpipe does the coarse version sometimes, at least the fine
> version is warranted.
I think it's nice to have both versions. llvmpipe only does the coarse
version for its internal use.
If a shader does a ddx and ddy and then uses the values for a texture
instruction with explicit derivatives, some slower path is used for
sampling (which can handle different mip levels in a quad) (though this
currently depends a lot on debug vars such as no_quad_lod). The
problem is that even if you'd do a coarse_ddx, we still would fall back
to that slower path anyway, because (unlike intel hw where it really
matters if the actual lod values are different) we won't detect that
there is in fact just one lod per quad, so right now there would not
really be a benefit. Obviously, if you do the derivatives calculations
as part of the sampling itself, this is not a problem. FWIW the slow
path isn't actually all THAT much more complicated than the per-quad lod path
- strides, mip image offsets etc. need to be looked up per pixel rather
than per quad, plus some slowness comes from the fact that stupid
sse/avx doesn't have a true vector shift (only avx2 does)... There's also the
fact that the tex filter may be different too per pixel (with different
min/mag filter) though since we (in some cases at least with avx) do
texture sampling for multiple quads at once this is something which
needs to be handled in any case. I suspect hw being slower with
different effective lods per pixel has similar reasons - there's just
more work to be done.
Roland
>
> On Thu, Aug 14, 2014 at 10:12 AM, Roland Scheidegger <sroland at vmware.com> wrote:
>> Reviewed-by: Roland Scheidegger <sroland at vmware.com>
>>
>> llvmpipe also already does the fine version. A coarse version (which we
>> indeed do when used implicitly for sampling though with some other
>> changes) might be minimally simpler though not even sure (might save a
>> shuffle instruction somewhere), but probably not worth it (plus, d3d10
>> sm4 had deriv_rtx and sm5 deriv_rtx_coarse/deriv_rtx_fine but the sm4
>> versions correspond to the fine versions so this was required).
>>
>> Roland
>>
>> Am 14.08.2014 06:52, schrieb Ilia Mirkin:
>>> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
>>> ---
>>> src/gallium/auxiliary/tgsi/tgsi_info.c | 3 +++
>>> src/gallium/auxiliary/tgsi/tgsi_util.c | 2 ++
>>> src/gallium/docs/source/screen.rst | 2 ++
>>> src/gallium/docs/source/tgsi.rst | 12 ++++++++++--
>>> src/gallium/drivers/freedreno/freedreno_screen.c | 1 +
>>> src/gallium/drivers/i915/i915_screen.c | 1 +
>>> src/gallium/drivers/ilo/ilo_screen.c | 1 +
>>> src/gallium/drivers/llvmpipe/lp_screen.c | 1 +
>>> src/gallium/drivers/nouveau/nv30/nv30_screen.c | 1 +
>>> src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 +
>>> src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 1 +
>>> src/gallium/drivers/r300/r300_screen.c | 1 +
>>> src/gallium/drivers/r600/r600_pipe.c | 1 +
>>> src/gallium/drivers/radeonsi/si_pipe.c | 1 +
>>> src/gallium/drivers/softpipe/sp_screen.c | 1 +
>>> src/gallium/drivers/svga/svga_screen.c | 1 +
>>> src/gallium/drivers/vc4/vc4_screen.c | 1 +
>>> src/gallium/include/pipe/p_defines.h | 1 +
>>> src/gallium/include/pipe/p_shader_tokens.h | 5 ++++-
>>> 19 files changed, 35 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
>>> index e24348f..35f9747 100644
>>> --- a/src/gallium/auxiliary/tgsi/tgsi_info.c
>>> +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
>>> @@ -235,6 +235,9 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
>>> { 1, 1, 0, 0, 0, 0, OTHR, "INTERP_CENTROID", TGSI_OPCODE_INTERP_CENTROID },
>>> { 1, 2, 0, 0, 0, 0, OTHR, "INTERP_SAMPLE", TGSI_OPCODE_INTERP_SAMPLE },
>>> { 1, 2, 0, 0, 0, 0, OTHR, "INTERP_OFFSET", TGSI_OPCODE_INTERP_OFFSET },
>>> +
>>> + { 1, 1, 0, 0, 0, 0, COMP, "DDX_FINE", TGSI_OPCODE_DDX_FINE },
>>> + { 1, 1, 0, 0, 0, 0, COMP, "DDY_FINE", TGSI_OPCODE_DDY_FINE },
>>> };
>>>
>>> const struct tgsi_opcode_info *
>>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c
>>> index e48159c..e1cba95 100644
>>> --- a/src/gallium/auxiliary/tgsi/tgsi_util.c
>>> +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
>>> @@ -245,6 +245,8 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst,
>>> case TGSI_OPCODE_USNE:
>>> case TGSI_OPCODE_IMUL_HI:
>>> case TGSI_OPCODE_UMUL_HI:
>>> + case TGSI_OPCODE_DDX_FINE:
>>> + case TGSI_OPCODE_DDY_FINE:
>>> /* Channel-wise operations */
>>> read_mask = write_mask;
>>> break;
>>> diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst
>>> index 814e3ae..6fecc15 100644
>>> --- a/src/gallium/docs/source/screen.rst
>>> +++ b/src/gallium/docs/source/screen.rst
>>> @@ -213,6 +213,8 @@ The integer capabilities:
>>> * ``PIPE_CAP_DRAW_INDIRECT``: Whether the driver supports taking draw arguments
>>> { count, instance_count, start, index_bias } from a PIPE_BUFFER resource.
>>> See pipe_draw_info.
>>> +* ``PIPE_CAP_TGSI_FS_FINE_DERIVATIVE``: Whether the fragment shader supports
>>> + the FINE versions of DDX/DDY.
>>>
>>>
>>> .. _pipe_capf:
>>> diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
>>> index ac0ea54..7d5918f 100644
>>> --- a/src/gallium/docs/source/tgsi.rst
>>> +++ b/src/gallium/docs/source/tgsi.rst
>>> @@ -433,7 +433,11 @@ This instruction replicates its result.
>>> dst = \cos{src.x}
>>>
>>>
>>> -.. opcode:: DDX - Derivative Relative To X
>>> +.. opcode:: DDX, DDX_FINE - Derivative Relative To X
>>> +
>>> +The fine variant is only used when ``PIPE_CAP_TGSI_FS_FINE_DERIVATIVE`` is
>>> +advertised. When it is, the fine version guarantees one derivative per row
>>> +while DDX is allowed to be the same for the entire 2x2 quad.
>>>
>>> .. math::
>>>
>>> @@ -446,7 +450,11 @@ This instruction replicates its result.
>>> dst.w = partialx(src.w)
>>>
>>>
>>> -.. opcode:: DDY - Derivative Relative To Y
>>> +.. opcode:: DDY, DDY_FINE - Derivative Relative To Y
>>> +
>>> +The fine variant is only used when ``PIPE_CAP_TGSI_FS_FINE_DERIVATIVE`` is
>>> +advertised. When it is, the fine version guarantees one derivative per column
>>> +while DDY is allowed to be the same for the entire 2x2 quad.
>>>
>>> .. math::
>>>
>>> diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
>>> index de69b14..b156d8b 100644
>>> --- a/src/gallium/drivers/freedreno/freedreno_screen.c
>>> +++ b/src/gallium/drivers/freedreno/freedreno_screen.c
>>> @@ -216,6 +216,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
>>> case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
>>> case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
>>> case PIPE_CAP_DRAW_INDIRECT:
>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
>>> return 0;
>>>
>>> /* Stream output. */
>>> diff --git a/src/gallium/drivers/i915/i915_screen.c b/src/gallium/drivers/i915/i915_screen.c
>>> index ca3dd4a..53d5e75 100644
>>> --- a/src/gallium/drivers/i915/i915_screen.c
>>> +++ b/src/gallium/drivers/i915/i915_screen.c
>>> @@ -231,6 +231,7 @@ i915_get_param(struct pipe_screen *screen, enum pipe_cap cap)
>>> case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
>>> case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
>>> case PIPE_CAP_DRAW_INDIRECT:
>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
>>> return 0;
>>>
>>> case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
>>> diff --git a/src/gallium/drivers/ilo/ilo_screen.c b/src/gallium/drivers/ilo/ilo_screen.c
>>> index bd6d8dd..991d2d0 100644
>>> --- a/src/gallium/drivers/ilo/ilo_screen.c
>>> +++ b/src/gallium/drivers/ilo/ilo_screen.c
>>> @@ -433,6 +433,7 @@ ilo_get_param(struct pipe_screen *screen, enum pipe_cap param)
>>> case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
>>> case PIPE_CAP_MAX_VERTEX_STREAMS:
>>> case PIPE_CAP_DRAW_INDIRECT:
>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
>>> return 0;
>>>
>>> default:
>>> diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
>>> index 347b1af..f4f3257 100644
>>> --- a/src/gallium/drivers/llvmpipe/lp_screen.c
>>> +++ b/src/gallium/drivers/llvmpipe/lp_screen.c
>>> @@ -248,6 +248,7 @@ llvmpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
>>> case PIPE_CAP_SAMPLE_SHADING:
>>> case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
>>> case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
>>> return 0;
>>> case PIPE_CAP_FAKE_SW_MSAA:
>>> return 1;
>>> diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
>>> index 2860188..4766955 100644
>>> --- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
>>> +++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
>>> @@ -148,6 +148,7 @@ nv30_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
>>> case PIPE_CAP_USER_VERTEX_BUFFERS:
>>> case PIPE_CAP_COMPUTE:
>>> case PIPE_CAP_DRAW_INDIRECT:
>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
>>> return 0;
>>> }
>>>
>>> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
>>> index 7b1b112..34cca3d 100644
>>> --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
>>> +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
>>> @@ -200,6 +200,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
>>> case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
>>> case PIPE_CAP_COMPUTE:
>>> case PIPE_CAP_DRAW_INDIRECT:
>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
>>> return 0;
>>> }
>>>
>>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
>>> index d372a0f..17aee63 100644
>>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
>>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
>>> @@ -184,6 +184,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
>>> case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
>>> case PIPE_CAP_FAKE_SW_MSAA:
>>> case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
>>> return 0;
>>> }
>>>
>>> diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
>>> index 4e46f77..ad599e9 100644
>>> --- a/src/gallium/drivers/r300/r300_screen.c
>>> +++ b/src/gallium/drivers/r300/r300_screen.c
>>> @@ -178,6 +178,7 @@ static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
>>> case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
>>> case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
>>> case PIPE_CAP_DRAW_INDIRECT:
>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
>>> return 0;
>>>
>>> /* SWTCL-only features. */
>>> diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
>>> index 20d9f95..8a5ba79 100644
>>> --- a/src/gallium/drivers/r600/r600_pipe.c
>>> +++ b/src/gallium/drivers/r600/r600_pipe.c
>>> @@ -319,6 +319,7 @@ static int r600_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
>>> case PIPE_CAP_SAMPLE_SHADING:
>>> case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
>>> case PIPE_CAP_DRAW_INDIRECT:
>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
>>> return 0;
>>>
>>> /* Stream output. */
>>> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
>>> index 879387f..0f10f3b 100644
>>> --- a/src/gallium/drivers/radeonsi/si_pipe.c
>>> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
>>> @@ -254,6 +254,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
>>> case PIPE_CAP_FAKE_SW_MSAA:
>>> case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
>>> case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
>>> return 0;
>>>
>>> case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
>>> diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
>>> index 7be39d4..5e2640d 100644
>>> --- a/src/gallium/drivers/softpipe/sp_screen.c
>>> +++ b/src/gallium/drivers/softpipe/sp_screen.c
>>> @@ -194,6 +194,7 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
>>> case PIPE_CAP_SAMPLE_SHADING:
>>> case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
>>> case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
>>> return 0;
>>> case PIPE_CAP_FAKE_SW_MSAA:
>>> return 1;
>>> diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
>>> index 2fcc75c..d140f56 100644
>>> --- a/src/gallium/drivers/svga/svga_screen.c
>>> +++ b/src/gallium/drivers/svga/svga_screen.c
>>> @@ -278,6 +278,7 @@ svga_get_param(struct pipe_screen *screen, enum pipe_cap param)
>>> case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
>>> case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:
>>> case PIPE_CAP_DRAW_INDIRECT:
>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
>>> return 0;
>>> case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
>>> return 64;
>>> diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
>>> index c044c8e..7e59613 100644
>>> --- a/src/gallium/drivers/vc4/vc4_screen.c
>>> +++ b/src/gallium/drivers/vc4/vc4_screen.c
>>> @@ -157,6 +157,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
>>> case PIPE_CAP_MAX_TEXEL_OFFSET:
>>> case PIPE_CAP_MAX_VERTEX_STREAMS:
>>> case PIPE_CAP_DRAW_INDIRECT:
>>> + case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:
>>> return 0;
>>>
>>> /* Stream output. */
>>> diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h
>>> index 7a10d98..53d5d4b 100644
>>> --- a/src/gallium/include/pipe/p_defines.h
>>> +++ b/src/gallium/include/pipe/p_defines.h
>>> @@ -562,6 +562,7 @@ enum pipe_cap {
>>> PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION = 99,
>>> PIPE_CAP_MAX_VERTEX_STREAMS = 100,
>>> PIPE_CAP_DRAW_INDIRECT = 101,
>>> + PIPE_CAP_TGSI_FS_FINE_DERIVATIVE = 102,
>>> };
>>>
>>> #define PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50 (1 << 0)
>>> diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
>>> index 0d3ad6a..2921f81 100644
>>> --- a/src/gallium/include/pipe/p_shader_tokens.h
>>> +++ b/src/gallium/include/pipe/p_shader_tokens.h
>>> @@ -481,7 +481,10 @@ struct tgsi_property_data {
>>> #define TGSI_OPCODE_INTERP_SAMPLE 193
>>> #define TGSI_OPCODE_INTERP_OFFSET 194
>>>
>>> -#define TGSI_OPCODE_LAST 195
>>> +#define TGSI_OPCODE_DDX_FINE 195
>>> +#define TGSI_OPCODE_DDY_FINE 196
>>> +
>>> +#define TGSI_OPCODE_LAST 197
>>>
>>> #define TGSI_SAT_NONE 0 /* do not saturate */
>>> #define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */
>>>
>>
More information about the mesa-dev
mailing list