[Nouveau] [PATCH v2 1/7] exa: add GM10x acceleration support

Samuel Pitoiset samuel.pitoiset at gmail.com
Thu Oct 27 17:37:48 UTC 2016


Two minor nitpicks below.

I didn't read all the shaders carefully because it's a pain, but I didn't
see any obvious problems. :-)

Thanks for the great work, Ilia!

Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>

On 10/27/2016 04:02 PM, Ilia Mirkin wrote:
> rendercheck -f a8r8g8b8 passes as much as on a GK208, and xv appears to
> work. Very lightly tested.
>
> Instead of sticking coordinates into pushbufs, the vertex shader is
> modified to read them from a constbuf, indexed by vertex id. This
> approach could be used for all nvc0 generations, but I didn't want to
> rock the boat.
>
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
> ---
>  src/Makefile.am           |  16 ++++++++
>  src/nouveau_copy.c        |   1 +
>  src/nouveau_exa.c         |   2 +-
>  src/nouveau_xv.c          |   2 +-
>  src/nv_accel_common.c     |   1 +
>  src/nv_driver.c           |   1 +
>  src/nvc0_accel.c          |  37 ++++++++++++++---
>  src/nvc0_exa.c            |  48 ++++++++++++++++++++--
>  src/nvc0_xv.c             |  48 ++++++++++++++++++++--
>  src/shader/Makefile       |  23 ++++++++---
>  src/shader/exac8nv110.fp  |  47 +++++++++++++++++++++
>  src/shader/exac8nv110.fpc |  38 +++++++++++++++++
>  src/shader/exacanv110.fp  |  47 +++++++++++++++++++++
>  src/shader/exacanv110.fpc |  38 +++++++++++++++++
>  src/shader/exacmnv110.fp  |  47 +++++++++++++++++++++
>  src/shader/exacmnv110.fpc |  38 +++++++++++++++++
>  src/shader/exas8nv110.fp  |  42 +++++++++++++++++++
>  src/shader/exas8nv110.fpc |  28 +++++++++++++
>  src/shader/exasanv110.fp  |  47 +++++++++++++++++++++
>  src/shader/exasanv110.fpc |  38 +++++++++++++++++
>  src/shader/exascnv110.fp  |  38 +++++++++++++++++
>  src/shader/exascnv110.fpc |  20 +++++++++
>  src/shader/videonv110.fp  |  54 ++++++++++++++++++++++++
>  src/shader/videonv110.fpc |  52 +++++++++++++++++++++++
>  src/shader/xfrm2nv110.vp  |  82 +++++++++++++++++++++++++++++++++++++
>  src/shader/xfrm2nv110.vpc | 102 ++++++++++++++++++++++++++++++++++++++++++++++
>  26 files changed, 918 insertions(+), 19 deletions(-)
>  create mode 100644 src/shader/exac8nv110.fp
>  create mode 100644 src/shader/exac8nv110.fpc
>  create mode 100644 src/shader/exacanv110.fp
>  create mode 100644 src/shader/exacanv110.fpc
>  create mode 100644 src/shader/exacmnv110.fp
>  create mode 100644 src/shader/exacmnv110.fpc
>  create mode 100644 src/shader/exas8nv110.fp
>  create mode 100644 src/shader/exas8nv110.fpc
>  create mode 100644 src/shader/exasanv110.fp
>  create mode 100644 src/shader/exasanv110.fpc
>  create mode 100644 src/shader/exascnv110.fp
>  create mode 100644 src/shader/exascnv110.fpc
>  create mode 100644 src/shader/videonv110.fp
>  create mode 100644 src/shader/videonv110.fpc
>  create mode 100644 src/shader/xfrm2nv110.vp
>  create mode 100644 src/shader/xfrm2nv110.vpc
>
> diff --git a/src/Makefile.am b/src/Makefile.am
> index 1e04ddf..6ba8d87 100644
> --- a/src/Makefile.am
> +++ b/src/Makefile.am
> @@ -77,48 +77,64 @@ EXTRA_DIST = hwdefs/nv_3ddefs.xml.h \
>  	     shader/exac8nve0.fpc \
>  	     shader/exac8nvf0.fp \
>  	     shader/exac8nvf0.fpc \
> +	     shader/exac8nv110.fp \
> +	     shader/exac8nv110.fpc \
>  	     shader/exacanvc0.fp \
>  	     shader/exacanvc0.fpc \
>  	     shader/exacanve0.fp \
>  	     shader/exacanve0.fpc \
>  	     shader/exacanvf0.fp \
>  	     shader/exacanvf0.fpc \
> +	     shader/exacanv110.fp \
> +	     shader/exacanv110.fpc \
>  	     shader/exacmnvc0.fp \
>  	     shader/exacmnvc0.fpc \
>  	     shader/exacmnve0.fp \
>  	     shader/exacmnve0.fpc \
>  	     shader/exacmnvf0.fp \
>  	     shader/exacmnvf0.fpc \
> +	     shader/exacmnv110.fp \
> +	     shader/exacmnv110.fpc \
>  	     shader/exas8nvc0.fp \
>  	     shader/exas8nvc0.fpc \
>  	     shader/exas8nve0.fp \
>  	     shader/exas8nve0.fpc \
>  	     shader/exas8nvf0.fp \
>  	     shader/exas8nvf0.fpc \
> +	     shader/exas8nv110.fp \
> +	     shader/exas8nv110.fpc \
>  	     shader/exasanvc0.fp \
>  	     shader/exasanvc0.fpc \
>  	     shader/exasanve0.fp \
>  	     shader/exasanve0.fpc \
>  	     shader/exasanvf0.fp \
>  	     shader/exasanvf0.fpc \
> +	     shader/exasanv110.fp \
> +	     shader/exasanv110.fpc \
>  	     shader/exascnvc0.fp \
>  	     shader/exascnvc0.fpc \
>  	     shader/exascnve0.fp \
>  	     shader/exascnve0.fpc \
>  	     shader/exascnvf0.fp \
>  	     shader/exascnvf0.fpc \
> +	     shader/exascnv110.fp \
> +	     shader/exascnv110.fpc \
>  	     shader/videonvc0.fp \
>  	     shader/videonvc0.fpc \
>  	     shader/videonve0.fp \
>  	     shader/videonve0.fpc \
>  	     shader/videonvf0.fp \
>  	     shader/videonvf0.fpc \
> +	     shader/videonv110.fp \
> +	     shader/videonv110.fpc \
>  	     shader/xfrm2nvc0.vp \
>  	     shader/xfrm2nvc0.vpc \
>  	     shader/xfrm2nve0.vp \
>  	     shader/xfrm2nve0.vpc \
>  	     shader/xfrm2nvf0.vp \
>  	     shader/xfrm2nvf0.vpc \
> +	     shader/xfrm2nv110.vp \
> +	     shader/xfrm2nv110.vpc \
>  	     shader/Makefile \
>  	     nouveau_local.h \
>  	     nouveau_copy.h \
> diff --git a/src/nouveau_copy.c b/src/nouveau_copy.c
> index e152a53..c139de6 100644
> --- a/src/nouveau_copy.c
> +++ b/src/nouveau_copy.c
> @@ -81,6 +81,7 @@ nouveau_copy_init(ScreenPtr pScreen)
>  					 &pNv->ce_channel);
>  		break;
>  	case NV_KEPLER:
> +	case NV_MAXWELL:
>  		ret = nouveau_object_new(&pNv->dev->object, 0,
>  					 NOUVEAU_FIFO_CHANNEL_CLASS,
>  					 &(struct nve0_fifo) {
> diff --git a/src/nouveau_exa.c b/src/nouveau_exa.c
> index def66ac..0f02b99 100644
> --- a/src/nouveau_exa.c
> +++ b/src/nouveau_exa.c
> @@ -514,12 +514,12 @@ nouveau_exa_init(ScreenPtr pScreen)
>  		break;
>  	case NV_FERMI:
>  	case NV_KEPLER:
> +	case NV_MAXWELL:
>  		exa->CheckComposite   = NVC0EXACheckComposite;
>  		exa->PrepareComposite = NVC0EXAPrepareComposite;
>  		exa->Composite        = NVC0EXAComposite;
>  		exa->DoneComposite    = NVC0EXADoneComposite;
>  		break;
> -	case NV_MAXWELL:
>  	default:
>  		break;
>  	}
> diff --git a/src/nouveau_xv.c b/src/nouveau_xv.c
> index d514dbf..716b18d 100644
> --- a/src/nouveau_xv.c
> +++ b/src/nouveau_xv.c
> @@ -2142,7 +2142,7 @@ NVSetupTexturedVideo (ScreenPtr pScreen, XF86VideoAdaptorPtr *textureAdaptor)
>  		textureAdaptor[0] = NV40SetupTexturedVideo(pScreen, FALSE);
>  		textureAdaptor[1] = NV40SetupTexturedVideo(pScreen, TRUE);
>  	} else
> -	if (pNv->Architecture >= NV_TESLA && pNv->Architecture < NV_MAXWELL) {
> +	if (pNv->Architecture >= NV_TESLA) {
>  		textureAdaptor[0] = NV50SetupTexturedVideo(pScreen);
>  	}
>  }
> diff --git a/src/nv_accel_common.c b/src/nv_accel_common.c
> index 9361ce8..5d12dd8 100644
> --- a/src/nv_accel_common.c
> +++ b/src/nv_accel_common.c
> @@ -722,6 +722,7 @@ NVAccelCommonInit(ScrnInfoPtr pScrn)
>  	switch (pNv->Architecture) {
>  	case NV_FERMI:
>  	case NV_KEPLER:
> +	case NV_MAXWELL:
>  		INIT_CONTEXT_OBJECT(3D_NVC0);
>  		break;
>  	case NV_TESLA:
> diff --git a/src/nv_driver.c b/src/nv_driver.c
> index 4dde8e0..fff83f8 100644
> --- a/src/nv_driver.c
> +++ b/src/nv_driver.c
> @@ -389,6 +389,7 @@ NVHasKMS(struct pci_device *pci_dev, struct xf86_platform_device *platform_dev)
>  	case 0xe0:
>  	case 0xf0:
>  	case 0x100:
> +	case 0x110:
>  		break;
>  	default:
>  		xf86DrvMsg(-1, X_ERROR, "Unknown chipset: NV%02X\n", chipset);
> diff --git a/src/nvc0_accel.c b/src/nvc0_accel.c
> index d2a3b93..52a17db 100644
> --- a/src/nvc0_accel.c
> +++ b/src/nvc0_accel.c
> @@ -53,6 +53,16 @@
>  #include "shader/exas8nvf0.fp"
>  #include "shader/exac8nvf0.fp"
>
> +#include "shader/xfrm2nv110.vp"
> +#include "shader/videonv110.fp"
> +
> +#include "shader/exascnv110.fp"
> +#include "shader/exacmnv110.fp"
> +#include "shader/exacanv110.fp"
> +#include "shader/exasanv110.fp"
> +#include "shader/exas8nv110.fp"
> +#include "shader/exac8nv110.fp"
> +
>  #define NVC0PushProgram(pNv,addr,code) do {                                    \
>  	const unsigned size = sizeof(code) / sizeof(code[0]);                  \
>  	PUSH_DATAu((pNv)->pushbuf, (pNv)->scratch, (addr), size);              \
> @@ -223,9 +233,12 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn)
>  	} else if (pNv->dev->chipset < 0xf0) {
>  		class  = 0xa097;
>  		handle = 0x0000906e;
> -	} else {
> +	} else if (pNv->dev->chipset < 0x110) {
>  		class  = 0xa197;
>  		handle = 0x0000906e;
> +	} else {
> +		class  = 0xb097;
> +		handle = 0x0000906e;
>  	}
>
>  	ret = nouveau_object_new(pNv->channel, class, class,
> @@ -304,10 +317,12 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn)
>  		PUSH_DATA (push, 1);
>  	}
>
> -	BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3);
> -	PUSH_DATA (push, (bo->offset + MISC_OFFSET) >> 32);
> -	PUSH_DATA (push, (bo->offset + MISC_OFFSET));
> -	PUSH_DATA (push, 1);
> +	if (pNv->Architecture < NV_MAXWELL) {
> +		BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3);
> +		PUSH_DATA (push, (bo->offset + MISC_OFFSET) >> 32);
> +		PUSH_DATA (push, (bo->offset + MISC_OFFSET));
> +		PUSH_DATA (push, 1);
> +	}
>
>  	BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
>  	PUSH_DATA (push, (bo->offset + CODE_OFFSET) >> 32);
> @@ -334,7 +349,8 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn)
>  		NVC0PushProgram(pNv, PFP_S_A8, NVE0FP_Source_A8);
>  		NVC0PushProgram(pNv, PFP_C_A8, NVE0FP_Composite_A8);
>  		NVC0PushProgram(pNv, PFP_NV12, NVE0FP_NV12);
> -	} else {
> +	} else
> +	if (pNv->dev->chipset < 0x110) {
>  		NVC0PushProgram(pNv, PVP_PASS, NVF0VP_Transform2);
>  		NVC0PushProgram(pNv, PFP_S, NVF0FP_Source);
>  		NVC0PushProgram(pNv, PFP_C, NVF0FP_Composite);
> @@ -343,6 +359,15 @@ NVAccelInit3D_NVC0(ScrnInfoPtr pScrn)
>  		NVC0PushProgram(pNv, PFP_S_A8, NVF0FP_Source_A8);
>  		NVC0PushProgram(pNv, PFP_C_A8, NVF0FP_Composite_A8);
>  		NVC0PushProgram(pNv, PFP_NV12, NVF0FP_NV12);
> +	} else {
> +		NVC0PushProgram(pNv, PVP_PASS, NV110VP_Transform2);
> +		NVC0PushProgram(pNv, PFP_S, NV110FP_Source);
> +		NVC0PushProgram(pNv, PFP_C, NV110FP_Composite);
> +		NVC0PushProgram(pNv, PFP_CCA, NV110FP_CAComposite);
> +		NVC0PushProgram(pNv, PFP_CCASA, NV110FP_CACompositeSrcAlpha);
> +		NVC0PushProgram(pNv, PFP_S_A8, NV110FP_Source_A8);
> +		NVC0PushProgram(pNv, PFP_C_A8, NV110FP_Composite_A8);
> +		NVC0PushProgram(pNv, PFP_NV12, NV110FP_NV12);
>  	}
>
>  	BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 4);
> diff --git a/src/nvc0_exa.c b/src/nvc0_exa.c
> index 6add60b..a53dfe6 100644
> --- a/src/nvc0_exa.c
> +++ b/src/nvc0_exa.c
> @@ -914,14 +914,56 @@ NVC0EXAComposite(PixmapPtr pdpix,
>  	if (!PUSH_SPACE(push, 64))
>  		return;
>
> +	if (pNv->dev->chipset >= 0x110) {
> +		BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
> +		PUSH_DATA (push, 256);
> +		PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA) >> 32);
> +		PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA));
> +		BEGIN_1IC0(push, NVC0_3D(CB_POS), 3 * (4 + 2 + 2) + 1);

I would suggest writing just "1 + 24" here instead of "3 * (4 + 2 + 2) + 1", but it's your call.

> +		PUSH_DATA (push, 0x80);
> +
> +		PUSH_DATAf(push, dx);
> +		PUSH_DATAf(push, dy + (h * 2));
> +		PUSH_DATAf(push, 0);
> +		PUSH_DATAf(push, 1);
> +		PUSH_DATAf(push, sx);
> +		PUSH_DATAf(push, sy + (h * 2));
> +		PUSH_DATAf(push, mx);
> +		PUSH_DATAf(push, my + (h * 2));
> +
> +		PUSH_DATAf(push, dx);
> +		PUSH_DATAf(push, dy);
> +		PUSH_DATAf(push, 0);
> +		PUSH_DATAf(push, 1);
> +		PUSH_DATAf(push, sx);
> +		PUSH_DATAf(push, sy);
> +		PUSH_DATAf(push, mx);
> +		PUSH_DATAf(push, my);
> +
> +		PUSH_DATAf(push, dx + (w * 2));
> +		PUSH_DATAf(push, dy);
> +		PUSH_DATAf(push, 0);
> +		PUSH_DATAf(push, 1);
> +		PUSH_DATAf(push, sx + (w * 2));
> +		PUSH_DATAf(push, sy);
> +		PUSH_DATAf(push, mx + (w * 2));
> +		PUSH_DATAf(push, my);
> +	}
> +
>  	BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(0)), 2);
>  	PUSH_DATA (push, ((dx + w) << 16) | dx);
>  	PUSH_DATA (push, ((dy + h) << 16) | dy);
>  	BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1);
>  	PUSH_DATA (push, NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES);
> -	PUSH_VTX2s(push, sx, sy + (h * 2), mx, my + (h * 2), dx, dy + (h * 2));
> -	PUSH_VTX2s(push, sx, sy, mx, my, dx, dy);
> -	PUSH_VTX2s(push, sx + (w * 2), sy, mx + (w * 2), my, dx + (w * 2), dy);
> +	if (pNv->dev->chipset < 0x110) {
> +		PUSH_VTX2s(push, sx, sy + (h * 2), mx, my + (h * 2), dx, dy + (h * 2));
> +		PUSH_VTX2s(push, sx, sy, mx, my, dx, dy);
> +		PUSH_VTX2s(push, sx + (w * 2), sy, mx + (w * 2), my, dx + (w * 2), dy);
> +	} else {
> +		BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
> +		PUSH_DATA (push, 0);
> +		PUSH_DATA (push, 3);
> +	}
>  	BEGIN_NVC0(push, NVC0_3D(VERTEX_END_GL), 1);
>  	PUSH_DATA (push, 0);
>  }
> diff --git a/src/nvc0_xv.c b/src/nvc0_xv.c
> index d1d8f18..129c505 100644
> --- a/src/nvc0_xv.c
> +++ b/src/nvc0_xv.c
> @@ -247,15 +247,57 @@ nvc0_xv_image_put(ScrnInfoPtr pScrn,
>  		    nouveau_pushbuf_refn (push, refs, 3))
>  			return BadImplementation;
>
> +		if (pNv->dev->chipset >= 0x110) {
> +			BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
> +			PUSH_DATA (push, 256);
> +			PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA) >> 32);
> +			PUSH_DATA (push, (pNv->scratch->offset + PVP_DATA));
> +			BEGIN_1IC0(push, NVC0_3D(CB_POS), 3 * (4 + 2 + 2) + 1);

The same applies here: I would suggest "1 + 24" instead of "3 * (4 + 2 + 2) + 1".

> +			PUSH_DATA (push, 0x80);
> +
> +			PUSH_DATAf(push, sx1);
> +			PUSH_DATAf(push, sy1);
> +			PUSH_DATAf(push, 0);
> +			PUSH_DATAf(push, 1);
> +			PUSH_DATAf(push, tx1);
> +			PUSH_DATAf(push, ty1);
> +			PUSH_DATAf(push, 0);
> +			PUSH_DATAf(push, 0);
> +
> +			PUSH_DATAf(push, sx2+(sx2-sx1));
> +			PUSH_DATAf(push, sy1);
> +			PUSH_DATAf(push, 0);
> +			PUSH_DATAf(push, 1);
> +			PUSH_DATAf(push, tx2+(tx2-tx1));
> +			PUSH_DATAf(push, ty1);
> +			PUSH_DATAf(push, 0);
> +			PUSH_DATAf(push, 0);
> +
> +			PUSH_DATAf(push, sx1);
> +			PUSH_DATAf(push, sy2+(sy2-sy1));
> +			PUSH_DATAf(push, 0);
> +			PUSH_DATAf(push, 1);
> +			PUSH_DATAf(push, tx1);
> +			PUSH_DATAf(push, ty2+(ty2-ty1));
> +			PUSH_DATAf(push, 0);
> +			PUSH_DATAf(push, 0);
> +		}
> +
>  		BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(0)), 2);
>  		PUSH_DATA (push, sx2 << NVC0_3D_SCISSOR_HORIZ_MAX__SHIFT | sx1);
>  		PUSH_DATA (push, sy2 << NVC0_3D_SCISSOR_VERT_MAX__SHIFT | sy1 );
>
>  		BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1);
>  		PUSH_DATA (push, NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES);
> -		PUSH_VTX1s(push, tx1, ty1, sx1, sy1);
> -		PUSH_VTX1s(push, tx2+(tx2-tx1), ty1, sx2+(sx2-sx1), sy1);
> -		PUSH_VTX1s(push, tx1, ty2+(ty2-ty1), sx1, sy2+(sy2-sy1));
> +		if (pNv->dev->chipset < 0x110) {
> +			PUSH_VTX1s(push, tx1, ty1, sx1, sy1);
> +			PUSH_VTX1s(push, tx2+(tx2-tx1), ty1, sx2+(sx2-sx1), sy1);
> +			PUSH_VTX1s(push, tx1, ty2+(ty2-ty1), sx1, sy2+(sy2-sy1));
> +		} else {
> +			BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
> +			PUSH_DATA (push, 0);
> +			PUSH_DATA (push, 3);
> +		}
>  		BEGIN_NVC0(push, NVC0_3D(VERTEX_END_GL), 1);
>  		PUSH_DATA (push, 0);
>
> diff --git a/src/shader/Makefile b/src/shader/Makefile
> index 2d789be..12bf455 100644
> --- a/src/shader/Makefile
> +++ b/src/shader/Makefile
> @@ -22,23 +22,36 @@ NVF0_SHADERS = xfrm2nvf0.vpc \
>  	       exas8nvf0.fpc \
>  	       exac8nvf0.fpc \
>  	       videonvf0.fpc
> +NV110_SHADERS = xfrm2nv110.vpc \
> +	       exascnv110.fpc \
> +	       exacmnv110.fpc \
> +	       exacanv110.fpc \
> +	       exasanv110.fpc \
> +	       exas8nv110.fpc \
> +	       exac8nv110.fpc \
> +	       videonv110.fpc
>
> -SHADERS = $(NVC0_SHADERS) $(NVE0_SHADERS) $(NVF0_SHADERS)
> +SHADERS = $(NVC0_SHADERS) $(NVE0_SHADERS) $(NVF0_SHADERS) $(NV110_SHADERS)
>  ENVYAS ?= envyas
>
>  all: $(SHADERS)
>
>  $(filter %nvc0.vpc,$(SHADERS)): %.vpc: %.vp
> -	cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -o $@
> +	cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gf100 -o $@
>  $(filter %nvc0.fpc,$(SHADERS)): %.fpc: %.fp
> -	cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -o $@
> +	cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gf100 -o $@
>
>  $(filter %nve0.vpc,$(SHADERS)): %.vpc: %.vp
> -	cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -V nve4 -o $@
> +	cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gk104 -o $@
>  $(filter %nve0.fpc,$(SHADERS)): %.fpc: %.fp
> -	cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m nvc0 -V nve4 -o $@
> +	cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gf100 -V gk104 -o $@
>
>  $(filter %nvf0.vpc,$(SHADERS)): %.vpc: %.vp
>  	cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gk110 -o $@
>  $(filter %nvf0.fpc,$(SHADERS)): %.fpc: %.fp
>  	cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gk110 -o $@
> +
> +$(filter %nv110.vpc,$(SHADERS)): %.vpc: %.vp
> +	cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gm107 -o $@
> +$(filter %nv110.fpc,$(SHADERS)): %.fpc: %.fp
> +	cpp -DENVYAS $< | sed -e '/^#/d' | $(ENVYAS) -w -m gm107 -o $@
> diff --git a/src/shader/exac8nv110.fp b/src/shader/exac8nv110.fp
> new file mode 100644
> index 0000000..ce78036
> --- /dev/null
> +++ b/src/shader/exac8nv110.fp
> @@ -0,0 +1,47 @@
> +#ifndef ENVYAS
> +static uint32_t
> +NV110FP_Composite_A8[] = {
> +	0x00001462,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x80000000,
> +	0x00000a0a,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x0000000f,
> +	0x00000000,
> +#include "exac8nv110.fpc"
> +};
> +#else
> +
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> +mufu rcp $r0 $r0
> +ipa $r3 a[0x94] $r0 0x0 0x1
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ipa $r2 a[0x90] $r0 0x0 0x1
> +tex nodep $r1 $r2 0x0 0x1 t2d 0x8
> +ipa $r3 a[0x84] $r0 0x0 0x1
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ipa $r2 a[0x80] $r0 0x0 0x1
> +tex nodep $r0 $r2 0x0 0x0 t2d 0x8
> +depbar le 0x5 0x0 0x0
> +sched (st 0x0) (st 0x0) (st 0x0)
> +fmul ftz $r3 $r0 $r1
> +mov $r2 $r3 0xf
> +mov $r1 $r3 0xf
> +sched (st 0x0) (st 0x0) (st 0x0)
> +mov $r0 $r3 0xf
> +exit
> +#endif
> diff --git a/src/shader/exac8nv110.fpc b/src/shader/exac8nv110.fpc
> new file mode 100644
> index 0000000..4aa1368
> --- /dev/null
> +++ b/src/shader/exac8nv110.fpc
> @@ -0,0 +1,38 @@
> +0xfc0007e0,
> +0x001f8000,
> +0xcff7ff00,
> +0xe003ff87,
> +0x00470000,
> +0x50800000,
> +0x4007ff03,
> +0xe043ff89,
> +0xfc0007e0,
> +0x001f8000,
> +0x0007ff02,
> +0xe043ff89,
> +0x2ff70201,
> +0xc03a0014,
> +0x4007ff03,
> +0xe043ff88,
> +0xfc0007e0,
> +0x001f8000,
> +0x0007ff02,
> +0xe043ff88,
> +0x2ff70200,
> +0xc03a0004,
> +0x34070000,
> +0xf0f00000,
> +0xfc0007e0,
> +0x001f8000,
> +0x00170003,
> +0x5c681000,
> +0x00370002,
> +0x5c980780,
> +0x00370001,
> +0x5c980780,
> +0xfc0007e0,
> +0x001f8000,
> +0x00370000,
> +0x5c980780,
> +0x0007000f,
> +0xe3000000,
> diff --git a/src/shader/exacanv110.fp b/src/shader/exacanv110.fp
> new file mode 100644
> index 0000000..a70d5c5
> --- /dev/null
> +++ b/src/shader/exacanv110.fp
> @@ -0,0 +1,47 @@
> +#ifndef ENVYAS
> +static uint32_t
> +NV110FP_CAComposite[] = {
> +	0x00001462, /* 0x0000c000 = USES_KIL, MULTI_COLORS */
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x80000000, /* FRAG_COORD_UMASK = 0x8 */
> +	0x00000a0a, /* FP_INTERP[0x080], 0022 0022 */
> +	0x00000000, /* FP_INTERP[0x0c0], 0 = OFF */
> +	0x00000000, /* FP_INTERP[0x100], 1 = FLAT */
> +	0x00000000, /* FP_INTERP[0x140], 2 = PERSPECTIVE */
> +	0x00000000, /* FP_INTERP[0x180], 3 = LINEAR */
> +	0x00000000, /* FP_INTERP[0x1c0] */
> +	0x00000000, /* FP_INTERP[0x200] */
> +	0x00000000, /* FP_INTERP[0x240] */
> +	0x00000000, /* FP_INTERP[0x280] */
> +	0x00000000, /* FP_INTERP[0x2c0] */
> +	0x00000000, /* FP_INTERP[0x300] */
> +	0x00000000,
> +	0x0000000f, /* FP_RESULT_MASK (0x8000 Face ?) */
> +	0x00000000, /* 0x2 = FragDepth, 0x1 = SampleMask */
> +#include "exacanv110.fpc"
> +};
> +#else
> +
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> +mufu rcp $r0 $r0
> +ipa $r3 a[0x94] $r0 0x0 0x1
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ipa $r2 a[0x90] $r0 0x0 0x1
> +tex nodep $r4 $r2 0x0 0x1 t2d 0xf
> +ipa $r1 a[0x84] $r0 0x0 0x1
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ipa $r0 a[0x80] $r0 0x0 0x1
> +tex nodep $r0 $r0 0x0 0x0 t2d 0xf
> +depbar le 0x5 0x0 0x0
> +sched (st 0x0) (st 0x0) (st 0x0)
> +fmul ftz $r3 $r3 $r7
> +fmul ftz $r2 $r2 $r6
> +fmul ftz $r1 $r1 $r5
> +sched (st 0x0) (st 0x0) (st 0x0)
> +fmul ftz $r0 $r0 $r4
> +exit
> +#endif
> diff --git a/src/shader/exacanv110.fpc b/src/shader/exacanv110.fpc
> new file mode 100644
> index 0000000..7c0ca5e
> --- /dev/null
> +++ b/src/shader/exacanv110.fpc
> @@ -0,0 +1,38 @@
> +0xfc0007e0,
> +0x001f8000,
> +0xcff7ff00,
> +0xe003ff87,
> +0x00470000,
> +0x50800000,
> +0x4007ff03,
> +0xe043ff89,
> +0xfc0007e0,
> +0x001f8000,
> +0x0007ff02,
> +0xe043ff89,
> +0xaff70204,
> +0xc03a0017,
> +0x4007ff01,
> +0xe043ff88,
> +0xfc0007e0,
> +0x001f8000,
> +0x0007ff00,
> +0xe043ff88,
> +0xaff70000,
> +0xc03a0007,
> +0x34070000,
> +0xf0f00000,
> +0xfc0007e0,
> +0x001f8000,
> +0x00770303,
> +0x5c681000,
> +0x00670202,
> +0x5c681000,
> +0x00570101,
> +0x5c681000,
> +0xfc0007e0,
> +0x001f8000,
> +0x00470000,
> +0x5c681000,
> +0x0007000f,
> +0xe3000000,
> diff --git a/src/shader/exacmnv110.fp b/src/shader/exacmnv110.fp
> new file mode 100644
> index 0000000..fe5c294
> --- /dev/null
> +++ b/src/shader/exacmnv110.fp
> @@ -0,0 +1,47 @@
> +#ifndef ENVYAS
> +static uint32_t
> +NV110FP_Composite[] = {
> +	0x00001462,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x80000000,
> +	0x00000a0a,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x0000000f,
> +	0x00000000,
> +#include "exacmnv110.fpc"
> +};
> +#else
> +
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> +mufu rcp $r0 $r0
> +ipa $r3 a[0x94] $r0 0x0 0x1
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ipa $r2 a[0x90] $r0 0x0 0x1
> +tex nodep $r4 $r2 0x0 0x1 t2d 0x8
> +ipa $r1 a[0x84] $r0 0x0 0x1
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ipa $r0 a[0x80] $r0 0x0 0x1
> +tex nodep $r0 $r0 0x0 0x0 t2d 0xf
> +depbar le 0x5 0x0 0x0
> +sched (st 0x0) (st 0x0) (st 0x0)
> +fmul ftz $r3 $r3 $r4
> +fmul ftz $r2 $r2 $r4
> +fmul ftz $r1 $r1 $r4
> +sched (st 0x0) (st 0x0) (st 0x0)
> +fmul ftz $r0 $r0 $r4
> +exit
> +#endif
> diff --git a/src/shader/exacmnv110.fpc b/src/shader/exacmnv110.fpc
> new file mode 100644
> index 0000000..9d62c1a
> --- /dev/null
> +++ b/src/shader/exacmnv110.fpc
> @@ -0,0 +1,38 @@
> +0xfc0007e0,
> +0x001f8000,
> +0xcff7ff00,
> +0xe003ff87,
> +0x00470000,
> +0x50800000,
> +0x4007ff03,
> +0xe043ff89,
> +0xfc0007e0,
> +0x001f8000,
> +0x0007ff02,
> +0xe043ff89,
> +0x2ff70204,
> +0xc03a0014,
> +0x4007ff01,
> +0xe043ff88,
> +0xfc0007e0,
> +0x001f8000,
> +0x0007ff00,
> +0xe043ff88,
> +0xaff70000,
> +0xc03a0007,
> +0x34070000,
> +0xf0f00000,
> +0xfc0007e0,
> +0x001f8000,
> +0x00470303,
> +0x5c681000,
> +0x00470202,
> +0x5c681000,
> +0x00470101,
> +0x5c681000,
> +0xfc0007e0,
> +0x001f8000,
> +0x00470000,
> +0x5c681000,
> +0x0007000f,
> +0xe3000000,
> diff --git a/src/shader/exas8nv110.fp b/src/shader/exas8nv110.fp
> new file mode 100644
> index 0000000..4fe2e19
> --- /dev/null
> +++ b/src/shader/exas8nv110.fp
> @@ -0,0 +1,42 @@
> +#ifndef ENVYAS
> +static uint32_t
> +NV110FP_Source_A8[] = {
> +	0x00001462,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x80000000,
> +	0x0000000a,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x0000000f,
> +	0x00000000,
> +#include "exas8nv110.fpc"
> +};
> +#else
> +
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> +mufu rcp $r0 $r0
> +ipa $r1 a[0x84] $r0 0x0 0x1
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ipa $r0 a[0x80] $r0 0x0 0x1
> +tex nodep $r0 $r0 0x0 0x0 t2d 0x8
> +depbar le 0x5 0x0 0x0
> +sched (st 0x0) (st 0x0) (st 0x0)
> +mov $r3 $r0 0xf
> +mov $r2 $r0 0xf
> +mov $r1 $r0 0xf
> +sched (st 0x0) (st 0x0) (st 0x0)
> +exit
> +#endif
> diff --git a/src/shader/exas8nv110.fpc b/src/shader/exas8nv110.fpc
> new file mode 100644
> index 0000000..1181c41
> --- /dev/null
> +++ b/src/shader/exas8nv110.fpc
> @@ -0,0 +1,28 @@
> +0xfc0007e0,
> +0x001f8000,
> +0xcff7ff00,
> +0xe003ff87,
> +0x00470000,
> +0x50800000,
> +0x4007ff01,
> +0xe043ff88,
> +0xfc0007e0,
> +0x001f8000,
> +0x0007ff00,
> +0xe043ff88,
> +0x2ff70000,
> +0xc03a0004,
> +0x34070000,
> +0xf0f00000,
> +0xfc0007e0,
> +0x001f8000,
> +0x00070003,
> +0x5c980780,
> +0x00070002,
> +0x5c980780,
> +0x00070001,
> +0x5c980780,
> +0xfc0007e0,
> +0x001f8000,
> +0x0007000f,
> +0xe3000000,
> diff --git a/src/shader/exasanv110.fp b/src/shader/exasanv110.fp
> new file mode 100644
> index 0000000..61374a6
> --- /dev/null
> +++ b/src/shader/exasanv110.fp
> @@ -0,0 +1,47 @@
> +#ifndef ENVYAS
> +static uint32_t
> +NV110FP_CACompositeSrcAlpha[] = {
> +	0x00001462,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x80000000,
> +	0x00000a0a,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x0000000f,
> +	0x00000000,
> +#include "exasanv110.fpc"
> +};
> +#else
> +
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> +mufu rcp $r0 $r0
> +ipa $r3 a[0x84] $r0 0x0 0x1
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ipa $r2 a[0x80] $r0 0x0 0x1
> +tex nodep $r4 $r2 0x0 0x0 t2d 0x8
> +ipa $r1 a[0x94] $r0 0x0 0x1
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ipa $r0 a[0x90] $r0 0x0 0x1
> +tex nodep $r0 $r0 0x0 0x1 t2d 0xf
> +depbar le 0x5 0x0 0x0
> +sched (st 0x0) (st 0x0) (st 0x0)
> +fmul ftz $r3 $r3 $r4
> +fmul ftz $r2 $r2 $r4
> +fmul ftz $r1 $r1 $r4
> +sched (st 0x0) (st 0x0) (st 0x0)
> +fmul ftz $r0 $r0 $r4
> +exit
> +#endif
> diff --git a/src/shader/exasanv110.fpc b/src/shader/exasanv110.fpc
> new file mode 100644
> index 0000000..5516a03
> --- /dev/null
> +++ b/src/shader/exasanv110.fpc
> @@ -0,0 +1,38 @@
> +0xfc0007e0,
> +0x001f8000,
> +0xcff7ff00,
> +0xe003ff87,
> +0x00470000,
> +0x50800000,
> +0x4007ff03,
> +0xe043ff88,
> +0xfc0007e0,
> +0x001f8000,
> +0x0007ff02,
> +0xe043ff88,
> +0x2ff70204,
> +0xc03a0004,
> +0x4007ff01,
> +0xe043ff89,
> +0xfc0007e0,
> +0x001f8000,
> +0x0007ff00,
> +0xe043ff89,
> +0xaff70000,
> +0xc03a0017,
> +0x34070000,
> +0xf0f00000,
> +0xfc0007e0,
> +0x001f8000,
> +0x00470303,
> +0x5c681000,
> +0x00470202,
> +0x5c681000,
> +0x00470101,
> +0x5c681000,
> +0xfc0007e0,
> +0x001f8000,
> +0x00470000,
> +0x5c681000,
> +0x0007000f,
> +0xe3000000,
> diff --git a/src/shader/exascnv110.fp b/src/shader/exascnv110.fp
> new file mode 100644
> index 0000000..90bbb55
> --- /dev/null
> +++ b/src/shader/exascnv110.fp
> @@ -0,0 +1,38 @@
> +#ifndef ENVYAS
> +static uint32_t
> +NV110FP_Source[] = {
> +	0x00001462,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x80000000,
> +	0x0000000a,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x0000000f,
> +	0x00000000,
> +#include "exascnv110.fpc"
> +};
> +#else
> +
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ipa pass $r0 a[0x7c] 0x0 0x0 0x1
> +mufu rcp $r0 $r0
> +ipa $r1 a[0x84] $r0 0x0 0x1
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ipa $r0 a[0x80] $r0 0x0 0x1
> +tex nodep $r0 $r0 0x0 0x0 t2d 0xf
> +depbar le 0x5 0x0 0x0
> +sched (st 0x0) (st 0x0) (st 0x0)
> +exit
> +#endif
> diff --git a/src/shader/exascnv110.fpc b/src/shader/exascnv110.fpc
> new file mode 100644
> index 0000000..2dba15d
> --- /dev/null
> +++ b/src/shader/exascnv110.fpc
> @@ -0,0 +1,20 @@
> +0xfc0007e0,
> +0x001f8000,
> +0xcff7ff00,
> +0xe003ff87,
> +0x00470000,
> +0x50800000,
> +0x4007ff01,
> +0xe043ff88,
> +0xfc0007e0,
> +0x001f8000,
> +0x0007ff00,
> +0xe043ff88,
> +0xaff70000,
> +0xc03a0007,
> +0x34070000,
> +0xf0f00000,
> +0xfc0007e0,
> +0x001f8000,
> +0x0007000f,
> +0xe3000000,
> diff --git a/src/shader/videonv110.fp b/src/shader/videonv110.fp
> new file mode 100644
> index 0000000..2728311
> --- /dev/null
> +++ b/src/shader/videonv110.fp
> @@ -0,0 +1,54 @@
> +#ifndef ENVYAS
> +static uint32_t
> +NV110FP_NV12[] = {
> +	0x00001462,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x80000000,
> +	0x0000000a,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x0000000f,
> +	0x00000000,
> +#include "videonv110.fpc"
> +};
> +#else
> +
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ipa pass $r2 a[0x7c] 0x0 0x0 0x1
> +mufu rcp $r2 $r2
> +ipa $r0 a[0x80] $r2 0x0 0x1
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ipa $r1 a[0x84] $r2 0x0 0x1
> +tex nodep $r4 $r0 0x0 0x0 t2d 0x8
> +tex nodep $r0 $r0 0x0 0x1 t2d 0xc
> +sched (st 0x0) (st 0x0) (st 0x0)
> +depbar le 0x5 0x1 0x1
> +fmul ftz $r5 $r4 c0[0x0]
> +fadd ftz $r3 $r5 c0[0x4]
> +sched (st 0x0) (st 0x0) (st 0x0)
> +fadd ftz $r4 $r5 c0[0x8]
> +fadd ftz $r5 $r5 c0[0xc]
> +depbar le 0x5 0x0 0x0
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ffma ftz $r3 $r0 c0[0x10] $r3
> +ffma ftz $r4 $r0 c0[0x14] $r4
> +ffma ftz $r5 $r0 c0[0x18] $r5
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ffma ftz $r0 $r1 c0[0x1c] $r3
> +ffma ftz $r2 $r1 c0[0x24] $r5
> +ffma ftz $r1 $r1 c0[0x20] $r4
> +sched (st 0x0) (st 0x0) (st 0x0)
> +exit
> +#endif
> diff --git a/src/shader/videonv110.fpc b/src/shader/videonv110.fpc
> new file mode 100644
> index 0000000..31d745a
> --- /dev/null
> +++ b/src/shader/videonv110.fpc
> @@ -0,0 +1,52 @@
> +0xfc0007e0,
> +0x001f8000,
> +0xcff7ff02,
> +0xe003ff87,
> +0x00470202,
> +0x50800000,
> +0x0027ff00,
> +0xe043ff88,
> +0xfc0007e0,
> +0x001f8000,
> +0x4027ff01,
> +0xe043ff88,
> +0x2ff70004,
> +0xc03a0004,
> +0x2ff70000,
> +0xc03a0016,
> +0xfc0007e0,
> +0x001f8000,
> +0x34170001,
> +0xf0f00000,
> +0x00070405,
> +0x4c681000,
> +0x00170503,
> +0x4c581000,
> +0xfc0007e0,
> +0x001f8000,
> +0x00270504,
> +0x4c581000,
> +0x00370505,
> +0x4c581000,
> +0x34070000,
> +0xf0f00000,
> +0xfc0007e0,
> +0x001f8000,
> +0x00470003,
> +0x49a00180,
> +0x00570004,
> +0x49a00200,
> +0x00670005,
> +0x49a00280,
> +0xfc0007e0,
> +0x001f8000,
> +0x00770100,
> +0x49a00180,
> +0x00970102,
> +0x49a00280,
> +0x00870101,
> +0x49a00200,
> +0xfc0007e0,
> +0x001f8000,
> +0x0007000f,
> +0xe3000000,
> diff --git a/src/shader/xfrm2nv110.vp b/src/shader/xfrm2nv110.vp
> new file mode 100644
> index 0000000..bbfc527
> --- /dev/null
> +++ b/src/shader/xfrm2nv110.vp
> @@ -0,0 +1,82 @@
> +#ifndef ENVYAS
> +static uint32_t
> +NV110VP_Transform2[] = {
> +	0x02000461,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x000ff000,
> +	0x00000000, /* VP_ATTR_EN[0x000] */
> +	0x00000000, /* VP_ATTR_EN[0x080] */
> +	0x00000000, /* VP_ATTR_EN[0x100] */
> +	0x00000000,
> +	0x00000000, /* VP_ATTR_EN[0x200] */
> +	0x80000000, /* VERTEXID */
> +	0x00000000, /* VP_ATTR_EN[0x300] */
> +	0x00000000,
> +	0x0033f000, /* VP_EXPORT_EN[0x040] */
> +	0x00000000, /* VP_EXPORT_EN[0x0c0] */
> +	0x00000000,
> +	0x00000000,
> +	0x00000000,
> +	0x00000000, /* VP_EXPORT_EN[0x2c0] */
> +	0x00000000,
> +#include "xfrm2nv110.vpc"
> +};
> +#else
> +
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ld b32 $r5 a[0x2fc] 0x0
> +shl $r5 $r5 0x5
> +ld b64 $r0 c0[$r5+0x80]
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ld b64 $r2 c0[$r5+0x88]
> +st b128 a[0x70] $r0 0x0
> +
> +ld b64 $r0 c0[$r5+0x90]
> +sched (st 0x0) (st 0x0) (st 0x0)
> +fmul ftz $r2 $r0 c0[0x0]
> +fmul ftz $r3 $r0 c0[0xc]
> +fmul ftz $r4 $r0 c0[0x18]
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ffma ftz $r2 $r1 c0[0x4] $r2
> +ffma ftz $r3 $r1 c0[0x10] $r3
> +ffma ftz $r4 $r1 c0[0x1c] $r4
> +sched (st 0x0) (st 0x0) (st 0x0)
> +fadd ftz $r2 $r2 c0[0x8]
> +fadd ftz $r3 $r3 c0[0x14]
> +fadd ftz $r4 $r4 c0[0x20]
> +sched (st 0x0) (st 0x0) (st 0x0)
> +mufu rcp $r4 $r4
> +fmul ftz $r2 $r2 $r4
> +fmul ftz $r3 $r3 $r4
> +sched (st 0x0) (st 0x0) (st 0x0)
> +fmul ftz $r0 $r2 c0[0x24]
> +fmul ftz $r1 $r3 c0[0x28]
> +st b64 a[0x80] $r0 0x0
> +
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ld b64 $r0 c0[$r5+0x98]
> +fmul ftz $r2 $r0 c0[0x2c]
> +fmul ftz $r3 $r0 c0[0x38]
> +sched (st 0x0) (st 0x0) (st 0x0)
> +fmul ftz $r4 $r0 c0[0x44]
> +ffma ftz $r2 $r1 c0[0x30] $r2
> +ffma ftz $r3 $r1 c0[0x3c] $r3
> +sched (st 0x0) (st 0x0) (st 0x0)
> +ffma ftz $r4 $r1 c0[0x48] $r4
> +fadd ftz $r2 $r2 c0[0x34]
> +fadd ftz $r3 $r3 c0[0x40]
> +sched (st 0x0) (st 0x0) (st 0x0)
> +fadd ftz $r4 $r4 c0[0x4c]
> +mufu rcp $r4 $r4
> +fmul ftz $r2 $r2 $r4
> +sched (st 0x0) (st 0x0) (st 0x0)
> +fmul ftz $r3 $r3 $r4
> +fmul ftz $r0 $r2 c0[0x50]
> +fmul ftz $r1 $r3 c0[0x54]
> +sched (st 0x0) (st 0x0) (st 0x0)
> +st b64 a[0x90] $r0 0x0
> +
> +exit
> +#endif
> diff --git a/src/shader/xfrm2nv110.vpc b/src/shader/xfrm2nv110.vpc
> new file mode 100644
> index 0000000..0d9ebfd
> --- /dev/null
> +++ b/src/shader/xfrm2nv110.vpc
> @@ -0,0 +1,102 @@
> +0xfc0007e0,
> +0x001f8000,
> +0x2fc7ff05,
> +0xefd87f80,
> +0x00570505,
> +0x38480000,
> +0x08070500,
> +0xef950000,
> +0xfc0007e0,
> +0x001f8000,
> +0x08870502,
> +0xef950000,
> +0x0707ff00,
> +0xeff1ff80,
> +0x09070500,
> +0xef950000,
> +0xfc0007e0,
> +0x001f8000,
> +0x00070002,
> +0x4c681000,
> +0x00370003,
> +0x4c681000,
> +0x00670004,
> +0x4c681000,
> +0xfc0007e0,
> +0x001f8000,
> +0x00170102,
> +0x49a00100,
> +0x00470103,
> +0x49a00180,
> +0x00770104,
> +0x49a00200,
> +0xfc0007e0,
> +0x001f8000,
> +0x00270202,
> +0x4c581000,
> +0x00570303,
> +0x4c581000,
> +0x00870404,
> +0x4c581000,
> +0xfc0007e0,
> +0x001f8000,
> +0x00470404,
> +0x50800000,
> +0x00470202,
> +0x5c681000,
> +0x00470303,
> +0x5c681000,
> +0xfc0007e0,
> +0x001f8000,
> +0x00970200,
> +0x4c681000,
> +0x00a70301,
> +0x4c681000,
> +0x0807ff00,
> +0xeff0ff80,
> +0xfc0007e0,
> +0x001f8000,
> +0x09870500,
> +0xef950000,
> +0x00b70002,
> +0x4c681000,
> +0x00e70003,
> +0x4c681000,
> +0xfc0007e0,
> +0x001f8000,
> +0x01170004,
> +0x4c681000,
> +0x00c70102,
> +0x49a00100,
> +0x00f70103,
> +0x49a00180,
> +0xfc0007e0,
> +0x001f8000,
> +0x01270104,
> +0x49a00200,
> +0x00d70202,
> +0x4c581000,
> +0x01070303,
> +0x4c581000,
> +0xfc0007e0,
> +0x001f8000,
> +0x01370404,
> +0x4c581000,
> +0x00470404,
> +0x50800000,
> +0x00470202,
> +0x5c681000,
> +0xfc0007e0,
> +0x001f8000,
> +0x00470303,
> +0x5c681000,
> +0x01470200,
> +0x4c681000,
> +0x01570301,
> +0x4c681000,
> +0xfc0007e0,
> +0x001f8000,
> +0x0907ff00,
> +0xeff0ff80,
> +0x0007000f,
> +0xe3000000,
>


More information about the Nouveau mailing list